PyPI - pg-sui - Versions diffs - 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl - Mend

pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pg-sui might be problematic. Click here for more details.

Files changed (112) hide show

{pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
pg_sui-1.6.8.dist-info/RECORD +78 -0
{pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
pg_sui-1.6.8.dist-info/top_level.txt +1 -0
pgsui/__init__.py +35 -54
pgsui/_version.py +34 -0
pgsui/cli.py +635 -0
pgsui/data_processing/config.py +576 -0
pgsui/data_processing/containers.py +1782 -0
pgsui/data_processing/transformers.py +121 -1103
pgsui/electron/app/__main__.py +5 -0
pgsui/electron/app/icons/icons/1024x1024.png +0 -0
pgsui/electron/app/icons/icons/128x128.png +0 -0
pgsui/electron/app/icons/icons/16x16.png +0 -0
pgsui/electron/app/icons/icons/24x24.png +0 -0
pgsui/electron/app/icons/icons/256x256.png +0 -0
pgsui/electron/app/icons/icons/32x32.png +0 -0
pgsui/electron/app/icons/icons/48x48.png +0 -0
pgsui/electron/app/icons/icons/512x512.png +0 -0
pgsui/electron/app/icons/icons/64x64.png +0 -0
pgsui/electron/app/icons/icons/icon.icns +0 -0
pgsui/electron/app/icons/icons/icon.ico +0 -0
pgsui/electron/app/main.js +189 -0
pgsui/electron/app/package-lock.json +6893 -0
pgsui/electron/app/package.json +50 -0
pgsui/electron/app/preload.js +15 -0
pgsui/electron/app/server.py +146 -0
pgsui/electron/app/ui/logo.png +0 -0
pgsui/electron/app/ui/renderer.js +130 -0
pgsui/electron/app/ui/styles.css +59 -0
pgsui/electron/app/ui/ui_shim.js +72 -0
pgsui/electron/bootstrap.py +43 -0
pgsui/electron/launch.py +59 -0
pgsui/electron/package.json +14 -0
pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
pgsui/impute/deterministic/imputers/mode.py +679 -0
pgsui/impute/deterministic/imputers/nmf.py +221 -0
pgsui/impute/deterministic/imputers/phylo.py +971 -0
pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
pgsui/impute/supervised/base.py +339 -0
pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
pgsui/impute/supervised/imputers/random_forest.py +287 -0
pgsui/impute/unsupervised/base.py +924 -0
pgsui/impute/unsupervised/callbacks.py +89 -263
pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
pgsui/impute/unsupervised/imputers/vae.py +957 -0
pgsui/impute/unsupervised/loss_functions.py +158 -0
pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
pgsui/impute/unsupervised/models/vae_model.py +259 -618
pgsui/impute/unsupervised/nn_scorers.py +215 -0
pgsui/utils/classification_viz.py +591 -0
pgsui/utils/misc.py +35 -480
pgsui/utils/plotting.py +514 -824
pgsui/utils/scorers.py +212 -438
pg_sui-1.0.2.1.dist-info/RECORD +0 -75
pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
pgsui/example_data/phylip_files/test_n10.phy +0 -118
pgsui/example_data/phylip_files/test_n100.phy +0 -118
pgsui/example_data/phylip_files/test_n2.phy +0 -118
pgsui/example_data/phylip_files/test_n500.phy +0 -118
pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
pgsui/example_data/trees/test.iqtree +0 -376
pgsui/example_data/trees/test.qmat +0 -5
pgsui/example_data/trees/test.rate +0 -2033
pgsui/example_data/trees/test.tre +0 -1
pgsui/example_data/trees/test_n10.rate +0 -19
pgsui/example_data/trees/test_n100.rate +0 -109
pgsui/example_data/trees/test_n500.rate +0 -509
pgsui/example_data/trees/test_siterates.txt +0 -2024
pgsui/example_data/trees/test_siterates_n10.txt +0 -10
pgsui/example_data/trees/test_siterates_n100.txt +0 -100
pgsui/example_data/trees/test_siterates_n500.txt +0 -500
pgsui/example_data/vcf_files/test.vcf +0 -244
pgsui/example_data/vcf_files/test.vcf.gz +0 -0
pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
pgsui/impute/estimators.py +0 -735
pgsui/impute/impute.py +0 -1486
pgsui/impute/simple_imputers.py +0 -1439
pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
pgsui/impute/unsupervised/keras_classifiers.py +0 -702
pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
pgsui/pg_sui.py +0 -261
pgsui/utils/sequence_tools.py +0 -407
simulation/sim_benchmarks.py +0 -333
simulation/sim_treeparams.py +0 -475
test/__init__.py +0 -0
test/pg_sui_simtest.py +0 -215
test/pg_sui_testing.py +0 -523
test/test.py +0 -297
test/test_pgsui.py +0 -374
test/test_tkc.py +0 -214
{pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
/pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
/pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
{simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0

pgsui/impute/unsupervised/models/in_development/cnn_model.py DELETED Viewed

@@ -1,486 +0,0 @@
-import logging
-import os
-import sys
-import warnings
-import math
-# Import tensorflow with reduced warnings.
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-logging.getLogger("tensorflow").disabled = True
-warnings.filterwarnings("ignore", category=UserWarning)
-import numpy as np
-import pandas as pd
-import tensorflow as tf
-# Disable can't find cuda .dll errors. Also turns of GPU support.
-tf.config.set_visible_devices([], "GPU")
-from tensorflow.python.util import deprecation
-# Disable warnings and info logs.
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
-tf.get_logger().setLevel(logging.ERROR)
-# Monkey patching deprecation utils to supress warnings.
-# noinspection PyUnusedLocal
-def deprecated(
-    date, instructions, warn_once=True
-):  # pylint: disable=unused-argument
-    def deprecated_wrapper(func):
-        return func
-    return deprecated_wrapper
-deprecation.deprecated = deprecated
-from tensorflow.keras.layers import (
-    Dropout,
-    Dense,
-    Reshape,
-    Activation,
-    Flatten,
-    BatchNormalization,
-    LeakyReLU,
-    PReLU,
-)
-from tensorflow.keras.regularizers import l1_l2
-# Custom Modules
-try:
-    from ...neural_network_methods import NeuralNetworkMethods
-except (ModuleNotFoundError, ValueError, ImportError):
-    from impute.unsupervised.neural_network_methods import NeuralNetworkMethods
-class SoftOrdering1DCNN(tf.keras.Model):
-    def __init__(
-        self,
-        y=None,
-        output_shape=None,
-        weights_initializer="glorot_normal",
-        hidden_layer_sizes="midpoint",
-        num_hidden_layers=1,
-        hidden_activation="elu",
-        l1_penalty=1e-6,
-        l2_penalty=1e-6,
-        dropout_rate=0.2,
-        num_classes=4,
-        sample_weight=None,
-        batch_size=32,
-        missing_mask=None,
-        activation=None,
-        channel_increase_rate=2,
-        initial_hidden_size=2048,
-        num_groups=256,
-    ):
-        super(SoftOrdering1DCNN, self).__init__()
-        self._y = y
-        self._missing_mask = missing_mask
-        self._sample_weight = sample_weight
-        self._batch_idx = 0
-        self._batch_size = batch_size
-        self.output_activation = activation
-        self.sample_weight = sample_weight
-        self.nn_ = NeuralNetworkMethods()
-        self.binary_accuracy = self.nn_.make_masked_binary_accuracy(
-            is_vae=True
-        )
-        self.total_loss_tracker = tf.keras.metrics.Mean(name="loss")
-        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(
-            name="reconstruction_loss"
-        )
-        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")
-        self.accuracy_tracker = tf.keras.metrics.Mean(name="accuracy")
-        # y_train[1] dimension.
-        self.n_features = output_shape * num_classes
-        self.weights_initializer = weights_initializer
-        self.hidden_layer_sizes = hidden_layer_sizes
-        self.num_hidden_layers = num_hidden_layers
-        self.hidden_activation = hidden_activation
-        self.l1_penalty = l1_penalty
-        self.l2_penalty = l2_penalty
-        self.dropout_rate = dropout_rate
-        self.num_classes = num_classes
-        self.channel_increase_rate = channel_increase_rate
-        self.initial_hidden_size = initial_hidden_size
-        self.channel_size1 = num_groups
-        self.channel_size2 = num_groups * 2
-        self.channel_size3 = num_groups * 2
-        nn = NeuralNetworkMethods()
-        # hidden_layer_sizes = nn.validate_hidden_layers(
-        #     self.hidden_layer_sizes, self.num_hidden_layers
-        # )
-        # hidden_layer_sizes = nn.get_hidden_layer_sizes(
-        #     self.n_features, self.n_components, hidden_layer_sizes, vae=True
-        # )
-        # hidden_layer_sizes = [h * self.num_classes for h in hidden_layer_sizes]
-        if self.l1_penalty == 0.0 and self.l2_penalty == 0.0:
-            kernel_regularizer = None
-        else:
-            kernel_regularizer = l1_l2(self.l1_penalty, self.l2_penalty)
-        kernel_initializer = self.weights_initializer
-        if self.hidden_activation.lower() == "leaky_relu":
-            activation = LeakyReLU(alpha=0.01)
-        elif self.hidden_activation.lower() == "prelu":
-            activation = PReLU()
-        elif self.hidden_activation.lower() == "selu":
-            activation = "selu"
-            kernel_initializer = "lecun_normal"
-        else:
-            activation = self.hidden_activation
-        if num_hidden_layers > 5:
-            raise ValueError(
-                f"The maximum number of hidden layers is 5, but got "
-                f"{num_hidden_layers}"
-            )
-        hidden_size = initial_hidden_size
-        if self.n_features >= hidden_size:
-            scaling_factor = int(math.ceil(self.n_features / hidden_size)) * 2
-            hidden_size *= num_groups * int(
-                math.ceil((scaling_factor / num_groups))
-            )
-        else:
-            # If hidden_size is close in number to n_features
-            if abs(hidden_size - self.n_features) <= (hidden_size // 2):
-                hidden_size *= 2
-        # Model adapted from: https://medium.com/spikelab/convolutional-neural-networks-on-tabular-datasets-part-1-4abdd67795b6
-        signal_size1 = hidden_size // num_groups
-        signal_size2 = signal_size1 // 2
-        signal_size3 = signal_size1 // 4 * self.channel_size3
-        self.signal_size1 = signal_size1
-        self.signal_size2 = signal_size2
-        self.signal_size3 = signal_size3
-        self.batch_norm1 = BatchNormalization()
-        self.dropout1 = Dropout(self.dropout_rate)
-        self.dense1 = Dense(
-            hidden_size,
-            input_shape=(self.n_features,),
-            activation=hidden_activation,
-            kernel_initializer=kernel_initializer,
-        )
-        self.rshp = Reshape((num_groups, signal_size1))
-        self.batch_norm_c1 = BatchNormalization()
-        self.conv1 = tf.keras.layers.Conv1D(
-            self.channel_size1 * self.channel_increase_rate,
-            kernel_size=5,
-            stride=1,
-            padding=2,
-            groups=signal_size1,
-            kernel_initializer=kernel_initializer,
-            activation=hidden_activation,
-        )
-        self.avg_po_c1 = tf.keras.layers.AveragePooling1D(
-            pool_size=4, padding="valid"
-        )
-        self.batch_norm_c2 = BatchNormalization()
-        self.dropout_c2 = Dropout(self.dropout_rate)
-        self.conv2 = tf.keras.layers.Conv1D(
-            self.channel_size2,
-            kernel_size=3,
-            stride=1,
-            padding=1,
-            kernel_initializer=kernel_initializer,
-            activation=hidden_activation,
-        )
-        self.batch_norm_c3 = BatchNormalization()
-        self.dropout_c3 = Dropout(self.dropout_rate)
-        self.conv3 = tf.keras.layers.Conv1D(
-            self.channel_size2,
-            kernel_size=3,
-            stride=1,
-            padding=1,
-            kernel_initializer=kernel_initializer,
-            activation=hidden_activation,
-        )
-        self.batch_norm_c4 = BatchNormalization()
-        self.dropout_c4 = Dropout(self.dropout_rate)
-        self.conv4 = tf.keras.layers.Conv1D(
-            self.channel_size2,
-            kernel_size=5,
-            stride=1,
-            padding=2,
-            groups=signal_size1,
-            kernel_initializer=kernel_initializer,
-            activation=None,
-        )
-        self.act_c4 = Activation(hidden_activation)
-        self.max_po_c4 = tf.keras.layers.MaxPooling1D(
-            pool_size=4, stride=2, padding=1
-        )
-        self.flatten = Flatten()
-        self.batch_norm2 = BatchNormalization()
-        self.dropout2 = Dropout(self.dropout_rate)
-        self.dense2 = Dense(
-            self.n_features, kernel_initializer=kernel_initializer
-        )
-        self.rshp2 = Reshape((output_shape, num_classes))
-        self.act2 = Activation(activation)
-    def call(self, inputs, training=None):
-        """Call the model on a particular input.
-        Args:
-            input (tf.Tensor): Input tensor. Must be one-hot encoded.
-        Returns:
-            tf.Tensor: Output predictions. Will be one-hot encoded.
-        """
-        x = self.dense1(inputs)
-        x = self.batch_norm1(x, training=training)
-        x = self.dropout1(x, training=training)
-        x = self.rshp(x)
-        x = self.conv1(x)
-        x = self.batch_norm_c1(x, training=training)
-        x = self.avg_po_c1(x)
-        x = self.conv2(x)
-        x = self.batch_norm(x, training=training)
-        x = self.dropout_c2(x, training=training)
-        x_s = x
-        x = self.conv3(x)
-        x = self.batch_norm_c3(x, training=training)
-        x = self.dropout(x, training=training)
-        x = self.conv4(x)
-        x = self.batch_norm_c4(x, training=training)
-        x += x_s
-        x = self.act_c4(x)
-        x = self.max_po_c4(x)
-        x = self.dropout1(x)
-        x = self.rshp(x)
-        x = self.batch_norm_c1(x)
-        x = self.conv1(x)
-        x = self.avg_po_c1(x)
-        x = self.flatten(x)
-        x = self.dense2(x)
-        x = self.batch_norm2(x, training=training)
-        x = self.dropout2(x, training=training)
-        x = self.rshp2(x)
-        return self.act2(x)
-    def model(self):
-        """Here so that mymodel.model().summary() can be called for debugging."""
-        x = tf.keras.Input(shape=(self.n_features * self.num_classes,))
-        return tf.keras.Model(inputs=[x], outputs=self.call(x))
-    def set_model_outputs(self):
-        x = tf.keras.Input(shape=(self.n_features * self.num_classes,))
-        model = tf.keras.Model(inputs=[x], outputs=self.call(x))
-        self.outputs = model.outputs
-    @property
-    def metrics(self):
-        return [
-            self.total_loss_tracker,
-            self.reconstruction_loss_tracker,
-            self.kl_loss_tracker,
-            self.accuracy_tracker,
-        ]
-    @tf.function
-    def train_step(self, data):
-        # if isinstance(data, tuple):
-        #     if len(data) == 2:
-        #         x, y = data
-        #         sample_weight = None
-        #     else:
-        #         x, y, sample_weight = data
-        # else:
-        #     raise TypeError("Target y must be supplied to fit for this model.")
-        # Set in the UBPCallbacks() callback.
-        y = self._y
-        (
-            y,
-            y_true,
-            sample_weight,
-            missing_mask,
-            batch_start,
-            batch_end,
-        ) = self.nn_.prepare_training_batches(
-            y,
-            y,
-            self._batch_size,
-            self._batch_idx,
-            True,
-            self.n_components,
-            self._sample_weight,
-            self._missing_mask,
-            ubp=False,
-        )
-        if sample_weight is not None:
-            sample_weight_masked = tf.convert_to_tensor(
-                sample_weight[~missing_mask], dtype=tf.float32
-            )
-        else:
-            sample_weight_masked = None
-        y_true_masked = tf.boolean_mask(
-            tf.convert_to_tensor(y_true, dtype=tf.float32),
-            tf.reduce_any(tf.not_equal(y_true, -1), axis=2),
-        )
-        with tf.GradientTape() as tape:
-            reconstruction = self(tf.convert_to_tensor(y), training=True)
-            y_pred_masked = tf.boolean_mask(
-                reconstruction, tf.reduce_any(tf.not_equal(y_true, -1), axis=2)
-            )
-            # Returns binary crossentropy loss.
-            loss = self.compiled_loss(
-                y_true_masked,
-                y_pred_masked,
-                sample_weight=sample_weight_masked,
-            )
-        grads = tape.gradient(loss, self.trainable_weights)
-        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
-        self.total_loss_tracker.update_state(loss)
-        ### NOTE: If you get the error, "'tuple' object has no attribute
-        ### 'rank', then convert y_true to a tensor object."
-        # self.compiled_metrics.update_state(
-        self.accuracy_tracker.update_state(
-            self.binary_accuracy(
-                y_true_masked,
-                y_pred_masked,
-                sample_weight=sample_weight_masked,
-            )
-        )
-        return {
-            "loss": self.total_loss_tracker.result(),
-            "accuracy": self.accuracy_tracker.result(),
-        }
-    @tf.function
-    def test_step(self, data):
-        if isinstance(data, tuple):
-            if len(data) == 2:
-                x, y = data
-                sample_weight = None
-            else:
-                x, y, sample_weight = data
-        else:
-            raise TypeError("Target y must be supplied to fit in this model.")
-        if sample_weight is not None:
-            sample_weight_masked = tf.boolean_mask(
-                tf.convert_to_tensor(sample_weight),
-                tf.reduce_any(tf.not_equal(y, -1), axis=2),
-            )
-        else:
-            sample_weight_masked = None
-        reconstruction, z_mean, z_log_var, z = self(x, training=False)
-        reconstruction_loss = self.compiled_loss(
-            y,
-            reconstruction,
-            sample_weight=sample_weight_masked,
-        )
-        # Includes KL Divergence Loss.
-        regularization_loss = sum(self.losses)
-        total_loss = reconstruction_loss + regularization_loss
-        self.accuracy_tracker.update_state(
-            self.cateogrical_accuracy(
-                y,
-                reconstruction,
-                sample_weight=sample_weight_masked,
-            )
-        )
-        self.total_loss_tracker.update_state(total_loss)
-        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
-        self.kl_loss_tracker.update_state(regularization_loss)
-        return {
-            "loss": self.total_loss_tracker.result(),
-            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
-            "kl_loss": self.kl_loss_tracker.result(),
-            "accuracy": self.accuracy_tracker.result(),
-        }
-    @property
-    def batch_size(self):
-        """Batch (=step) size per epoch."""
-        return self._batch_size
-    @property
-    def batch_idx(self):
-        """Current batch (=step) index."""
-        return self._batch_idx
-    @property
-    def y(self):
-        return self._y
-    @property
-    def missing_mask(self):
-        return self._missing_mask
-    @property
-    def sample_weight(self):
-        return self._sample_weight
-    @batch_size.setter
-    def batch_size(self, value):
-        """Set batch_size parameter."""
-        self._batch_size = int(value)
-    @batch_idx.setter
-    def batch_idx(self, value):
-        """Set current batch (=step) index."""
-        self._batch_idx = int(value)
-    @y.setter
-    def y(self, value):
-        """Set y after each epoch."""
-        self._y = value
-    @missing_mask.setter
-    def missing_mask(self, value):
-        """Set y after each epoch."""
-        self._missing_mask = value
-    @sample_weight.setter
-    def sample_weight(self, value):
-        self._sample_weight = value

pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl

Potentially problematic release.

pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl