pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

This version of pg-sui might be problematic.
Files changed (112)
  1. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
  2. pg_sui-1.6.8.dist-info/RECORD +78 -0
  3. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
  5. pg_sui-1.6.8.dist-info/top_level.txt +1 -0
  6. pgsui/__init__.py +35 -54
  7. pgsui/_version.py +34 -0
  8. pgsui/cli.py +635 -0
  9. pgsui/data_processing/config.py +576 -0
  10. pgsui/data_processing/containers.py +1782 -0
  11. pgsui/data_processing/transformers.py +121 -1103
  12. pgsui/electron/app/__main__.py +5 -0
  13. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  14. pgsui/electron/app/icons/icons/128x128.png +0 -0
  15. pgsui/electron/app/icons/icons/16x16.png +0 -0
  16. pgsui/electron/app/icons/icons/24x24.png +0 -0
  17. pgsui/electron/app/icons/icons/256x256.png +0 -0
  18. pgsui/electron/app/icons/icons/32x32.png +0 -0
  19. pgsui/electron/app/icons/icons/48x48.png +0 -0
  20. pgsui/electron/app/icons/icons/512x512.png +0 -0
  21. pgsui/electron/app/icons/icons/64x64.png +0 -0
  22. pgsui/electron/app/icons/icons/icon.icns +0 -0
  23. pgsui/electron/app/icons/icons/icon.ico +0 -0
  24. pgsui/electron/app/main.js +189 -0
  25. pgsui/electron/app/package-lock.json +6893 -0
  26. pgsui/electron/app/package.json +50 -0
  27. pgsui/electron/app/preload.js +15 -0
  28. pgsui/electron/app/server.py +146 -0
  29. pgsui/electron/app/ui/logo.png +0 -0
  30. pgsui/electron/app/ui/renderer.js +130 -0
  31. pgsui/electron/app/ui/styles.css +59 -0
  32. pgsui/electron/app/ui/ui_shim.js +72 -0
  33. pgsui/electron/bootstrap.py +43 -0
  34. pgsui/electron/launch.py +59 -0
  35. pgsui/electron/package.json +14 -0
  36. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  37. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  38. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  39. pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
  40. pgsui/impute/deterministic/imputers/mode.py +679 -0
  41. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  42. pgsui/impute/deterministic/imputers/phylo.py +971 -0
  43. pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
  44. pgsui/impute/supervised/base.py +339 -0
  45. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
  46. pgsui/impute/supervised/imputers/random_forest.py +287 -0
  47. pgsui/impute/unsupervised/base.py +924 -0
  48. pgsui/impute/unsupervised/callbacks.py +89 -263
  49. pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
  50. pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
  51. pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
  52. pgsui/impute/unsupervised/imputers/vae.py +957 -0
  53. pgsui/impute/unsupervised/loss_functions.py +158 -0
  54. pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
  55. pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
  56. pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
  57. pgsui/impute/unsupervised/models/vae_model.py +259 -618
  58. pgsui/impute/unsupervised/nn_scorers.py +215 -0
  59. pgsui/utils/classification_viz.py +591 -0
  60. pgsui/utils/misc.py +35 -480
  61. pgsui/utils/plotting.py +514 -824
  62. pgsui/utils/scorers.py +212 -438
  63. pg_sui-1.0.2.1.dist-info/RECORD +0 -75
  64. pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
  65. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  66. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  67. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  68. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  69. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  70. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  71. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  72. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  73. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  74. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  75. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  76. pgsui/example_data/trees/test.iqtree +0 -376
  77. pgsui/example_data/trees/test.qmat +0 -5
  78. pgsui/example_data/trees/test.rate +0 -2033
  79. pgsui/example_data/trees/test.tre +0 -1
  80. pgsui/example_data/trees/test_n10.rate +0 -19
  81. pgsui/example_data/trees/test_n100.rate +0 -109
  82. pgsui/example_data/trees/test_n500.rate +0 -509
  83. pgsui/example_data/trees/test_siterates.txt +0 -2024
  84. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  85. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  86. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  87. pgsui/example_data/vcf_files/test.vcf +0 -244
  88. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  89. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  90. pgsui/impute/estimators.py +0 -735
  91. pgsui/impute/impute.py +0 -1486
  92. pgsui/impute/simple_imputers.py +0 -1439
  93. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
  94. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
  95. pgsui/impute/unsupervised/keras_classifiers.py +0 -702
  96. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  97. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
  98. pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
  99. pgsui/pg_sui.py +0 -261
  100. pgsui/utils/sequence_tools.py +0 -407
  101. simulation/sim_benchmarks.py +0 -333
  102. simulation/sim_treeparams.py +0 -475
  103. test/__init__.py +0 -0
  104. test/pg_sui_simtest.py +0 -215
  105. test/pg_sui_testing.py +0 -523
  106. test/test.py +0 -297
  107. test/test_pgsui.py +0 -374
  108. test/test_tkc.py +0 -214
  109. {pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
  110. /pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  111. /pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  112. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
@@ -1,286 +1,112 @@
-import math
-import sys
+from snpio.utils.logging import LoggerManager

-import numpy as np
-import tensorflow as tf

+class EarlyStopping:
+    """Class to stop the training when a monitored metric has stopped improving.

-class CyclicalAnnealingCallback(tf.keras.callbacks.Callback):
-    """Perform cyclical annealing with KL Divergence weights.
-
-    The dynamically changing weight (beta) is multiplied with the KL Divergence loss.
-
-    This process is supposed to improve the latent distribution sampling for the variational autoencoder model and eliminate the KL vanishing issue.
-
-    Three types of cycle curves can be used that determine how the weight increases: 'linear', 'sigmoid', and 'cosine'..
-
-    Code is adapted from: https://github.com/haofuml/cyclical_annealing
-
-    The cyclical annealing process was first described in the following paper: https://aclanthology.org/N19-1021.pdf
-
-    Args:
-        n_iter (int): Number of iterations (epochs) being used in training.
-        start (float, optional): Where to start cycles. Defaults to 0.0.
-        stop (float, optional): Where to stop cycles. Defaults to 1.0.
-        n_cycle (int, optional): How many cycles to use across all the epochs. Defaults to 4.
-        ratio (float, optional): Ratio to determine proportion used to increase beta. Defaults to 0.5.
-        schedule_type (str, optional): Type of curve to use for scheduler. Possible options include: 'linear', 'sigmoid', or 'cosine'. Defaults to 'linear'.
+    This class is used to stop the training of a model when a monitored metric has stopped improving (such as validation loss or accuracy). If the metric does not improve for `patience` epochs, and we have already passed the `min_epochs` epoch threshold, training is halted. The best model checkpoint is reloaded when early stopping is triggered.

+    Example:
+        >>> early_stopping = EarlyStopping(patience=25, verbose=1, min_epochs=100)
+        >>> for epoch in range(1, 1001):
+        >>>     val_loss = train_epoch(...)
+        >>>     early_stopping(val_loss, model)
+        >>>     if early_stopping.early_stop:
+        >>>         break
    """

    def __init__(
        self,
-        n_iter,
-        start=0.0,
-        stop=1.0,
-        n_cycle=4,
-        ratio=0.5,
-        schedule_type="linear",
+        patience: int = 25,
+        delta: float = 0.0,
+        verbose: int = 0,
+        mode: str = "min",
+        min_epochs: int = 100,
+        prefix: str = "pgsui_output",
+        debug: bool = False,
    ):
-        self.n_iter = n_iter
-        self.start = start
-        self.stop = stop
-        self.n_cycle = n_cycle
-        self.ratio = ratio
-        self.schedule_type = schedule_type
-
-        self.arr = None
+        """Early stopping callback for PyTorch training.

-    def on_train_begin(self, logs=None):
-        """Executes on training begin.
+        This class is used to stop the training of a model when a monitored metric has stopped improving (such as validation loss or accuracy). If the metric does not improve for `patience` epochs, and we have already passed the `min_epochs` epoch threshold, training is halted. The best model checkpoint is reloaded when early stopping is triggered. The `mode` parameter can be set to "min" or "max" to indicate whether the metric should be minimized or maximized, respectively.

-        Here, the cycle curve is generated and stored as a class variable.
+        Args:
+            patience (int): Number of epochs to wait after the last time the monitored metric improved.
+            delta (float): Minimum change in the monitored metric to qualify as an improvement.
+            verbose (int): Verbosity level (0 = silent, 1 = improvement messages, 2+ = more).
+            mode (str): "min" or "max" to indicate how improvement is defined.
+            prefix (str): Prefix for directory naming.
+            output_dir (Path): Directory in which to create subfolders/checkpoints.
+            min_epochs (int): Minimum epoch count before early stopping can take effect.
+            debug (bool): Debug mode for logging messages
+
+        Raises:
+            ValueError: If an invalid mode is provided. Must be "min" or "max".
        """
-        if self.schedule_type == "linear":
-            cycle_func = self._linear_cycle_range
-        elif self.schedule_type == "sigmoid":
-            cycle_func = self._sigmoid_cycle_range
-        elif self.schedule_type == "cosine":
-            cycle_func = self._cosine_cycle_range
+        self.patience = patience
+        self.delta = delta
+        self.verbose = verbose >= 2 or debug
+        self.debug = debug
+        self.mode = mode
+        self.counter = 0
+        self.epoch_count = 0
+        self.best_score = None
+        self.early_stop = False
+        self.best_model = None
+        self.min_epochs = min_epochs
+
+        is_verbose = verbose >= 2 or debug
+        logman = LoggerManager(name=__name__, prefix=prefix, verbose=is_verbose)
+        self.logger = logman.get_logger()
+
+        # Define the comparison function for the monitored metric
+        if mode == "min":
+            self.monitor = lambda current, best: current < best - self.delta
+        elif mode == "max":
+            self.monitor = lambda current, best: current > best + self.delta
        else:
-            raise ValueError(
-                f"Invalid schedule_type value provided: {self.schedule_type}"
-            )
+            msg = f"Invalid mode provided: '{mode}'. Use 'min' or 'max'."
+            self.logger.error(msg)
+            raise ValueError(msg)

-        self.arr = cycle_func()
-
-    def on_epoch_begin(self, epoch, logs=None):
-        """Executes each time an epoch begins.
-
-        Here, the new kl_beta weight is set.
+    def __call__(self, score, model):
+        """Checks if early stopping condition is met and checkpoints model accordingly.

        Args:
-            epoch (int): Current epoch iteration.
-            logs (None, optional): For compatibility. Not used. Defaults to None.
-        """
-        idx = epoch - 1
-        new_weight = self.arr[idx]
-
-        tf.keras.backend.set_value(self.model.kl_beta, new_weight)
-
-    def _linear_cycle_range(self):
-        """Get an array with a linear cycle curve ranging from 0 to 1 for n_iter epochs.
-
-        The amount of time cycling and spent at 1.0 is determined by the ratio variable.
-
-        Returns:
-            numpy.ndarray: Linear cycle range.
-        """
-        L = np.ones(self.n_iter) * self.stop
-        period = self.n_iter / self.n_cycle
-
-        # Linear schedule
-        step = (self.stop - self.start) / (
-            period * self.ratio
-        )  # linear schedule
-
-        for c in range(self.n_cycle):
-            v, i = self.start, 0
-            while v <= self.stop and (int(i + c * period) < self.n_iter):
-                L[int(i + c * period)] = v
-                v += step
-                i += 1
-
-        return L
-
-    def _sigmoid_cycle_range(self):
-        """Get sigmoidal curve cycle ranging from 0 to 1 for n_iter epochs.
-
-        The amount of time cycling and spent at 1.0 is determined by the ratio variable.
-
-        Returns:
-            numpy.ndarray: Sigmoidal cycle range.
+            score (float): The current metric value (e.g., validation loss/accuracy).
+            model (torch.nn.Module): The model being trained.
        """
-        L = np.ones(self.n_iter)
-        period = self.n_iter / self.n_cycle
-        step = (self.stop - self.start) / (
-            period * self.ratio
-        )  # step is in [0,1]
-
-        for c in range(self.n_cycle):
-            v, i = self.start, 0
+        # Increment the epoch count each time we call this function
+        self.epoch_count += 1
+
+        # If this is the first epoch, initialize best_score and save model
+        if self.best_score is None:
+            self.best_score = score
+            return
+
+        # Check if there is improvement
+        if self.monitor(score, self.best_score):
+            # If improved, reset counter and update the best score/model
+            self.best_score = score
+            self.best_model = model
+            self.counter = 0
+        else:
+            # No improvement: increase counter
+            self.counter += 1

-            while v <= self.stop:
-                L[int(i + c * period)] = 1.0 / (
-                    1.0 + np.exp(-(v * 12.0 - 6.0))
+            if self.verbose:
+                self.logger.info(
+                    f"EarlyStopping counter: {self.counter}/{self.patience}"
                )
-                v += step
-                i += 1
-        return L
-
-    def _cosine_cycle_range(self):
-        """Get cosine curve cycle ranging from 0 to 1 for n_iter epochs.

-        The amount of time cycling and spent at 1.0 is determined by the ratio variable.
-
-        Returns:
-            numpy.ndarray: Cosine cycle range.
-        """
-        L = np.ones(self.n_iter)
-        period = self.n_iter / self.n_cycle
-        step = (self.stop - self.start) / (
-            period * self.ratio
-        )  # step is in [0,1]
-
-        for c in range(self.n_cycle):
-            v, i = self.start, 0
-
-            while v <= self.stop:
-                L[int(i + c * period)] = 0.5 - 0.5 * math.cos(v * math.pi)
-                v += step
-                i += 1
-        return L
-
-
-class VAECallbacks(tf.keras.callbacks.Callback):
-    """Custom callbacks to use with subclassed VAE Keras model.
-
-    Requires y, missing_mask, and sample_weight to be input variables to be properties with setters in the subclassed model.
-    """
+            # Now check if we surpass patience AND have reached min_epochs
+            if self.counter >= self.patience and self.epoch_count >= self.min_epochs:

-    def __init__(self):
-        self.indices = None
+                if self.best_model is None:
+                    self.best_model = model

-    def on_epoch_begin(self, epoch, logs=None):
-        """Shuffle input and target at start of epoch."""
-        y = self.model.y.copy()
-        missing_mask = self.model.missing_mask
-        sample_weight = self.model.sample_weight
-
-        n_samples = len(y)
-        self.indices = np.arange(n_samples)
-        np.random.shuffle(self.indices)
-
-        self.model.y = y[self.indices]
-        self.model.missing_mask = missing_mask[self.indices]
-
-        if sample_weight is not None:
-            self.model.sample_weight = sample_weight[self.indices]
-
-    def on_train_batch_begin(self, batch, logs=None):
-        """Get batch index."""
-        self.model.batch_idx = batch
-
-    def on_epoch_end(self, epoch, logs=None):
-        """Unsort the row indices."""
-        unshuffled = np.argsort(self.indices)
-
-        self.model.y = self.model.y[unshuffled]
-        self.model.missing_mask = self.model.missing_mask[unshuffled]
-
-        if self.model.sample_weight is not None:
-            self.model.sample_weight = self.model.sample_weight[unshuffled]
-
-
-class UBPCallbacks(tf.keras.callbacks.Callback):
-    """Custom callbacks to use with subclassed NLPCA/ UBP Keras models.
-
-    Requires y, missing_mask, V_latent, and sample_weight to be input variables to be properties with setters in the subclassed model.
-    """
-
-    def __init__(self):
-        self.indices = None
-
-    def on_epoch_begin(self, epoch, logs=None):
-        """Shuffle input and target at start of epoch."""
-        y = self.model.y.copy()
-        missing_mask = self.model.missing_mask
-        sample_weight = self.model.sample_weight
-
-        n_samples = len(y)
-        self.indices = np.arange(n_samples)
-        np.random.shuffle(self.indices)
-
-        self.model.y = y[self.indices]
-        self.model.V_latent = self.model.V_latent[self.indices]
-        self.model.missing_mask = missing_mask[self.indices]
-
-        if sample_weight is not None:
-            self.model.sample_weight = sample_weight[self.indices]
-
-    def on_train_batch_begin(self, batch, logs=None):
-        """Get batch index."""
-        self.model.batch_idx = batch
-
-    def on_epoch_end(self, epoch, logs=None):
-        """Unsort the row indices."""
-        unshuffled = np.argsort(self.indices)
-
-        self.model.y = self.model.y[unshuffled]
-        self.model.V_latent = self.model.V_latent[unshuffled]
-        self.model.missing_mask = self.model.missing_mask[unshuffled]
-
-        if self.model.sample_weight is not None:
-            self.model.sample_weight = self.model.sample_weight[unshuffled]
-
-
-class UBPEarlyStopping(tf.keras.callbacks.Callback):
-    """Stop training when the loss is at its min, i.e. the loss stops decreasing.
-
-    Args:
-        patience (int, optional): Number of epochs to wait after min has been hit. After this
-            number of no improvement, training stops. Defaults to 0.
-
-        phase (int, optional): Current UBP Phase. Defaults to 3.
-    """
-
-    def __init__(self, patience=0, phase=3):
-        super(UBPEarlyStopping, self).__init__()
-        self.patience = patience
-        self.phase = phase
-
-        # best_weights to store the weights at which the minimum loss occurs.
-        self.best_weights = None
-
-        # In UBP, the input gets refined during training.
-        # So we have to revert it too.
-        self.best_input = None
-
-    def on_train_begin(self, logs=None):
-        # The number of epoch it has waited when loss is no longer minimum.
-        self.wait = 0
-        # The epoch the training stops at.
-        self.stopped_epoch = 0
-        # Initialize the best as infinity.
-        self.best = np.Inf
-
-    def on_epoch_end(self, epoch, logs=None):
-        current = logs.get("loss")
-        if np.less(current, self.best):
-            self.best = current
-            self.wait = 0
-            # Record the best weights if current results is better (less).
-            self.best_weights = self.model.get_weights()
-
-            if self.phase != 2:
-                # Only refine input in phase 2.
-                self.best_input = self.model.V_latent
-        else:
-            self.wait += 1
-            if self.wait >= self.patience:
-                self.stopped_epoch = epoch
-                self.model.stop_training = True
-                self.model.set_weights(self.best_weights)
+                self.early_stop = True

-                if self.phase != 2:
-                    self.model.V_latent = self.best_input
+                if self.verbose:
+                    self.logger.info(
+                        f"Early stopping triggered at epoch {self.epoch_count}"
+                    )
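
The rewritten callbacks.py replaces the TensorFlow/Keras callbacks with the PyTorch-oriented EarlyStopping class shown above. A minimal usage sketch, following the Example in its docstring; the torch.nn.Linear model and the train_one_epoch function are hypothetical stand-ins for the caller's own model and per-epoch training/validation step:

import torch

from pgsui.impute.unsupervised.callbacks import EarlyStopping


def train_one_epoch(model: torch.nn.Module, epoch: int) -> float:
    # Hypothetical stand-in: run one epoch and return the validation loss.
    return 1.0 / epoch


model = torch.nn.Linear(4, 2)  # any torch.nn.Module works here
early_stopping = EarlyStopping(patience=25, verbose=1, min_epochs=100)

for epoch in range(1, 1001):
    val_loss = train_one_epoch(model, epoch)
    early_stopping(val_loss, model)  # updates best score/model, bumps the counter
    if early_stopping.early_stop:
        model = early_stopping.best_model  # callback retains the best-scoring model
        break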
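
For comparison with what was removed: the deleted CyclicalAnnealingCallback ramped the KL beta weight along a cyclic schedule before multiplying it into the KL Divergence loss. A minimal sketch of its linear variant, adapted from the deleted _linear_cycle_range helper (the standalone function name here is illustrative, not part of the released API):

import numpy as np


def linear_cycle_range(n_iter, start=0.0, stop=1.0, n_cycle=4, ratio=0.5):
    # Mirrors the removed _linear_cycle_range: ramp beta from `start` to `stop`
    # over `ratio` of each cycle, then hold at `stop` for the remainder.
    L = np.ones(n_iter) * stop
    period = n_iter / n_cycle
    step = (stop - start) / (period * ratio)
    for c in range(n_cycle):
        v, i = start, 0
        while v <= stop and int(i + c * period) < n_iter:
            L[int(i + c * period)] = v
            v += step
            i += 1
    return L


print(linear_cycle_range(8, n_cycle=2))  # [0.  0.5 1.  1.  0.  0.5 1.  1. ]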