pg-sui 0.2.3__py3-none-any.whl → 1.6.16a3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
Files changed (128)
  1. pg_sui-1.6.16a3.dist-info/METADATA +292 -0
  2. pg_sui-1.6.16a3.dist-info/RECORD +81 -0
  3. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info}/WHEEL +1 -1
  4. pg_sui-1.6.16a3.dist-info/entry_points.txt +4 -0
  5. {pg_sui-0.2.3.dist-info → pg_sui-1.6.16a3.dist-info/licenses}/LICENSE +0 -0
  6. pg_sui-1.6.16a3.dist-info/top_level.txt +1 -0
  7. pgsui/__init__.py +35 -54
  8. pgsui/_version.py +34 -0
  9. pgsui/cli.py +922 -0
  10. pgsui/data_processing/__init__.py +0 -0
  11. pgsui/data_processing/config.py +565 -0
  12. pgsui/data_processing/containers.py +1436 -0
  13. pgsui/data_processing/transformers.py +557 -907
  14. pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
  15. pgsui/electron/app/__main__.py +5 -0
  16. pgsui/electron/app/extra-resources/.gitkeep +1 -0
  17. pgsui/electron/app/icons/icons/1024x1024.png +0 -0
  18. pgsui/electron/app/icons/icons/128x128.png +0 -0
  19. pgsui/electron/app/icons/icons/16x16.png +0 -0
  20. pgsui/electron/app/icons/icons/24x24.png +0 -0
  21. pgsui/electron/app/icons/icons/256x256.png +0 -0
  22. pgsui/electron/app/icons/icons/32x32.png +0 -0
  23. pgsui/electron/app/icons/icons/48x48.png +0 -0
  24. pgsui/electron/app/icons/icons/512x512.png +0 -0
  25. pgsui/electron/app/icons/icons/64x64.png +0 -0
  26. pgsui/electron/app/icons/icons/icon.icns +0 -0
  27. pgsui/electron/app/icons/icons/icon.ico +0 -0
  28. pgsui/electron/app/main.js +227 -0
  29. pgsui/electron/app/package-lock.json +6894 -0
  30. pgsui/electron/app/package.json +51 -0
  31. pgsui/electron/app/preload.js +15 -0
  32. pgsui/electron/app/server.py +157 -0
  33. pgsui/electron/app/ui/logo.png +0 -0
  34. pgsui/electron/app/ui/renderer.js +131 -0
  35. pgsui/electron/app/ui/styles.css +59 -0
  36. pgsui/electron/app/ui/ui_shim.js +72 -0
  37. pgsui/electron/bootstrap.py +43 -0
  38. pgsui/electron/launch.py +57 -0
  39. pgsui/electron/package.json +14 -0
  40. pgsui/example_data/__init__.py +0 -0
  41. pgsui/example_data/phylip_files/__init__.py +0 -0
  42. pgsui/example_data/phylip_files/test.phy +0 -0
  43. pgsui/example_data/popmaps/__init__.py +0 -0
  44. pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
  45. pgsui/example_data/structure_files/__init__.py +0 -0
  46. pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
  47. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
  48. pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
  49. pgsui/impute/__init__.py +0 -0
  50. pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
  51. pgsui/impute/deterministic/imputers/mode.py +844 -0
  52. pgsui/impute/deterministic/imputers/nmf.py +221 -0
  53. pgsui/impute/deterministic/imputers/phylo.py +973 -0
  54. pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
  55. pgsui/impute/supervised/__init__.py +0 -0
  56. pgsui/impute/supervised/base.py +343 -0
  57. pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
  58. pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
  59. pgsui/impute/supervised/imputers/random_forest.py +291 -0
  60. pgsui/impute/unsupervised/__init__.py +0 -0
  61. pgsui/impute/unsupervised/base.py +1121 -0
  62. pgsui/impute/unsupervised/callbacks.py +92 -262
  63. {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
  64. pgsui/impute/unsupervised/imputers/autoencoder.py +1361 -0
  65. pgsui/impute/unsupervised/imputers/nlpca.py +1666 -0
  66. pgsui/impute/unsupervised/imputers/ubp.py +1660 -0
  67. pgsui/impute/unsupervised/imputers/vae.py +1316 -0
  68. pgsui/impute/unsupervised/loss_functions.py +261 -0
  69. pgsui/impute/unsupervised/models/__init__.py +0 -0
  70. pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
  71. pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
  72. pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
  73. pgsui/impute/unsupervised/models/vae_model.py +269 -630
  74. pgsui/impute/unsupervised/nn_scorers.py +255 -0
  75. pgsui/utils/__init__.py +0 -0
  76. pgsui/utils/classification_viz.py +608 -0
  77. pgsui/utils/logging_utils.py +22 -0
  78. pgsui/utils/misc.py +35 -480
  79. pgsui/utils/plotting.py +996 -829
  80. pgsui/utils/pretty_metrics.py +290 -0
  81. pgsui/utils/scorers.py +213 -666
  82. pg_sui-0.2.3.dist-info/METADATA +0 -322
  83. pg_sui-0.2.3.dist-info/RECORD +0 -75
  84. pg_sui-0.2.3.dist-info/top_level.txt +0 -3
  85. pgsui/example_data/phylip_files/test_n10.phy +0 -118
  86. pgsui/example_data/phylip_files/test_n100.phy +0 -118
  87. pgsui/example_data/phylip_files/test_n2.phy +0 -118
  88. pgsui/example_data/phylip_files/test_n500.phy +0 -118
  89. pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
  90. pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
  91. pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
  92. pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
  93. pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
  94. pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
  95. pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
  96. pgsui/example_data/trees/test.iqtree +0 -376
  97. pgsui/example_data/trees/test.qmat +0 -5
  98. pgsui/example_data/trees/test.rate +0 -2033
  99. pgsui/example_data/trees/test.tre +0 -1
  100. pgsui/example_data/trees/test_n10.rate +0 -19
  101. pgsui/example_data/trees/test_n100.rate +0 -109
  102. pgsui/example_data/trees/test_n500.rate +0 -509
  103. pgsui/example_data/trees/test_siterates.txt +0 -2024
  104. pgsui/example_data/trees/test_siterates_n10.txt +0 -10
  105. pgsui/example_data/trees/test_siterates_n100.txt +0 -100
  106. pgsui/example_data/trees/test_siterates_n500.txt +0 -500
  107. pgsui/example_data/vcf_files/test.vcf +0 -244
  108. pgsui/example_data/vcf_files/test.vcf.gz +0 -0
  109. pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
  110. pgsui/impute/estimators.py +0 -1268
  111. pgsui/impute/impute.py +0 -1463
  112. pgsui/impute/simple_imputers.py +0 -1431
  113. pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
  114. pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
  115. pgsui/impute/unsupervised/keras_classifiers.py +0 -697
  116. pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
  117. pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
  118. pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
  119. pgsui/pg_sui.py +0 -261
  120. pgsui/utils/sequence_tools.py +0 -407
  121. simulation/sim_benchmarks.py +0 -333
  122. simulation/sim_treeparams.py +0 -475
  123. test/__init__.py +0 -0
  124. test/pg_sui_simtest.py +0 -215
  125. test/pg_sui_testing.py +0 -523
  126. test/test.py +0 -151
  127. test/test_pgsui.py +0 -374
  128. test/test_tkc.py +0 -185
pgsui/impute/unsupervised/callbacks.py +92 -262

@@ -1,286 +1,116 @@
-import math
-import sys
+from snpio.utils.logging import LoggerManager

-import numpy as np
-import tensorflow as tf
+from pgsui.utils.logging_utils import configure_logger


-class CyclicalAnnealingCallback(tf.keras.callbacks.Callback):
-    """Perform cyclical annealing with KL Divergence weights.
+class EarlyStopping:
+    """Class to stop the training when a monitored metric has stopped improving.

-    The dynamically changing weight (beta) is multiplied with the KL Divergence loss.
-
-    This process is supposed to improve the latent distribution sampling for the variational autoencoder model and eliminate the KL vanishing issue.
-
-    Three types of cycle curves can be used that determine how the weight increases: 'linear', 'sigmoid', and 'cosine'..
-
-    Code is adapted from: https://github.com/haofuml/cyclical_annealing
-
-    The cyclical annealing process was first described in the following paper: https://aclanthology.org/N19-1021.pdf
-
-    Args:
-        n_iter (int): Number of iterations (epochs) being used in training.
-        start (float, optional): Where to start cycles. Defaults to 0.0.
-        stop (float, optional): Where to stop cycles. Defaults to 1.0.
-        n_cycle (int, optional): How many cycles to use across all the epochs. Defaults to 4.
-        ratio (float, optional): Ratio to determine proportion used to increase beta. Defaults to 0.5.
-        schedule_type (str, optional): Type of curve to use for scheduler. Possible options include: 'linear', 'sigmoid', or 'cosine'. Defaults to 'linear'.
+    This class is used to stop the training of a model when a monitored metric has stopped improving (such as validation loss or accuracy). If the metric does not improve for `patience` epochs, and we have already passed the `min_epochs` epoch threshold, training is halted. The best model checkpoint is reloaded when early stopping is triggered.

+    Example:
+        >>> early_stopping = EarlyStopping(patience=25, verbose=1, min_epochs=100)
+        >>> for epoch in range(1, 1001):
+        >>>     val_loss = train_epoch(...)
+        >>>     early_stopping(val_loss, model)
+        >>>     if early_stopping.early_stop:
+        >>>         break
     """

     def __init__(
         self,
-        n_iter,
-        start=0.0,
-        stop=1.0,
-        n_cycle=4,
-        ratio=0.5,
-        schedule_type="linear",
+        patience: int = 25,
+        delta: float = 0.0,
+        verbose: int = 0,
+        mode: str = "min",
+        min_epochs: int = 100,
+        prefix: str = "pgsui_output",
+        debug: bool = False,
     ):
-        self.n_iter = n_iter
-        self.start = start
-        self.stop = stop
-        self.n_cycle = n_cycle
-        self.ratio = ratio
-        self.schedule_type = schedule_type
+        """Early stopping callback for PyTorch training.

-        self.arr = None
+        This class is used to stop the training of a model when a monitored metric has stopped improving (such as validation loss or accuracy). If the metric does not improve for `patience` epochs, and we have already passed the `min_epochs` epoch threshold, training is halted. The best model checkpoint is reloaded when early stopping is triggered. The `mode` parameter can be set to "min" or "max" to indicate whether the metric should be minimized or maximized, respectively.

-    def on_train_begin(self, logs=None):
-        """Executes on training begin.
-
-        Here, the cycle curve is generated and stored as a class variable.
+        Args:
+            patience (int): Number of epochs to wait after the last time the monitored metric improved.
+            delta (float): Minimum change in the monitored metric to qualify as an improvement.
+            verbose (int): Verbosity level (0 = silent, 1 = improvement messages, 2+ = more).
+            mode (str): "min" or "max" to indicate how improvement is defined.
+            prefix (str): Prefix for directory naming.
+            output_dir (Path): Directory in which to create subfolders/checkpoints.
+            min_epochs (int): Minimum epoch count before early stopping can take effect.
+            debug (bool): Debug mode for logging messages
+
+        Raises:
+            ValueError: If an invalid mode is provided. Must be "min" or "max".
         """
-        if self.schedule_type == "linear":
-            cycle_func = self._linear_cycle_range
-        elif self.schedule_type == "sigmoid":
-            cycle_func = self._sigmoid_cycle_range
-        elif self.schedule_type == "cosine":
-            cycle_func = self._cosine_cycle_range
+        self.patience = patience
+        self.delta = delta
+        self.verbose = verbose >= 2 or debug
+        self.debug = debug
+        self.mode = mode
+        self.counter = 0
+        self.epoch_count = 0
+        self.best_score = float("inf") if mode == "min" else 0.0
+        self.early_stop = False
+        self.best_model = None
+        self.min_epochs = min_epochs
+
+        is_verbose = verbose >= 2 or debug
+        logman = LoggerManager(name=__name__, prefix=prefix, verbose=is_verbose)
+        self.logger = configure_logger(
+            logman.get_logger(), verbose=is_verbose, debug=debug
+        )
+
+        # Define the comparison function for the monitored metric
+        if mode == "min":
+            self.monitor = lambda current, best: current < best - self.delta
+        elif mode == "max":
+            self.monitor = lambda current, best: current > best + self.delta
         else:
-            raise ValueError(
-                f"Invalid schedule_type value provided: {self.schedule_type}"
-            )
-
-        self.arr = cycle_func()
+            msg = f"Invalid mode provided: '{mode}'. Use 'min' or 'max'."
+            self.logger.error(msg)
+            raise ValueError(msg)

-    def on_epoch_begin(self, epoch, logs=None):
-        """Executes each time an epoch begins.
-
-        Here, the new kl_beta weight is set.
+    def __call__(self, score, model):
+        """Checks if early stopping condition is met and checkpoints model accordingly.

         Args:
-            epoch (int): Current epoch iteration.
-            logs (None, optional): For compatibility. Not used. Defaults to None.
-        """
-        idx = epoch - 1
-        new_weight = self.arr[idx]
-
-        tf.keras.backend.set_value(self.model.kl_beta, new_weight)
-
-    def _linear_cycle_range(self):
-        """Get an array with a linear cycle curve ranging from 0 to 1 for n_iter epochs.
-
-        The amount of time cycling and spent at 1.0 is determined by the ratio variable.
-
-        Returns:
-            numpy.ndarray: Linear cycle range.
+            score (float): The current metric value (e.g., validation loss/accuracy).
+            model (torch.nn.Module): The model being trained.
         """
-        L = np.ones(self.n_iter) * self.stop
-        period = self.n_iter / self.n_cycle
-
-        # Linear schedule
-        step = (self.stop - self.start) / (
-            period * self.ratio
-        )  # linear schedule
-
-        for c in range(self.n_cycle):
-            v, i = self.start, 0
-            while v <= self.stop and (int(i + c * period) < self.n_iter):
-                L[int(i + c * period)] = v
-                v += step
-                i += 1
-
-        return L
-
-    def _sigmoid_cycle_range(self):
-        """Get sigmoidal curve cycle ranging from 0 to 1 for n_iter epochs.
-
-        The amount of time cycling and spent at 1.0 is determined by the ratio variable.
-
-        Returns:
-            numpy.ndarray: Sigmoidal cycle range.
-        """
-        L = np.ones(self.n_iter)
-        period = self.n_iter / self.n_cycle
-        step = (self.stop - self.start) / (
-            period * self.ratio
-        )  # step is in [0,1]
-
-        for c in range(self.n_cycle):
-            v, i = self.start, 0
+        # Increment the epoch count each time we call this function
+        self.epoch_count += 1
+
+        # If this is the first epoch, initialize best_score and save model
+        if self.best_score is None:
+            self.best_score = score
+            return
+
+        # Check if there is improvement
+        if self.monitor(score, self.best_score):
+            # If improved, reset counter and update the best score/model
+            self.best_score = score
+            self.best_model = model
+            self.counter = 0
+        else:
+            # No improvement: increase counter
+            self.counter += 1

-            while v <= self.stop:
-                L[int(i + c * period)] = 1.0 / (
-                    1.0 + np.exp(-(v * 12.0 - 6.0))
+            if self.verbose:
+                self.logger.info(
+                    f"EarlyStopping counter: {self.counter}/{self.patience}"
                 )
-                v += step
-                i += 1
-        return L
-
-    def _cosine_cycle_range(self):
-        """Get cosine curve cycle ranging from 0 to 1 for n_iter epochs.
-
-        The amount of time cycling and spent at 1.0 is determined by the ratio variable.
-
-        Returns:
-            numpy.ndarray: Cosine cycle range.
-        """
-        L = np.ones(self.n_iter)
-        period = self.n_iter / self.n_cycle
-        step = (self.stop - self.start) / (
-            period * self.ratio
-        )  # step is in [0,1]
-
-        for c in range(self.n_cycle):
-            v, i = self.start, 0
-
-            while v <= self.stop:
-                L[int(i + c * period)] = 0.5 - 0.5 * math.cos(v * math.pi)
-                v += step
-                i += 1
-        return L
-
-
-class VAECallbacks(tf.keras.callbacks.Callback):
-    """Custom callbacks to use with subclassed VAE Keras model.
-
-    Requires y, missing_mask, and sample_weight to be input variables to be properties with setters in the subclassed model.
-    """
-
-    def __init__(self):
-        self.indices = None
-
-    def on_epoch_begin(self, epoch, logs=None):
-        """Shuffle input and target at start of epoch."""
-        y = self.model.y.copy()
-        missing_mask = self.model.missing_mask
-        sample_weight = self.model.sample_weight
-
-        n_samples = len(y)
-        self.indices = np.arange(n_samples)
-        np.random.shuffle(self.indices)
-
-        self.model.y = y[self.indices]
-        self.model.missing_mask = missing_mask[self.indices]
-
-        if sample_weight is not None:
-            self.model.sample_weight = sample_weight[self.indices]
-
-    def on_train_batch_begin(self, batch, logs=None):
-        """Get batch index."""
-        self.model.batch_idx = batch
-
-    def on_epoch_end(self, epoch, logs=None):
-        """Unsort the row indices."""
-        unshuffled = np.argsort(self.indices)
-
-        self.model.y = self.model.y[unshuffled]
-        self.model.missing_mask = self.model.missing_mask[unshuffled]
-
-        if self.model.sample_weight is not None:
-            self.model.sample_weight = self.model.sample_weight[unshuffled]
-
-
-class UBPCallbacks(tf.keras.callbacks.Callback):
-    """Custom callbacks to use with subclassed NLPCA/ UBP Keras models.
-
-    Requires y, missing_mask, V_latent, and sample_weight to be input variables to be properties with setters in the subclassed model.
-    """
-
-    def __init__(self):
-        self.indices = None
-
-    def on_epoch_begin(self, epoch, logs=None):
-        """Shuffle input and target at start of epoch."""
-        y = self.model.y.copy()
-        missing_mask = self.model.missing_mask
-        sample_weight = self.model.sample_weight
-
-        n_samples = len(y)
-        self.indices = np.arange(n_samples)
-        np.random.shuffle(self.indices)

-        self.model.y = y[self.indices]
-        self.model.V_latent = self.model.V_latent[self.indices]
-        self.model.missing_mask = missing_mask[self.indices]
+        # Now check if we surpass patience AND have reached min_epochs
+        if self.counter >= self.patience and self.epoch_count >= self.min_epochs:

-        if sample_weight is not None:
-            self.model.sample_weight = sample_weight[self.indices]
+            if self.best_model is None:
+                self.best_model = model

-    def on_train_batch_begin(self, batch, logs=None):
-        """Get batch index."""
-        self.model.batch_idx = batch
-
-    def on_epoch_end(self, epoch, logs=None):
-        """Unsort the row indices."""
-        unshuffled = np.argsort(self.indices)
-
-        self.model.y = self.model.y[unshuffled]
-        self.model.V_latent = self.model.V_latent[unshuffled]
-        self.model.missing_mask = self.model.missing_mask[unshuffled]
-
-        if self.model.sample_weight is not None:
-            self.model.sample_weight = self.model.sample_weight[unshuffled]
-
-
-class UBPEarlyStopping(tf.keras.callbacks.Callback):
-    """Stop training when the loss is at its min, i.e. the loss stops decreasing.
-
-    Args:
-        patience (int, optional): Number of epochs to wait after min has been hit. After this
-        number of no improvement, training stops. Defaults to 0.
-
-        phase (int, optional): Current UBP Phase. Defaults to 3.
-    """
-
-    def __init__(self, patience=0, phase=3):
-        super(UBPEarlyStopping, self).__init__()
-        self.patience = patience
-        self.phase = phase
-
-        # best_weights to store the weights at which the minimum loss occurs.
-        self.best_weights = None
-
-        # In UBP, the input gets refined during training.
-        # So we have to revert it too.
-        self.best_input = None
-
-    def on_train_begin(self, logs=None):
-        # The number of epoch it has waited when loss is no longer minimum.
-        self.wait = 0
-        # The epoch the training stops at.
-        self.stopped_epoch = 0
-        # Initialize the best as infinity.
-        self.best = np.Inf
-
-    def on_epoch_end(self, epoch, logs=None):
-        current = logs.get("loss")
-        if np.less(current, self.best):
-            self.best = current
-            self.wait = 0
-            # Record the best weights if current results is better (less).
-            self.best_weights = self.model.get_weights()
-
-            if self.phase != 2:
-                # Only refine input in phase 2.
-                self.best_input = self.model.V_latent
-        else:
-            self.wait += 1
-            if self.wait >= self.patience:
-                self.stopped_epoch = epoch
-                self.model.stop_training = True
-                self.model.set_weights(self.best_weights)
+            self.early_stop = True

-                if self.phase != 2:
-                    self.model.V_latent = self.best_input
+            if self.verbose:
+                self.logger.info(
+                    f"Early stopping triggered at epoch {self.epoch_count}"
+                )
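Note on the new code: the `EarlyStopping` class added above replaces the TensorFlow/Keras callbacks with a framework-agnostic, patience-based stopper. The sketch below restates its core logic in a self-contained form for readers who want to trace the behavior without the snpio/pgsui logging dependencies: the min/max comparison with a `delta` margin, the patience counter, and the `min_epochs` warm-up gate. `SimpleStopper` and the synthetic loss values are illustrative stand-ins, not part of the pg-sui API.

# Minimal sketch of the patience-based early-stopping pattern used in the
# new pg-sui EarlyStopping class. `SimpleStopper` is a hypothetical name.
class SimpleStopper:
    def __init__(self, patience=25, delta=0.0, mode="min", min_epochs=100):
        self.patience, self.delta, self.min_epochs = patience, delta, min_epochs
        self.counter = 0       # epochs since last improvement
        self.epoch_count = 0   # total epochs seen
        self.early_stop = False
        self.best_score = float("inf") if mode == "min" else float("-inf")
        # Improvement test: strictly better than the best by at least `delta`.
        if mode == "min":
            self.improved = lambda cur, best: cur < best - delta
        else:
            self.improved = lambda cur, best: cur > best + delta

    def __call__(self, score):
        self.epoch_count += 1
        if self.improved(score, self.best_score):
            self.best_score = score
            self.counter = 0
        else:
            self.counter += 1
        # Patience only takes effect after the min_epochs warm-up has passed.
        if self.counter >= self.patience and self.epoch_count >= self.min_epochs:
            self.early_stop = True


stopper = SimpleStopper(patience=3, min_epochs=5)
# Synthetic validation losses: improves for three epochs, then plateaus.
for epoch, val_loss in enumerate([1.0, 0.8, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7], 1):
    stopper(val_loss)
    if stopper.early_stop:
        print(f"stopped at epoch {epoch}, best={stopper.best_score}")
        break

As in the diff above, a plateau only stops training once both thresholds are met, so an early stall during warm-up does not end the run prematurely.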
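Note on the removed code: `CyclicalAnnealingCallback` implemented the cyclical KL-annealing schedule of Fu et al. 2019 (https://aclanthology.org/N19-1021.pdf), which ramps the KL weight (beta) from 0 to 1 several times per training run to mitigate KL vanishing in VAEs. The following NumPy sketch restates the deleted `_linear_cycle_range` logic as a standalone function; `linear_cycle_range` is an illustrative name, not a pg-sui API.

import numpy as np

def linear_cycle_range(n_iter, start=0.0, stop=1.0, n_cycle=4, ratio=0.5):
    """Linear cyclical annealing schedule (after Fu et al. 2019).

    Each of the `n_cycle` cycles spends `ratio` of its period ramping beta
    linearly from `start` to `stop`, then holds it at `stop`.
    """
    betas = np.ones(n_iter) * stop
    period = n_iter / n_cycle
    step = (stop - start) / (period * ratio)  # ramp increment per epoch
    for c in range(n_cycle):
        v, i = start, 0
        while v <= stop and int(i + c * period) < n_iter:
            betas[int(i + c * period)] = v
            v += step
            i += 1
    return betas

# Example: 4 cycles over 100 epochs; beta ramps over the first half of each
# 25-epoch cycle, then saturates at 1.0 until the next cycle restarts it.
schedule = linear_cycle_range(100)
print(schedule[:30].round(2))

The sigmoid and cosine variants removed in the same hunk differ only in the shape of the ramp, not in the cycling logic.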