PyPI - pg-sui - Versions diffs - 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl - Mend

pg-sui 1.0.2.1-py3-none-any.whl → 1.6.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pg-sui might be problematic. See the advisory linked from the original diff page for more details.

Files changed (112)

{pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/METADATA +51 -70
pg_sui-1.6.8.dist-info/RECORD +78 -0
{pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info}/WHEEL +1 -1
pg_sui-1.6.8.dist-info/entry_points.txt +4 -0
pg_sui-1.6.8.dist-info/top_level.txt +1 -0
pgsui/__init__.py +35 -54
pgsui/_version.py +34 -0
pgsui/cli.py +635 -0
pgsui/data_processing/config.py +576 -0
pgsui/data_processing/containers.py +1782 -0
pgsui/data_processing/transformers.py +121 -1103
pgsui/electron/app/__main__.py +5 -0
pgsui/electron/app/icons/icons/1024x1024.png +0 -0
pgsui/electron/app/icons/icons/128x128.png +0 -0
pgsui/electron/app/icons/icons/16x16.png +0 -0
pgsui/electron/app/icons/icons/24x24.png +0 -0
pgsui/electron/app/icons/icons/256x256.png +0 -0
pgsui/electron/app/icons/icons/32x32.png +0 -0
pgsui/electron/app/icons/icons/48x48.png +0 -0
pgsui/electron/app/icons/icons/512x512.png +0 -0
pgsui/electron/app/icons/icons/64x64.png +0 -0
pgsui/electron/app/icons/icons/icon.icns +0 -0
pgsui/electron/app/icons/icons/icon.ico +0 -0
pgsui/electron/app/main.js +189 -0
pgsui/electron/app/package-lock.json +6893 -0
pgsui/electron/app/package.json +50 -0
pgsui/electron/app/preload.js +15 -0
pgsui/electron/app/server.py +146 -0
pgsui/electron/app/ui/logo.png +0 -0
pgsui/electron/app/ui/renderer.js +130 -0
pgsui/electron/app/ui/styles.css +59 -0
pgsui/electron/app/ui/ui_shim.js +72 -0
pgsui/electron/bootstrap.py +43 -0
pgsui/electron/launch.py +59 -0
pgsui/electron/package.json +14 -0
pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
pgsui/impute/deterministic/imputers/allele_freq.py +691 -0
pgsui/impute/deterministic/imputers/mode.py +679 -0
pgsui/impute/deterministic/imputers/nmf.py +221 -0
pgsui/impute/deterministic/imputers/phylo.py +971 -0
pgsui/impute/deterministic/imputers/ref_allele.py +530 -0
pgsui/impute/supervised/base.py +339 -0
pgsui/impute/supervised/imputers/hist_gradient_boosting.py +293 -0
pgsui/impute/supervised/imputers/random_forest.py +287 -0
pgsui/impute/unsupervised/base.py +924 -0
pgsui/impute/unsupervised/callbacks.py +89 -263
pgsui/impute/unsupervised/imputers/autoencoder.py +972 -0
pgsui/impute/unsupervised/imputers/nlpca.py +1264 -0
pgsui/impute/unsupervised/imputers/ubp.py +1288 -0
pgsui/impute/unsupervised/imputers/vae.py +957 -0
pgsui/impute/unsupervised/loss_functions.py +158 -0
pgsui/impute/unsupervised/models/autoencoder_model.py +208 -558
pgsui/impute/unsupervised/models/nlpca_model.py +149 -468
pgsui/impute/unsupervised/models/ubp_model.py +198 -1317
pgsui/impute/unsupervised/models/vae_model.py +259 -618
pgsui/impute/unsupervised/nn_scorers.py +215 -0
pgsui/utils/classification_viz.py +591 -0
pgsui/utils/misc.py +35 -480
pgsui/utils/plotting.py +514 -824
pgsui/utils/scorers.py +212 -438
pg_sui-1.0.2.1.dist-info/RECORD +0 -75
pg_sui-1.0.2.1.dist-info/top_level.txt +0 -3
pgsui/example_data/phylip_files/test_n10.phy +0 -118
pgsui/example_data/phylip_files/test_n100.phy +0 -118
pgsui/example_data/phylip_files/test_n2.phy +0 -118
pgsui/example_data/phylip_files/test_n500.phy +0 -118
pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
pgsui/example_data/trees/test.iqtree +0 -376
pgsui/example_data/trees/test.qmat +0 -5
pgsui/example_data/trees/test.rate +0 -2033
pgsui/example_data/trees/test.tre +0 -1
pgsui/example_data/trees/test_n10.rate +0 -19
pgsui/example_data/trees/test_n100.rate +0 -109
pgsui/example_data/trees/test_n500.rate +0 -509
pgsui/example_data/trees/test_siterates.txt +0 -2024
pgsui/example_data/trees/test_siterates_n10.txt +0 -10
pgsui/example_data/trees/test_siterates_n100.txt +0 -100
pgsui/example_data/trees/test_siterates_n500.txt +0 -500
pgsui/example_data/vcf_files/test.vcf +0 -244
pgsui/example_data/vcf_files/test.vcf.gz +0 -0
pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
pgsui/impute/estimators.py +0 -735
pgsui/impute/impute.py +0 -1486
pgsui/impute/simple_imputers.py +0 -1439
pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -785
pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1027
pgsui/impute/unsupervised/keras_classifiers.py +0 -702
pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
pgsui/impute/unsupervised/neural_network_imputers.py +0 -1424
pgsui/impute/unsupervised/neural_network_methods.py +0 -1549
pgsui/pg_sui.py +0 -261
pgsui/utils/sequence_tools.py +0 -407
simulation/sim_benchmarks.py +0 -333
simulation/sim_treeparams.py +0 -475
test/__init__.py +0 -0
test/pg_sui_simtest.py +0 -215
test/pg_sui_testing.py +0 -523
test/test.py +0 -297
test/test_pgsui.py +0 -374
test/test_tkc.py +0 -214
{pg_sui-1.0.2.1.dist-info → pg_sui-1.6.8.dist-info/licenses}/LICENSE +0 -0
pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
{simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0

pgsui/impute/unsupervised/models/autoencoder_model.py CHANGED

@@ -1,634 +1,284 @@
-import logging
-import os
-import sys
-import warnings
+from typing import List, Literal
-# Import tensorflow with reduced warnings.
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-logging.getLogger("tensorflow").disabled = True
-warnings.filterwarnings("ignore", category=UserWarning)
+import numpy as np
+import torch
+import torch.nn as nn
+from snpio.utils.logging import LoggerManager
-import tensorflow as tf
+from pgsui.impute.unsupervised.loss_functions import MaskedFocalLoss
-# Disable can't find cuda .dll errors. Also turns of GPU support.
-tf.config.set_visible_devices([], "GPU")
-from tensorflow.python.util import deprecation
+class Encoder(nn.Module):
+    """The Encoder module of a standard Autoencoder.
-# Disable warnings and info logs.
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
-tf.get_logger().setLevel(logging.ERROR)
-# Monkey patching deprecation utils to supress warnings.
-# noinspection PyUnusedLocal
-def deprecated(
-    date, instructions, warn_once=True
-):  # pylint: disable=unused-argument
-    def deprecated_wrapper(func):
-        return func
-    return deprecated_wrapper
-deprecation.deprecated = deprecated
-from tensorflow.keras.layers import (
-    Dropout,
-    Dense,
-    Reshape,
-    Flatten,
-    LeakyReLU,
-    PReLU,
-    Activation,
-)
-from tensorflow.keras.regularizers import l1_l2
-from tensorflow.keras import backend as K
-# Custom Modules
-try:
-    from ..neural_network_methods import NeuralNetworkMethods
-except (ModuleNotFoundError, ValueError, ImportError):
-    from impute.unsupervised.neural_network_methods import NeuralNetworkMethods
-class Encoder(tf.keras.layers.Layer):
-    """VAE encoder to Encode genotypes to (z_mean, z_log_var, z)."""
+    This module defines the encoder network, which takes high-dimensional input data and maps it to a deterministic, low-dimensional latent representation. The architecture consists of a series of fully-connected hidden layers that progressively compress the flattened input data into a single latent vector, `z`.
+    """
     def __init__(
         self,
-        n_features,
-        num_classes,
-        latent_dim,
-        hidden_layer_sizes,
-        dropout_rate,
-        activation,
-        kernel_initializer,
-        kernel_regularizer,
-        beta=K.variable(0.0),
-        name="Encoder",
-        **kwargs,
+        n_features: int,
+        num_classes: int,
+        latent_dim: int,
+        hidden_layer_sizes: List[int],
+        dropout_rate: float,
+        activation: torch.nn.Module,
     ):
-        super(Encoder, self).__init__(name=name, **kwargs)
+        """Initializes the Encoder module.
-        self.beta = beta
+        This class defines the encoder network, which takes high-dimensional input data and maps it to a deterministic, low-dimensional latent representation. The architecture consists of a series of fully-connected hidden layers that progressively compress the flattened input data into a single latent vector, `z`.
-        self.dense2 = None
-        self.dense3 = None
-        self.dense4 = None
-        self.dense5 = None
+        Args:
+            n_features (int): The number of features in the input data (e.g., SNPs).
+            num_classes (int): The number of possible classes for each input element (e.g., 4 alleles).
+            latent_dim (int): The dimensionality of the output latent space.
+            hidden_layer_sizes (List[int]): A list of integers specifying the size of each hidden layer.
+            dropout_rate (float): The dropout rate for regularization in the hidden layers.
+            activation (torch.nn.Module): An instantiated activation function module (e.g., `nn.ReLU()`) for the hidden layers.
+        """
+        super(Encoder, self).__init__()
+        self.flatten = nn.Flatten()
-        # n_features * num_classes.
-        self.flatten = Flatten()
+        layers = []
+        input_dim = n_features * num_classes
+        for hidden_size in hidden_layer_sizes:
+            layers.append(nn.Linear(input_dim, hidden_size))
+            layers.append(nn.BatchNorm1d(hidden_size))
+            layers.append(nn.Dropout(dropout_rate))
+            layers.append(activation)
+            input_dim = hidden_size
-        self.dense1 = Dense(
-            hidden_layer_sizes[0],
-            input_shape=(n_features * num_classes,),
-            activation=activation,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-            name="Encoder1",
-        )
+        self.hidden_layers = nn.Sequential(*layers)
+        self.dense_z = nn.Linear(input_dim, latent_dim)
-        if len(hidden_layer_sizes) >= 2:
-            self.dense2 = Dense(
-                hidden_layer_sizes[1],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Encoder2",
-            )
-        if len(hidden_layer_sizes) >= 3:
-            self.dense3 = Dense(
-                hidden_layer_sizes[2],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Encoder3",
-            )
-        if len(hidden_layer_sizes) >= 4:
-            self.dense4 = Dense(
-                hidden_layer_sizes[3],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Encoder4",
-            )
-        if len(hidden_layer_sizes) == 5:
-            self.dense5 = Dense(
-                hidden_layer_sizes[4],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Encoder5",
-            )
-        self.dense_latent = Dense(
-            latent_dim,
-            activation=activation,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-            name="Encoder5",
-        )
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Performs the forward pass through the encoder.
-        self.dropout_layer = Dropout(dropout_rate)
+        Args:
+            x (torch.Tensor): The input data tensor of shape `(batch_size, n_features, num_classes)`.
-    def call(self, inputs, training=None):
-        """Forward pass through model."""
-        x = self.flatten(inputs)
-        x = self.dense1(x)
-        x = self.dropout_layer(x, training=training)
-        if self.dense2 is not None:
-            x = self.dense2(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense3 is not None:
-            x = self.dense3(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense4 is not None:
-            x = self.dense4(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense5 is not None:
-            x = self.dense5(x)
-            x = self.dropout_layer(x, training=training)
+        Returns:
+            torch.Tensor: The latent representation `z` of shape `(batch_size, latent_dim)`.
+        """
+        x = self.flatten(x)
+        x = self.hidden_layers(x)
+        z = self.dense_z(x)
+        return z
-        return self.dense_latent(x)
+class Decoder(nn.Module):
+    """The Decoder module of a standard Autoencoder.
-class Decoder(tf.keras.layers.Layer):
-    """Converts the encoded vector back into the reconstructed output"""
+    This module defines the decoder network, which takes a deterministic latent vector and maps it back to the high-dimensional data space, aiming to reconstruct the original input. The architecture typically mirrors the encoder, consisting of a series of fully-connected hidden layers that progressively expand the representation, followed by a final linear layer to produce the reconstructed data.
+    """
     def __init__(
         self,
-        n_features,
-        num_classes,
-        latent_dim,
-        hidden_layer_sizes,
-        dropout_rate,
-        activation,
-        kernel_initializer,
-        kernel_regularizer,
-        name="Decoder",
-        **kwargs,
-    ):
-        super(Decoder, self).__init__(name=name, **kwargs)
-        self.dense2 = None
-        self.dense3 = None
-        self.dense4 = None
-        self.dense5 = None
-        self.dense1 = Dense(
-            hidden_layer_sizes[0],
-            input_shape=(latent_dim,),
-            activation=activation,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-            name="Decoder1",
-        )
-        if len(hidden_layer_sizes) >= 2:
-            self.dense2 = Dense(
-                hidden_layer_sizes[1],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Decoder2",
-            )
-        if len(hidden_layer_sizes) >= 3:
-            self.dense3 = Dense(
-                hidden_layer_sizes[2],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Decoder3",
-            )
-        if len(hidden_layer_sizes) >= 4:
-            self.dense4 = Dense(
-                hidden_layer_sizes[3],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Decoder4",
-            )
-        if len(hidden_layer_sizes) == 5:
-            self.dense5 = Dense(
-                hidden_layer_sizes[4],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Decoder5",
-            )
-        # No activation for final layer.
-        self.dense_output = Dense(
-            n_features * num_classes,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-            activation=None,
-            name="Decoder6",
-        )
-        self.rshp = Reshape((n_features, num_classes))
-        self.dropout_layer = Dropout(dropout_rate)
-    def call(self, inputs, training=None):
-        """Forward pass through model."""
-        x = self.dense1(inputs)
-        x = self.dropout_layer(x, training=training)
-        if self.dense2 is not None:
-            x = self.dense2(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense3 is not None:
-            x = self.dense3(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense4 is not None:
-            x = self.dense4(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense5 is not None:
-            x = self.dense5(x)
-            x = self.dropout_layer(x, training=training)
+        n_features: int,
+        num_classes: int,
+        latent_dim: int,
+        hidden_layer_sizes: List[int],
+        dropout_rate: float,
+        activation: torch.nn.Module,
+    ) -> None:
+        """Initializes the Decoder module.
-        x = self.dense_output(x)
-        return self.rshp(x)
-class AutoEncoderModel(tf.keras.Model):
-    """Standard AutoEncoder model to impute missing data.
-    Args:
-        y (np.ndarray): Full input data.
-        batch_size (int, optional): Batch size to use with model. Defaults to 32.
-        output_shape (int, optional): Number of features in output. Defaults to None.
+        Args:
+            n_features (int): The number of features in the output data (e.g., SNPs).
+            num_classes (int): The number of possible classes for each output element (e.g., 4 alleles).
+            latent_dim (int): The dimensionality of the input latent space.
+            hidden_layer_sizes (List[int]): A list of integers specifying the size of each hidden layer (typically the reverse of the encoder's).
+            dropout_rate (float): The dropout rate for regularization in the hidden layers.
+            activation (torch.nn.Module): An instantiated activation function module (e.g., `nn.ReLU()`) for the hidden layers.
+        """
+        super(Decoder, self).__init__()
-        n_components (int, optional): Number of principal components to encode. Defaults to 3.
+        layers = []
+        input_dim = latent_dim
+        for hidden_size in hidden_layer_sizes:
+            layers.append(nn.Linear(input_dim, hidden_size))
+            layers.append(nn.BatchNorm1d(hidden_size))
+            layers.append(nn.Dropout(dropout_rate))
+            layers.append(activation)
+            input_dim = hidden_size
-        weights_initializer (str, optional): tf.keras function to use with initial weights. Defaults to 'glorot_normal'.
+        self.hidden_layers = nn.Sequential(*layers)
+        output_dim = n_features * num_classes
+        self.dense_output = nn.Linear(input_dim, output_dim)
+        self.reshape = (n_features, num_classes)
-        hidden_layer_sizes (str, List[int], or int, optional): Number of nodes to use in hidden layers. If List[int] is provided, must be equal in length to the number of hidden layers. If a string is provided, a calculation will be performed to automatically estimate the hidden layer sizes, with possible options including {'midpoint' or 'sqrt'}. If an integer is provided, then the provided integer will be used for all hidden layers. Defaults to 'midpoint'.
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Performs the forward pass through the decoder.
-        num_hidden_layers (int, optional): Number of hidden layers to use in model construction. Maximum number of layers is 5. Defaults to 1.
+        Args:
+            x (torch.Tensor): The input latent tensor of shape `(batch_size, latent_dim)`.
-        hidden_activation (str, optional): Hidden activation function to use in hidden layers. Possible options include: {"elu", "relu", "selu", "leaky_relu", and "prelu"}. Defaults to "elu".
+        Returns:
+            torch.Tensor: The reconstructed output data of shape `(batch_size, n_features, num_classes)`.
+        """
+        x = self.hidden_layers(x)
+        x = self.dense_output(x)
+        return x.view(-1, *self.reshape)
-        l1_penalty (float, optional): l1_penalty to use for regularization. Defaults to 1e-6.
-        l2_penalty (float, optional): l2_penalty to use fo regularization. Defaults to 1e-6.
+class AutoencoderModel(nn.Module):
+    """A standard Autoencoder (AE) model for imputation.
-        dropout_rate (float, optional): Dropout rate to use for Dropout() layer. Defaults to 0.2.
+    This class combines an `Encoder` and a `Decoder` to form a standard autoencoder. The model is trained to learn a compressed, low-dimensional representation of the input data and then reconstruct it as accurately as possible. It is particularly useful for unsupervised dimensionality reduction and data imputation.
-        sample_weight (numpy.ndarray, optional): Sample weight matrix for weighting class imbalance. Should be of shape (n_samples, n_features). Defaults to None.
+    **Model Architecture and Objective:**
-        num_classes (int, optional): Number of classes in multiclass predictions. Defaults to 3.
+    The autoencoder consists of two parts: an encoder, $f_{\theta}$, and a decoder, $g_{\phi}$.
+        1.  The **encoder** maps the input data $x$ to a latent representation $z$:
+            $$
+            z = f_{\theta}(x)
+            $$
+        2.  The **decoder** reconstructs the data $\hat{x}$ from the latent representation:
+            $$
+            \hat{x} = g_{\phi}(z)
+            $$
-    Raises:
-        ValueError: Maximum number of hidden layers (5) was exceeded.
+    The model is trained by minimizing a reconstruction loss, $L(x, \hat{x})$, which measures the dissimilarity between the original input and the reconstructed output. This implementation uses a `MaskedFocalLoss` to handle missing values and class imbalance effectively.
     """
     def __init__(
         self,
-        y,
-        batch_size=32,
-        output_shape=None,
-        n_components=3,
-        weights_initializer="glorot_normal",
-        hidden_layer_sizes="midpoint",
-        num_hidden_layers=1,
-        hidden_activation="elu",
-        l1_penalty=1e-6,
-        l2_penalty=1e-6,
-        dropout_rate=0.2,
-        sample_weight=None,
-        missing_mask=None,
-        num_classes=3,
+        n_features: int,
+        prefix: str,
+        *,
+        num_classes: int = 4,
+        hidden_layer_sizes: List[int] | np.ndarray = [128, 64],
+        latent_dim: int = 2,
+        dropout_rate: float = 0.2,
+        activation: Literal["relu", "elu", "selu", "leaky_relu"] = "relu",
+        gamma: float = 2.0,
+        device: Literal["cpu", "gpu", "mps"] = "cpu",
+        verbose: bool = False,
+        debug: bool = False,
     ):
-        super(AutoEncoderModel, self).__init__()
-        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
-        self.binary_accuracy_tracker = tf.keras.metrics.Mean(
-            name="binary_accuracy"
-        )
+        """Initializes the AutoencoderModel.
-        self.nn_ = NeuralNetworkMethods()
-        self._y = y
-        self._batch_idx = 0
-        self._batch_size = batch_size
-        self._sample_weight = sample_weight
-        self._missing_mask = missing_mask
-        # y_train[1] dimension.
-        self.n_features = output_shape
-        n_features = self.n_features
-        self.n_components = n_components
-        self.weights_initializer = weights_initializer
-        self.hidden_layer_sizes = hidden_layer_sizes
-        self.num_hidden_layers = num_hidden_layers
-        self.hidden_activation = hidden_activation
-        self.l1_penalty = l1_penalty
-        self.l2_penalty = l2_penalty
-        self.dropout_rate = dropout_rate
-        self.sample_weight = sample_weight
+        Args:
+            n_features (int): The number of features in the input data (e.g., SNPs).
+            prefix (str): A prefix used for logging.
+            num_classes (int): The number of possible classes for each input element. Defaults to 4.
+            hidden_layer_sizes (List[int] | np.ndarray): A list of integers specifying the size of each hidden layer in the encoder. The decoder will use the reverse of this structure. Defaults to [128, 64].
+            latent_dim (int): The dimensionality of the latent space (bottleneck). Defaults to 2.
+            dropout_rate (float): The dropout rate for regularization in hidden layers. Defaults to 0.2.
+            activation (Literal["relu", "elu", "selu", "leaky_relu"]): The name of the activation function for hidden layers. Defaults to "relu".
+            gamma (float): The focusing parameter for the focal loss function. Defaults to 2.0.
+            device (Literal["cpu", "gpu", "mps"]): The device to run the model on.
+            verbose (bool): If True, enables detailed logging.
+            debug (bool): If True, enables debug mode.
+        """
+        super(AutoencoderModel, self).__init__()
         self.num_classes = num_classes
+        self.gamma = gamma
+        self.device = device
-        nn = NeuralNetworkMethods()
-        hidden_layer_sizes = nn.validate_hidden_layers(
-            self.hidden_layer_sizes, self.num_hidden_layers
+        logman = LoggerManager(
+            name=__name__, prefix=prefix, verbose=verbose, debug=debug
         )
+        self.logger = logman.get_logger()
-        hidden_layer_sizes = nn.get_hidden_layer_sizes(
-            n_features, self.n_components, hidden_layer_sizes, vae=True
-        )
-        hidden_layer_sizes = [h * self.num_classes for h in hidden_layer_sizes]
-        if self.l1_penalty == 0.0 and self.l2_penalty == 0.0:
-            kernel_regularizer = None
-        else:
-            kernel_regularizer = l1_l2(self.l1_penalty, self.l2_penalty)
-        kernel_initializer = self.weights_initializer
-        if self.hidden_activation.lower() == "leaky_relu":
-            activation = LeakyReLU(alpha=0.01)
-        elif self.hidden_activation.lower() == "prelu":
-            activation = PReLU()
-        elif self.hidden_activation.lower() == "selu":
-            activation = "selu"
-            kernel_initializer = "lecun_normal"
-        else:
-            activation = self.hidden_activation
-        if num_hidden_layers > 5:
-            raise ValueError(
-                f"The maximum number of hidden layers is 5, but got "
-                f"{num_hidden_layers}"
-            )
+        activation_module = self._resolve_activation(activation)
         self.encoder = Encoder(
             n_features,
             self.num_classes,
-            self.n_components,
+            latent_dim,
             hidden_layer_sizes,
-            self.dropout_rate,
-            activation,
-            kernel_initializer,
-            kernel_regularizer,
+            dropout_rate,
+            activation_module,
         )
-        hidden_layer_sizes.reverse()
+        decoder_layer_sizes = list(reversed(hidden_layer_sizes))
         self.decoder = Decoder(
             n_features,
             self.num_classes,
-            self.n_components,
-            hidden_layer_sizes,
-            self.dropout_rate,
-            activation,
-            kernel_initializer,
-            kernel_regularizer,
-        )
-        self.activation = Activation("sigmoid")
-    def call(self, inputs, training=None):
-        """Forward pass through model."""
-        x = self.encoder(inputs)
-        x = self.decoder(x)
-        return self.activation(x)
-    def model(self):
-        """To allow model.summary().summar() to be called."""
-        x = tf.keras.Input(shape=(self.n_features, self.num_classes))
-        return tf.keras.Model(inputs=[x], outputs=self.call(x))
-    def set_model_outputs(self):
-        """Set expected model outputs."""
-        x = tf.keras.Input(shape=(self.n_features, self.num_classes))
-        model = tf.keras.Model(inputs=[x], outputs=self.call(x))
-        self.outputs = model.outputs
-    @property
-    def metrics(self):
-        return [
-            self.total_loss_tracker,
-            self.binary_accuracy_tracker,
-        ]
-    @tf.function
-    def train_step(self, data):
-        y = self._y
-        (
-            y_true,
-            sample_weight,
-            missing_mask,
-        ) = self.nn_.prepare_training_batches(
-            y,
-            y,
-            self._batch_size,
-            self._batch_idx,
-            True,
-            self.n_components,
-            self._sample_weight,
-            self._missing_mask,
-            ubp=False,
-        )
-        if sample_weight is not None:
-            sample_weight_masked = tf.convert_to_tensor(
-                sample_weight[~missing_mask], dtype=tf.float32
-            )
-        else:
-            sample_weight_masked = None
-        y_true_masked = tf.boolean_mask(
-            tf.convert_to_tensor(y_true, dtype=tf.float32),
-            tf.reduce_any(tf.not_equal(y_true, -1), axis=2),
-        )
-        with tf.GradientTape() as tape:
-            reconstruction = self(y_true, training=True)
-            y_pred_masked = tf.boolean_mask(
-                reconstruction, tf.reduce_any(tf.not_equal(y_true, -1), axis=2)
-            )
-            # Returns binary crossentropy loss.
-            reconstruction_loss = self.compiled_loss(
-                y_true_masked,
-                y_pred_masked,
-                sample_weight=sample_weight_masked,
-            )
-            regularization_loss = sum(self.losses)
-            total_loss = reconstruction_loss + regularization_loss
-        grads = tape.gradient(total_loss, self.trainable_weights)
-        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
-        ### NOTE: If you get the error, "'tuple' object has no attribute
-        ### 'rank', then convert y_true to a tensor object."
-        self.total_loss_tracker.update_state(total_loss)
-        self.binary_accuracy_tracker.update_state(
-            tf.keras.metrics.binary_accuracy(y_true_masked, y_pred_masked)
+            latent_dim,
+            decoder_layer_sizes,
+            dropout_rate,
+            activation_module,
         )
-        return {
-            "loss": self.total_loss_tracker.result(),
-            "binary_accuracy": self.binary_accuracy_tracker.result(),
-        }
-    @tf.function
-    def test_step(self, data):
-        """Custom evaluation loop for one step (=batch) in a single epoch.
-        This function will evaluate on a batch of samples (rows), which can be adjusted with the ``batch_size`` parameter from the estimator.
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Performs the forward pass through the full Autoencoder model.
         Args:
-            data (Tuple[tf.EagerTensor, tf.EagerTensor]): Input tensorflow tensors of shape (batch_size, n_components) and (batch_size, n_features, num_classes).
+            x (torch.Tensor): The input data tensor of shape `(batch_size, n_features, num_classes)`.
         Returns:
-            Dict[str, float]: History object that gets returned from fit(). Contains the loss and any metrics specified in compile().
+            torch.Tensor: The reconstructed data tensor.
         """
-        y = self._y
-        (
-            y_true,
-            sample_weight,
-            missing_mask,
-        ) = self.nn_.prepare_training_batches(
-            y,
-            y,
-            self._batch_size,
-            self._batch_idx,
-            True,
-            self.n_components,
-            self._sample_weight,
-            self._missing_mask,
-            ubp=False,
-        )
-        if sample_weight is not None:
-            sample_weight_masked = tf.convert_to_tensor(
-                sample_weight[~missing_mask], dtype=tf.float32
-            )
-        else:
-            sample_weight_masked = None
+        z = self.encoder(x)
+        reconstruction = self.decoder(z)
+        return reconstruction
-        y_true_masked = tf.boolean_mask(
-            tf.convert_to_tensor(y_true, dtype=tf.float32),
-            tf.reduce_any(tf.not_equal(y_true, -1), axis=2),
-        )
-        reconstruction = self(y_true, training=False)
-        y_pred_masked = tf.boolean_mask(
-            reconstruction, tf.reduce_any(tf.not_equal(y_true, -1), axis=2)
-        )
-        reconstruction_loss = self.compiled_loss(
-            y_true_masked,
-            y_pred_masked,
-            sample_weight=sample_weight_masked,
-        )
-        regularization_loss = sum(self.losses)
-        total_loss = reconstruction_loss + regularization_loss
+    def compute_loss(
+        self,
+        reconstruction: torch.Tensor,
+        y: torch.Tensor,
+        mask: torch.Tensor | None = None,
+        class_weights: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Computes the reconstruction loss for the Autoencoder model.
-        ### NOTE: If you get the error, "'tuple' object has no attribute
-        ### 'rank', then convert y_true to a tensor object."
-        self.total_loss_tracker.update_state(total_loss)
-        self.binary_accuracy_tracker.update_state(
-            tf.keras.metrics.binary_accuracy(y_true_masked, y_pred_masked)
-        )
+        This method calculates the reconstruction loss using a masked focal loss, which is suitable for categorical data with missing values and class imbalance.
-        return {
-            "loss": self.total_loss_tracker.result(),
-            "binary_accuracy": self.binary_accuracy_tracker.result(),
-        }
+        Args:
+            reconstruction (torch.Tensor): The reconstructed output (logits) from the model's forward pass.
+            y (torch.Tensor): The target data tensor, expected to be one-hot encoded. It is converted to class indices internally for the loss calculation.
+            mask (torch.Tensor | None): A boolean mask to exclude missing values from the loss calculation.
+            class_weights (torch.Tensor | None): Weights to apply to each class in the loss to handle imbalance.
-    @property
-    def batch_size(self):
-        """Batch (=step) size per epoch.
-        :noindex:
+        Returns:
+            torch.Tensor: The computed scalar loss value.
         """
-        return self._batch_size
+        if class_weights is None:
+            class_weights = torch.ones(self.num_classes, device=y.device)
-    @property
-    def batch_idx(self):
-        """Current batch (=step) index.
-        :noindex:
-        """
-        return self._batch_idx
+        logits_flat = reconstruction.view(-1, self.num_classes)
+        targets_flat = torch.argmax(y, dim=-1).view(-1)
-    @property
-    def y(self):
-        """Full input dataset.
-        :noindex:
-        """
-        return self._y
+        if mask is None:
+            mask_flat = torch.ones_like(targets_flat, dtype=torch.bool)
+        else:
+            mask_flat = mask.view(-1)
-    @property
-    def missing_mask(self):
-        """Missing mask of shape (y.shape[0], y.shape[1])
-        :noindex:
-        """
-        return self._missing_mask
+        criterion = MaskedFocalLoss(alpha=class_weights, gamma=self.gamma)
-    @property
-    def sample_weight(self):
-        """Sample weights of shape (y.shape[0], y.shape[1])
-        :noindex:
-        """
-        return self._sample_weight
+        reconstruction_loss = criterion(
+            logits_flat.to(self.device),
+            targets_flat.to(self.device),
+            valid_mask=mask_flat.to(self.device),
+        )
-    @batch_size.setter
-    def batch_size(self, value):
-        """Set batch_size parameter.
-        :noindex:
-        """
-        self._batch_size = int(value)
+        return reconstruction_loss
-    @batch_idx.setter
-    def batch_idx(self, value):
-        """Set current batch (=step) index.
-        :noindex:
-        """
-        self._batch_idx = int(value)
+    def _resolve_activation(
+        self, activation: Literal["relu", "elu", "leaky_relu", "selu"]
+    ) -> torch.nn.Module:
+        """Resolves an activation function module from a string name.
-    @y.setter
-    def y(self, value):
-        """Set y after each epoch.
-        :noindex:
-        """
-        self._y = value
+        Args:
+            activation (Literal["relu", "elu", "leaky_relu", "selu"]): The name of the activation function.
-    @missing_mask.setter
-    def missing_mask(self, value):
-        """Set missing_mask after each epoch.
-        :noindex:
-        """
-        self._missing_mask = value
+        Returns:
+            torch.nn.Module: The corresponding instantiated PyTorch activation function module.
-    @sample_weight.setter
-    def sample_weight(self, value):
-        """Set sample_weight after each epoch.
-        :noindex:
+        Raises:
+            ValueError: If the provided activation name is not supported.
         """
-        self._sample_weight = value
+        activation = activation.lower()
+        if activation == "relu":
+            return nn.ReLU()
+        elif activation == "elu":
+            return nn.ELU()
+        elif activation in ("leaky_relu", "leakyrelu"):
+            return nn.LeakyReLU()
+        elif activation == "selu":
+            return nn.SELU()
+        else:
+            msg = f"Activation {activation} not supported."
+            self.logger.error(msg)
+            raise ValueError(msg)

pg-sui 1.0.2.1__py3-none-any.whl → 1.6.8__py3-none-any.whl

Potentially problematic release.

pg-sui 1.0.2.1py3-none-any.whl → 1.6.8py3-none-any.whl