PyPI - pg-sui - Versions diffs - 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl - Mend

pg-sui 0.2.0 (py3-none-any.whl) → 1.6.14.dev9 (py3-none-any.whl)

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (127)

{pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/METADATA +101 -79
pg_sui-1.6.14.dev9.dist-info/RECORD +81 -0
{pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/WHEEL +1 -1
pg_sui-1.6.14.dev9.dist-info/entry_points.txt +4 -0
{pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info/licenses}/LICENSE +0 -0
pg_sui-1.6.14.dev9.dist-info/top_level.txt +1 -0
pgsui/__init__.py +35 -54
pgsui/_version.py +34 -0
pgsui/cli.py +909 -0
pgsui/data_processing/__init__.py +0 -0
pgsui/data_processing/config.py +565 -0
pgsui/data_processing/containers.py +1424 -0
pgsui/data_processing/transformers.py +557 -907
pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
pgsui/electron/app/__main__.py +5 -0
pgsui/electron/app/extra-resources/.gitkeep +1 -0
pgsui/electron/app/icons/icons/1024x1024.png +0 -0
pgsui/electron/app/icons/icons/128x128.png +0 -0
pgsui/electron/app/icons/icons/16x16.png +0 -0
pgsui/electron/app/icons/icons/24x24.png +0 -0
pgsui/electron/app/icons/icons/256x256.png +0 -0
pgsui/electron/app/icons/icons/32x32.png +0 -0
pgsui/electron/app/icons/icons/48x48.png +0 -0
pgsui/electron/app/icons/icons/512x512.png +0 -0
pgsui/electron/app/icons/icons/64x64.png +0 -0
pgsui/electron/app/icons/icons/icon.icns +0 -0
pgsui/electron/app/icons/icons/icon.ico +0 -0
pgsui/electron/app/main.js +227 -0
pgsui/electron/app/package-lock.json +6894 -0
pgsui/electron/app/package.json +51 -0
pgsui/electron/app/preload.js +15 -0
pgsui/electron/app/server.py +157 -0
pgsui/electron/app/ui/logo.png +0 -0
pgsui/electron/app/ui/renderer.js +131 -0
pgsui/electron/app/ui/styles.css +59 -0
pgsui/electron/app/ui/ui_shim.js +72 -0
pgsui/electron/bootstrap.py +43 -0
pgsui/electron/launch.py +57 -0
pgsui/electron/package.json +14 -0
pgsui/example_data/__init__.py +0 -0
pgsui/example_data/phylip_files/__init__.py +0 -0
pgsui/example_data/phylip_files/test.phy +0 -0
pgsui/example_data/popmaps/__init__.py +0 -0
pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
pgsui/example_data/structure_files/__init__.py +0 -0
pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
pgsui/impute/__init__.py +0 -0
pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
pgsui/impute/deterministic/imputers/mode.py +844 -0
pgsui/impute/deterministic/imputers/nmf.py +221 -0
pgsui/impute/deterministic/imputers/phylo.py +973 -0
pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
pgsui/impute/supervised/__init__.py +0 -0
pgsui/impute/supervised/base.py +343 -0
pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
pgsui/impute/supervised/imputers/random_forest.py +291 -0
pgsui/impute/unsupervised/__init__.py +0 -0
pgsui/impute/unsupervised/base.py +1118 -0
pgsui/impute/unsupervised/callbacks.py +92 -262
{simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
pgsui/impute/unsupervised/imputers/autoencoder.py +1285 -0
pgsui/impute/unsupervised/imputers/nlpca.py +1554 -0
pgsui/impute/unsupervised/imputers/ubp.py +1575 -0
pgsui/impute/unsupervised/imputers/vae.py +1228 -0
pgsui/impute/unsupervised/loss_functions.py +261 -0
pgsui/impute/unsupervised/models/__init__.py +0 -0
pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
pgsui/impute/unsupervised/models/vae_model.py +269 -630
pgsui/impute/unsupervised/nn_scorers.py +255 -0
pgsui/utils/__init__.py +0 -0
pgsui/utils/classification_viz.py +608 -0
pgsui/utils/logging_utils.py +22 -0
pgsui/utils/misc.py +35 -480
pgsui/utils/plotting.py +996 -829
pgsui/utils/pretty_metrics.py +290 -0
pgsui/utils/scorers.py +213 -666
pg_sui-0.2.0.dist-info/RECORD +0 -75
pg_sui-0.2.0.dist-info/top_level.txt +0 -3
pgsui/example_data/phylip_files/test_n10.phy +0 -118
pgsui/example_data/phylip_files/test_n100.phy +0 -118
pgsui/example_data/phylip_files/test_n2.phy +0 -118
pgsui/example_data/phylip_files/test_n500.phy +0 -118
pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
pgsui/example_data/trees/test.iqtree +0 -376
pgsui/example_data/trees/test.qmat +0 -5
pgsui/example_data/trees/test.rate +0 -2033
pgsui/example_data/trees/test.tre +0 -1
pgsui/example_data/trees/test_n10.rate +0 -19
pgsui/example_data/trees/test_n100.rate +0 -109
pgsui/example_data/trees/test_n500.rate +0 -509
pgsui/example_data/trees/test_siterates.txt +0 -2024
pgsui/example_data/trees/test_siterates_n10.txt +0 -10
pgsui/example_data/trees/test_siterates_n100.txt +0 -100
pgsui/example_data/trees/test_siterates_n500.txt +0 -500
pgsui/example_data/vcf_files/test.vcf +0 -244
pgsui/example_data/vcf_files/test.vcf.gz +0 -0
pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
pgsui/impute/estimators.py +0 -1268
pgsui/impute/impute.py +0 -1463
pgsui/impute/simple_imputers.py +0 -1431
pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
pgsui/impute/unsupervised/keras_classifiers.py +0 -697
pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
pgsui/pg_sui.py +0 -261
pgsui/utils/sequence_tools.py +0 -407
simulation/sim_benchmarks.py +0 -333
simulation/sim_treeparams.py +0 -475
test/__init__.py +0 -0
test/pg_sui_simtest.py +0 -215
test/pg_sui_testing.py +0 -523
test/test.py +0 -151
test/test_pgsui.py +0 -374
test/test_tkc.py +0 -185

pgsui/impute/unsupervised/models/autoencoder_model.py CHANGED

@@ -1,645 +1,293 @@
-import logging
-import os
-import sys
-import warnings
+from typing import List, Literal
-# Import tensorflow with reduced warnings.
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-logging.getLogger("tensorflow").disabled = True
-warnings.filterwarnings("ignore", category=UserWarning)
+import numpy as np
+import torch
+import torch.nn as nn
+from snpio.utils.logging import LoggerManager
-import tensorflow as tf
+from pgsui.impute.unsupervised.loss_functions import MaskedFocalLoss
+from pgsui.utils.logging_utils import configure_logger
-# Disable can't find cuda .dll errors. Also turns of GPU support.
-tf.config.set_visible_devices([], "GPU")
-from tensorflow.python.util import deprecation
+class Encoder(nn.Module):
+    """The Encoder module of a standard Autoencoder.
-# Disable warnings and info logs.
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
-tf.get_logger().setLevel(logging.ERROR)
-# Monkey patching deprecation utils to supress warnings.
-# noinspection PyUnusedLocal
-def deprecated(
-    date, instructions, warn_once=True
-):  # pylint: disable=unused-argument
-    def deprecated_wrapper(func):
-        return func
-    return deprecated_wrapper
-deprecation.deprecated = deprecated
-from tensorflow.keras.layers import (
-    Dropout,
-    Dense,
-    Reshape,
-    Flatten,
-    LeakyReLU,
-    PReLU,
-)
-from tensorflow.keras.regularizers import l1_l2
-from tensorflow.keras import backend as K
-# Custom Modules
-try:
-    from ..neural_network_methods import NeuralNetworkMethods
-except (ModuleNotFoundError, ValueError, ImportError):
-    from impute.unsupervised.neural_network_methods import NeuralNetworkMethods
-class Encoder(tf.keras.layers.Layer):
-    """VAE encoder to Encode genotypes to (z_mean, z_log_var, z)."""
+    This module defines the encoder network, which takes high-dimensional input data and maps it to a deterministic, low-dimensional latent representation. The architecture consists of a series of fully-connected hidden layers that progressively compress the flattened input data into a single latent vector, `z`.
+    """
     def __init__(
         self,
-        n_features,
-        num_classes,
-        latent_dim,
-        hidden_layer_sizes,
-        dropout_rate,
-        activation,
-        kernel_initializer,
-        kernel_regularizer,
-        beta=K.variable(0.0),
-        name="Encoder",
-        **kwargs,
+        n_features: int,
+        num_classes: int,
+        latent_dim: int,
+        hidden_layer_sizes: List[int],
+        dropout_rate: float,
+        activation: torch.nn.Module,
     ):
-        super(Encoder, self).__init__(name=name, **kwargs)
+        """Initializes the Encoder module.
-        self.beta = beta
+        This class defines the encoder network, which takes high-dimensional input data and maps it to a deterministic, low-dimensional latent representation. The architecture consists of a series of fully-connected hidden layers that progressively compress the flattened input data into a single latent vector, `z`.
-        self.dense2 = None
-        self.dense3 = None
-        self.dense4 = None
-        self.dense5 = None
+        Args:
+            n_features (int): The number of features in the input data (e.g., SNPs).
+            num_classes (int): Number of genotype states per locus (2 for haploid, 3 for diploid in practice).
+            latent_dim (int): The dimensionality of the output latent space.
+            hidden_layer_sizes (List[int]): A list of integers specifying the size of each hidden layer.
+            dropout_rate (float): The dropout rate for regularization in the hidden layers.
+            activation (torch.nn.Module): An instantiated activation function module (e.g., `nn.ReLU()`) for the hidden layers.
+        """
+        super(Encoder, self).__init__()
+        self.flatten = nn.Flatten()
-        # n_features * num_classes.
-        self.flatten = Flatten()
+        layers = []
+        input_dim = n_features * num_classes
+        for hidden_size in hidden_layer_sizes:
+            layers.append(nn.Linear(input_dim, hidden_size))
+            layers.append(nn.BatchNorm1d(hidden_size))
+            layers.append(nn.Dropout(dropout_rate))
+            layers.append(activation)
+            input_dim = hidden_size
-        self.dense1 = Dense(
-            hidden_layer_sizes[0],
-            input_shape=(n_features * num_classes,),
-            activation=activation,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-            name="Encoder1",
-        )
+        self.hidden_layers = nn.Sequential(*layers)
+        self.dense_z = nn.Linear(input_dim, latent_dim)
-        if len(hidden_layer_sizes) >= 2:
-            self.dense2 = Dense(
-                hidden_layer_sizes[1],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Encoder2",
-            )
-        if len(hidden_layer_sizes) >= 3:
-            self.dense3 = Dense(
-                hidden_layer_sizes[2],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Encoder3",
-            )
-        if len(hidden_layer_sizes) >= 4:
-            self.dense4 = Dense(
-                hidden_layer_sizes[3],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Encoder4",
-            )
-        if len(hidden_layer_sizes) == 5:
-            self.dense5 = Dense(
-                hidden_layer_sizes[4],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Encoder5",
-            )
-        self.dense_latent = Dense(
-            latent_dim,
-            activation=activation,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-            name="Encoder5",
-        )
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Performs the forward pass through the encoder.
-        self.dropout_layer = Dropout(dropout_rate)
+        Args:
+            x (torch.Tensor): The input data tensor of shape `(batch_size, n_features, num_classes)`.
-    def call(self, inputs, training=None):
-        """Forward pass through model."""
-        x = self.flatten(inputs)
-        x = self.dense1(x)
-        x = self.dropout_layer(x, training=training)
-        if self.dense2 is not None:
-            x = self.dense2(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense3 is not None:
-            x = self.dense3(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense4 is not None:
-            x = self.dense4(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense5 is not None:
-            x = self.dense5(x)
-            x = self.dropout_layer(x, training=training)
+        Returns:
+            torch.Tensor: The latent representation `z` of shape `(batch_size, latent_dim)`.
+        """
+        x = self.flatten(x)
+        x = self.hidden_layers(x)
+        z = self.dense_z(x)
+        return z
-        return self.dense_latent(x)
+class Decoder(nn.Module):
+    """The Decoder module of a standard Autoencoder.
-class Decoder(tf.keras.layers.Layer):
-    """Converts the encoded vector back into the reconstructed output"""
+    This module defines the decoder network, which takes a deterministic latent vector and maps it back to the high-dimensional data space, aiming to reconstruct the original input. The architecture typically mirrors the encoder, consisting of a series of fully-connected hidden layers that progressively expand the representation, followed by a final linear layer to produce the reconstructed data.
+    """
     def __init__(
         self,
-        n_features,
-        num_classes,
-        latent_dim,
-        hidden_layer_sizes,
-        dropout_rate,
-        activation,
-        kernel_initializer,
-        kernel_regularizer,
-        name="Decoder",
-        **kwargs,
-    ):
-        super(Decoder, self).__init__(name=name, **kwargs)
-        self.dense2 = None
-        self.dense3 = None
-        self.dense4 = None
-        self.dense5 = None
-        self.dense1 = Dense(
-            hidden_layer_sizes[0],
-            input_shape=(latent_dim,),
-            activation=activation,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-            name="Decoder1",
-        )
-        if len(hidden_layer_sizes) >= 2:
-            self.dense2 = Dense(
-                hidden_layer_sizes[1],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Decoder2",
-            )
-        if len(hidden_layer_sizes) >= 3:
-            self.dense3 = Dense(
-                hidden_layer_sizes[2],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Decoder3",
-            )
-        if len(hidden_layer_sizes) >= 4:
-            self.dense4 = Dense(
-                hidden_layer_sizes[3],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Decoder4",
-            )
-        if len(hidden_layer_sizes) == 5:
-            self.dense5 = Dense(
-                hidden_layer_sizes[4],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-                name="Decoder5",
-            )
-        # No activation for final layer.
-        self.dense_output = Dense(
-            n_features * num_classes,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-            activation=None,
-            name="Decoder6",
-        )
-        self.rshp = Reshape((n_features, num_classes))
-        self.dropout_layer = Dropout(dropout_rate)
-    def call(self, inputs, training=None):
-        """Forward pass through model."""
-        x = self.dense1(inputs)
-        x = self.dropout_layer(x, training=training)
-        if self.dense2 is not None:
-            x = self.dense2(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense3 is not None:
-            x = self.dense3(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense4 is not None:
-            x = self.dense4(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense5 is not None:
-            x = self.dense5(x)
-            x = self.dropout_layer(x, training=training)
-        x = self.dense_output(x)
-        return self.rshp(x)
-class AutoEncoderModel(tf.keras.Model):
-    """Standard AutoEncoder model to impute missing data.
+        n_features: int,
+        num_classes: int,
+        latent_dim: int,
+        hidden_layer_sizes: List[int],
+        dropout_rate: float,
+        activation: torch.nn.Module,
+    ) -> None:
+        """Initializes the Decoder module.
-    Args:
-        y (np.ndarray): Full input data.
-        batch_size (int, optional): Batch size to use with model. Defaults to 32.
-        output_shape (int, optional): Number of features in output. Defaults to None.
+        Args:
+            n_features (int): The number of features in the output data (e.g., SNPs).
+            num_classes (int): Number of genotype states per locus (2 or 3 in practice).
+            latent_dim (int): The dimensionality of the input latent space.
+            hidden_layer_sizes (List[int]): A list of integers specifying the size of each hidden layer (typically the reverse of the encoder's).
+            dropout_rate (float): The dropout rate for regularization in the hidden layers.
+            activation (torch.nn.Module): An instantiated activation function module (e.g., `nn.ReLU()`) for the hidden layers.
+        """
+        super(Decoder, self).__init__()
-        n_components (int, optional): Number of principal components to encode. Defaults to 3.
+        layers = []
+        input_dim = latent_dim
+        for hidden_size in hidden_layer_sizes:
+            layers.append(nn.Linear(input_dim, hidden_size))
+            layers.append(nn.BatchNorm1d(hidden_size))
+            layers.append(nn.Dropout(dropout_rate))
+            layers.append(activation)
+            input_dim = hidden_size
-        weights_initializer (str, optional): tf.keras function to use with initial weights. Defaults to 'glorot_normal'.
+        self.hidden_layers = nn.Sequential(*layers)
+        output_dim = n_features * num_classes
+        self.dense_output = nn.Linear(input_dim, output_dim)
+        self.reshape = (n_features, num_classes)
-        hidden_layer_sizes (str, List[int], or int, optional): Number of nodes to use in hidden layers. If List[int] is provided, must be equal in length to the number of hidden layers. If a string is provided, a calculation will be performed to automatically estimate the hidden layer sizes, with possible options including {'midpoint' or 'sqrt'}. If an integer is provided, then the provided integer will be used for all hidden layers. Defaults to 'midpoint'.
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Performs the forward pass through the decoder.
-        num_hidden_layers (int, optional): Number of hidden layers to use in model construction. Maximum number of layers is 5. Defaults to 1.
+        Args:
+            x (torch.Tensor): The input latent tensor of shape `(batch_size, latent_dim)`.
-        hidden_activation (str, optional): Hidden activation function to use in hidden layers. Possible options include: {"elu", "relu", "selu", "leaky_relu", and "prelu"}. Defaults to "elu".
+        Returns:
+            torch.Tensor: The reconstructed output data of shape `(batch_size, n_features, num_classes)`.
+        """
+        x = self.hidden_layers(x)
+        x = self.dense_output(x)
+        return x.view(-1, *self.reshape)
-        l1_penalty (float, optional): l1_penalty to use for regularization. Defaults to 1e-6.
-        l2_penalty (float, optional): l2_penalty to use fo regularization. Defaults to 1e-6.
+class AutoencoderModel(nn.Module):
+    """A standard Autoencoder (AE) model for imputation.
-        dropout_rate (float, optional): Dropout rate to use for Dropout() layer. Defaults to 0.2.
+    This class combines an `Encoder` and a `Decoder` to form a standard autoencoder. The model is trained to learn a compressed, low-dimensional representation of the input data and then reconstruct it as accurately as possible. It is particularly useful for unsupervised dimensionality reduction and data imputation.
-        sample_weight (numpy.ndarray, optional): Sample weight matrix for weighting class imbalance. Should be of shape (n_samples, n_features). Defaults to None.
+    **Model Architecture and Objective:**
-        num_classes (int, optional): Number of classes in multiclass predictions. Defaults to 3.
+    The autoencoder consists of two parts: an encoder, $f_{\theta}$, and a decoder, $g_{\phi}$.
+        1.  The **encoder** maps the input data $x$ to a latent representation $z$:
+            $$
+            z = f_{\theta}(x)
+            $$
+        2.  The **decoder** reconstructs the data $\hat{x}$ from the latent representation:
+            $$
+            \hat{x} = g_{\phi}(z)
+            $$
-    Raises:
-        ValueError: Maximum number of hidden layers (5) was exceeded.
+    The model is trained by minimizing a reconstruction loss, $L(x, \hat{x})$, which measures the dissimilarity between the original input and the reconstructed output. This implementation uses a `MaskedFocalLoss` to handle missing values and class imbalance effectively.
     """
     def __init__(
         self,
-        y,
-        batch_size=32,
-        output_shape=None,
-        n_components=3,
-        weights_initializer="glorot_normal",
-        hidden_layer_sizes="midpoint",
-        num_hidden_layers=1,
-        hidden_activation="elu",
-        l1_penalty=1e-6,
-        l2_penalty=1e-6,
-        dropout_rate=0.2,
-        sample_weight=None,
-        missing_mask=None,
-        num_classes=3,
+        n_features: int,
+        prefix: str,
+        *,
+        num_classes: int = 4,
+        hidden_layer_sizes: List[int] | np.ndarray = [128, 64],
+        latent_dim: int = 2,
+        dropout_rate: float = 0.2,
+        activation: Literal["relu", "elu", "selu", "leaky_relu"] = "relu",
+        gamma: float = 2.0,
+        device: Literal["cpu", "gpu", "mps"] = "cpu",
+        verbose: bool = False,
+        debug: bool = False,
     ):
-        super(AutoEncoderModel, self).__init__()
-        self.nn_ = NeuralNetworkMethods()
-        self.categorical_accuracy = self.nn_.make_masked_categorical_accuracy()
+        """Initializes the AutoencoderModel.
-        self.total_loss_tracker = tf.keras.metrics.Mean(name="loss")
-        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(
-            name="reconstruction_loss"
-        )
-        self.accuracy_tracker = tf.keras.metrics.Mean(name="accuracy")
-        self._y = y
-        self._batch_idx = 0
-        self._batch_size = batch_size
-        self._sample_weight = sample_weight
-        self._missing_mask = missing_mask
-        # y_train[1] dimension.
-        self.n_features = output_shape
-        n_features = self.n_features
-        self.n_components = n_components
-        self.weights_initializer = weights_initializer
-        self.hidden_layer_sizes = hidden_layer_sizes
-        self.num_hidden_layers = num_hidden_layers
-        self.hidden_activation = hidden_activation
-        self.l1_penalty = l1_penalty
-        self.l2_penalty = l2_penalty
-        self.dropout_rate = dropout_rate
-        self.sample_weight = sample_weight
+        Args:
+            n_features (int): The number of features in the input data (e.g., SNPs).
+            prefix (str): A prefix used for logging.
+            num_classes (int): Number of genotype states per locus. Defaults to 4 for backward compatibility, but the genotype imputers pass 2 (haploid) or 3 (diploid).
+            hidden_layer_sizes (List[int] | np.ndarray): A list of integers specifying the size of each hidden layer in the encoder. The decoder will use the reverse of this structure. Defaults to [128, 64].
+            latent_dim (int): The dimensionality of the latent space (bottleneck). Defaults to 2.
+            dropout_rate (float): The dropout rate for regularization in hidden layers. Defaults to 0.2.
+            activation (Literal["relu", "elu", "selu", "leaky_relu"]): The name of the activation function for hidden layers. Defaults to "relu".
+            gamma (float): The focusing parameter for the focal loss function. Defaults to 2.0.
+            device (Literal["cpu", "gpu", "mps"]): The device to run the model on.
+            verbose (bool): If True, enables detailed logging.
+            debug (bool): If True, enables debug mode.
+        """
+        super(AutoencoderModel, self).__init__()
         self.num_classes = num_classes
+        self.gamma = gamma
+        self.device = device
-        nn = NeuralNetworkMethods()
-        hidden_layer_sizes = nn.validate_hidden_layers(
-            self.hidden_layer_sizes, self.num_hidden_layers
+        logman = LoggerManager(
+            name=__name__, prefix=prefix, verbose=verbose, debug=debug
         )
-        hidden_layer_sizes = nn.get_hidden_layer_sizes(
-            n_features, self.n_components, hidden_layer_sizes, vae=True
+        self.logger = configure_logger(
+            logman.get_logger(), verbose=verbose, debug=debug
         )
-        hidden_layer_sizes = [h * self.num_classes for h in hidden_layer_sizes]
-        if self.l1_penalty == 0.0 and self.l2_penalty == 0.0:
-            kernel_regularizer = None
-        else:
-            kernel_regularizer = l1_l2(self.l1_penalty, self.l2_penalty)
-        kernel_initializer = self.weights_initializer
-        if self.hidden_activation.lower() == "leaky_relu":
-            activation = LeakyReLU(alpha=0.01)
-        elif self.hidden_activation.lower() == "prelu":
-            activation = PReLU()
-        elif self.hidden_activation.lower() == "selu":
-            activation = "selu"
-            kernel_initializer = "lecun_normal"
+        activation_module = self._resolve_activation(activation)
+        if isinstance(hidden_layer_sizes, np.ndarray):
+            hls = hidden_layer_sizes.tolist()
         else:
-            activation = self.hidden_activation
-        if num_hidden_layers > 5:
-            raise ValueError(
-                f"The maximum number of hidden layers is 5, but got "
-                f"{num_hidden_layers}"
-            )
+            hls = hidden_layer_sizes
         self.encoder = Encoder(
             n_features,
             self.num_classes,
-            self.n_components,
-            hidden_layer_sizes,
-            self.dropout_rate,
-            activation,
-            kernel_initializer,
-            kernel_regularizer,
+            latent_dim,
+            hls,
+            dropout_rate,
+            activation_module,
         )
-        hidden_layer_sizes.reverse()
+        decoder_layer_sizes = list(reversed(hls))
         self.decoder = Decoder(
             n_features,
             self.num_classes,
-            self.n_components,
-            hidden_layer_sizes,
-            self.dropout_rate,
-            activation,
-            kernel_initializer,
-            kernel_regularizer,
-        )
-    def call(self, inputs, training=None):
-        """Forward pass through model."""
-        x = self.encoder(inputs)
-        return self.decoder(x)
-    def model(self):
-        """To allow model.summary().summar() to be called."""
-        x = tf.keras.Input(shape=(self.n_features, self.num_classes))
-        return tf.keras.Model(inputs=[x], outputs=self.call(x))
-    def set_model_outputs(self):
-        """Set expected model outputs."""
-        x = tf.keras.Input(shape=(self.n_features, self.num_classes))
-        model = tf.keras.Model(inputs=[x], outputs=self.call(x))
-        self.outputs = model.outputs
-    @property
-    def metrics(self):
-        return [
-            self.total_loss_tracker,
-            self.reconstruction_loss_tracker,
-            self.accuracy_tracker,
-        ]
-    @tf.function
-    def train_step(self, data):
-        y = self._y
-        (
-            y_true,
-            sample_weight,
-            missing_mask,
-        ) = self.nn_.prepare_training_batches(
-            y,
-            y,
-            self._batch_size,
-            self._batch_idx,
-            True,
-            self.n_components,
-            self._sample_weight,
-            self._missing_mask,
-            ubp=False,
-        )
-        if sample_weight is not None:
-            sample_weight_masked = tf.convert_to_tensor(
-                sample_weight[~missing_mask], dtype=tf.float32
-            )
-        else:
-            sample_weight_masked = None
-        y_true_masked = tf.boolean_mask(
-            tf.convert_to_tensor(y_true, dtype=tf.float32),
-            tf.reduce_any(tf.not_equal(y_true, -1), axis=2),
-        )
-        with tf.GradientTape() as tape:
-            reconstruction = self(y_true, training=True)
-            y_pred_masked = tf.boolean_mask(
-                reconstruction, tf.reduce_any(tf.not_equal(y_true, -1), axis=2)
-            )
-            # Returns binary crossentropy loss.
-            reconstruction_loss = self.compiled_loss(
-                y_true_masked,
-                y_pred_masked,
-                sample_weight=sample_weight_masked,
-            )
-            regularization_loss = sum(self.losses)
-            total_loss = reconstruction_loss + regularization_loss
-        grads = tape.gradient(total_loss, self.trainable_weights)
-        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
-        self.total_loss_tracker.update_state(total_loss)
-        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
-        ### NOTE: If you get the error, "'tuple' object has no attribute
-        ### 'rank', then convert y_true to a tensor object."
-        # self.compiled_metrics.update_state(
-        self.accuracy_tracker.update_state(
-            self.categorical_accuracy(
-                y_true_masked,
-                y_pred_masked,
-                sample_weight=sample_weight_masked,
-            )
+            latent_dim,
+            decoder_layer_sizes,
+            dropout_rate,
+            activation_module,
         )
-        return {
-            "loss": self.total_loss_tracker.result(),
-            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
-            "accuracy": self.accuracy_tracker.result(),
-        }
-    @tf.function
-    def test_step(self, data):
-        """Custom evaluation loop for one step (=batch) in a single epoch.
-        This function will evaluate on a batch of samples (rows), which can be adjusted with the ``batch_size`` parameter from the estimator.
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Performs the forward pass through the full Autoencoder model.
         Args:
-            data (Tuple[tf.EagerTensor, tf.EagerTensor]): Input tensorflow tensors of shape (batch_size, n_components) and (batch_size, n_features, num_classes).
+            x (torch.Tensor): The input data tensor of shape `(batch_size, n_features, num_classes)`.
         Returns:
-            Dict[str, float]: History object that gets returned from fit(). Contains the loss and any metrics specified in compile().
+            torch.Tensor: The reconstructed data tensor.
         """
-        y = self._y
-        (
-            y_true,
-            sample_weight,
-            missing_mask,
-        ) = self.nn_.prepare_training_batches(
-            y,
-            y,
-            self._batch_size,
-            self._batch_idx,
-            True,
-            self.n_components,
-            self._sample_weight,
-            self._missing_mask,
-            ubp=False,
-        )
-        if sample_weight is not None:
-            sample_weight_masked = tf.convert_to_tensor(
-                sample_weight[~missing_mask], dtype=tf.float32
-            )
-        else:
-            sample_weight_masked = None
-        y_true_masked = tf.boolean_mask(
-            tf.convert_to_tensor(y_true, dtype=tf.float32),
-            tf.reduce_any(tf.not_equal(y_true, -1), axis=2),
-        )
-        reconstruction = self(y_true, training=False)
+        z = self.encoder(x)
+        reconstruction = self.decoder(z)
+        return reconstruction
-        y_pred_masked = tf.boolean_mask(
-            reconstruction, tf.reduce_any(tf.not_equal(y_true, -1), axis=2)
-        )
-        reconstruction_loss = self.compiled_loss(
-            y_true_masked,
-            y_pred_masked,
-            sample_weight=sample_weight_masked,
-        )
-        regularization_loss = sum(self.losses)
-        total_loss = reconstruction_loss + regularization_loss
-        self.accuracy_tracker.update_state(
-            self.categorical_accuracy(
-                y_true_masked,
-                y_pred_masked,
-                sample_weight=sample_weight_masked,
-            )
-        )
+    def compute_loss(
+        self,
+        reconstruction: torch.Tensor,
+        y: torch.Tensor,
+        mask: torch.Tensor | None = None,
+        class_weights: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Computes the reconstruction loss for the Autoencoder model.
-        self.total_loss_tracker.update_state(total_loss)
-        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
+        This method calculates the reconstruction loss using a masked focal loss, which is suitable for categorical data with missing values and class imbalance.
-        return {
-            "loss": self.total_loss_tracker.result(),
-            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
-            "accuracy": self.accuracy_tracker.result(),
-        }
+        Args:
+            reconstruction (torch.Tensor): The reconstructed output (logits) from the model's forward pass.
+            y (torch.Tensor): The target data tensor, expected to be one-hot encoded. It is converted to class indices internally for the loss calculation.
+            mask (torch.Tensor | None): A boolean mask to exclude missing values from the loss calculation.
+            class_weights (torch.Tensor | None): Weights to apply to each class in the loss to handle imbalance.
-    @property
-    def batch_size(self):
-        """Batch (=step) size per epoch.
-        :noindex:
+        Returns:
+            torch.Tensor: The computed scalar loss value.
         """
-        return self._batch_size
+        if class_weights is None:
+            class_weights = torch.ones(self.num_classes, device=y.device)
-    @property
-    def batch_idx(self):
-        """Current batch (=step) index.
-        :noindex:
-        """
-        return self._batch_idx
+        logits_flat = reconstruction.view(-1, self.num_classes)
+        targets_flat = torch.argmax(y, dim=-1).view(-1)
-    @property
-    def y(self):
-        """Full input dataset.
-        :noindex:
-        """
-        return self._y
+        if mask is None:
+            mask_flat = torch.ones_like(targets_flat, dtype=torch.bool)
+        else:
+            mask_flat = mask.view(-1)
-    @property
-    def missing_mask(self):
-        """Missing mask of shape (y.shape[0], y.shape[1])
-        :noindex:
-        """
-        return self._missing_mask
+        criterion = MaskedFocalLoss(alpha=class_weights, gamma=self.gamma)
-    @property
-    def sample_weight(self):
-        """Sample weights of shape (y.shape[0], y.shape[1])
-        :noindex:
-        """
-        return self._sample_weight
+        reconstruction_loss = criterion(
+            logits_flat.to(self.device),
+            targets_flat.to(self.device),
+            valid_mask=mask_flat.to(self.device),
+        )
-    @batch_size.setter
-    def batch_size(self, value):
-        """Set batch_size parameter.
-        :noindex:
-        """
-        self._batch_size = int(value)
+        return reconstruction_loss
-    @batch_idx.setter
-    def batch_idx(self, value):
-        """Set current batch (=step) index.
-        :noindex:
-        """
-        self._batch_idx = int(value)
+    def _resolve_activation(
+        self, activation: Literal["relu", "elu", "leaky_relu", "selu"]
+    ) -> torch.nn.Module:
+        """Resolves an activation function module from a string name.
-    @y.setter
-    def y(self, value):
-        """Set y after each epoch.
-        :noindex:
-        """
-        self._y = value
+        Args:
+            activation (Literal["relu", "elu", "leaky_relu", "selu"]): The name of the activation function.
-    @missing_mask.setter
-    def missing_mask(self, value):
-        """Set missing_mask after each epoch.
-        :noindex:
-        """
-        self._missing_mask = value
+        Returns:
+            torch.nn.Module: The corresponding instantiated PyTorch activation function module.
-    @sample_weight.setter
-    def sample_weight(self, value):
-        """Set sample_weight after each epoch.
-        :noindex:
+        Raises:
+            ValueError: If the provided activation name is not supported.
         """
-        self._sample_weight = value
+        act: str = activation.lower()
+        if act == "relu":
+            return nn.ReLU()
+        elif act == "elu":
+            return nn.ELU()
+        elif act in ("leaky_relu", "leakyrelu"):
+            return nn.LeakyReLU()
+        elif act == "selu":
+            return nn.SELU()
+        else:
+            msg = f"Activation {activation} not supported."
+            self.logger.error(msg)
+            raise ValueError(msg)

pg-sui 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl

pg-sui 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl