pg-sui 0.2.0__py3-none-any.whl → 1.6.14.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/METADATA +101 -79
- pg_sui-1.6.14.dev9.dist-info/RECORD +81 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info}/WHEEL +1 -1
- pg_sui-1.6.14.dev9.dist-info/entry_points.txt +4 -0
- {pg_sui-0.2.0.dist-info → pg_sui-1.6.14.dev9.dist-info/licenses}/LICENSE +0 -0
- pg_sui-1.6.14.dev9.dist-info/top_level.txt +1 -0
- pgsui/__init__.py +35 -54
- pgsui/_version.py +34 -0
- pgsui/cli.py +909 -0
- pgsui/data_processing/__init__.py +0 -0
- pgsui/data_processing/config.py +565 -0
- pgsui/data_processing/containers.py +1424 -0
- pgsui/data_processing/transformers.py +557 -907
- pgsui/{example_data/trees → electron/app}/__init__.py +0 -0
- pgsui/electron/app/__main__.py +5 -0
- pgsui/electron/app/extra-resources/.gitkeep +1 -0
- pgsui/electron/app/icons/icons/1024x1024.png +0 -0
- pgsui/electron/app/icons/icons/128x128.png +0 -0
- pgsui/electron/app/icons/icons/16x16.png +0 -0
- pgsui/electron/app/icons/icons/24x24.png +0 -0
- pgsui/electron/app/icons/icons/256x256.png +0 -0
- pgsui/electron/app/icons/icons/32x32.png +0 -0
- pgsui/electron/app/icons/icons/48x48.png +0 -0
- pgsui/electron/app/icons/icons/512x512.png +0 -0
- pgsui/electron/app/icons/icons/64x64.png +0 -0
- pgsui/electron/app/icons/icons/icon.icns +0 -0
- pgsui/electron/app/icons/icons/icon.ico +0 -0
- pgsui/electron/app/main.js +227 -0
- pgsui/electron/app/package-lock.json +6894 -0
- pgsui/electron/app/package.json +51 -0
- pgsui/electron/app/preload.js +15 -0
- pgsui/electron/app/server.py +157 -0
- pgsui/electron/app/ui/logo.png +0 -0
- pgsui/electron/app/ui/renderer.js +131 -0
- pgsui/electron/app/ui/styles.css +59 -0
- pgsui/electron/app/ui/ui_shim.js +72 -0
- pgsui/electron/bootstrap.py +43 -0
- pgsui/electron/launch.py +57 -0
- pgsui/electron/package.json +14 -0
- pgsui/example_data/__init__.py +0 -0
- pgsui/example_data/phylip_files/__init__.py +0 -0
- pgsui/example_data/phylip_files/test.phy +0 -0
- pgsui/example_data/popmaps/__init__.py +0 -0
- pgsui/example_data/popmaps/{test.popmap → phylogen_nomx.popmap} +185 -99
- pgsui/example_data/structure_files/__init__.py +0 -0
- pgsui/example_data/structure_files/test.pops.2row.allsites.str +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz +0 -0
- pgsui/example_data/vcf_files/phylogen_subset14K.vcf.gz.tbi +0 -0
- pgsui/impute/__init__.py +0 -0
- pgsui/impute/deterministic/imputers/allele_freq.py +725 -0
- pgsui/impute/deterministic/imputers/mode.py +844 -0
- pgsui/impute/deterministic/imputers/nmf.py +221 -0
- pgsui/impute/deterministic/imputers/phylo.py +973 -0
- pgsui/impute/deterministic/imputers/ref_allele.py +669 -0
- pgsui/impute/supervised/__init__.py +0 -0
- pgsui/impute/supervised/base.py +343 -0
- pgsui/impute/{unsupervised/models/in_development → supervised/imputers}/__init__.py +0 -0
- pgsui/impute/supervised/imputers/hist_gradient_boosting.py +317 -0
- pgsui/impute/supervised/imputers/random_forest.py +291 -0
- pgsui/impute/unsupervised/__init__.py +0 -0
- pgsui/impute/unsupervised/base.py +1118 -0
- pgsui/impute/unsupervised/callbacks.py +92 -262
- {simulation → pgsui/impute/unsupervised/imputers}/__init__.py +0 -0
- pgsui/impute/unsupervised/imputers/autoencoder.py +1285 -0
- pgsui/impute/unsupervised/imputers/nlpca.py +1554 -0
- pgsui/impute/unsupervised/imputers/ubp.py +1575 -0
- pgsui/impute/unsupervised/imputers/vae.py +1228 -0
- pgsui/impute/unsupervised/loss_functions.py +261 -0
- pgsui/impute/unsupervised/models/__init__.py +0 -0
- pgsui/impute/unsupervised/models/autoencoder_model.py +215 -567
- pgsui/impute/unsupervised/models/nlpca_model.py +155 -394
- pgsui/impute/unsupervised/models/ubp_model.py +180 -1106
- pgsui/impute/unsupervised/models/vae_model.py +269 -630
- pgsui/impute/unsupervised/nn_scorers.py +255 -0
- pgsui/utils/__init__.py +0 -0
- pgsui/utils/classification_viz.py +608 -0
- pgsui/utils/logging_utils.py +22 -0
- pgsui/utils/misc.py +35 -480
- pgsui/utils/plotting.py +996 -829
- pgsui/utils/pretty_metrics.py +290 -0
- pgsui/utils/scorers.py +213 -666
- pg_sui-0.2.0.dist-info/RECORD +0 -75
- pg_sui-0.2.0.dist-info/top_level.txt +0 -3
- pgsui/example_data/phylip_files/test_n10.phy +0 -118
- pgsui/example_data/phylip_files/test_n100.phy +0 -118
- pgsui/example_data/phylip_files/test_n2.phy +0 -118
- pgsui/example_data/phylip_files/test_n500.phy +0 -118
- pgsui/example_data/structure_files/test.nopops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.nopops.2row.100sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.10sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.30sites.str +0 -234
- pgsui/example_data/structure_files/test.nopops.2row.allsites.str +0 -234
- pgsui/example_data/structure_files/test.pops.1row.10sites.str +0 -117
- pgsui/example_data/structure_files/test.pops.2row.10sites.str +0 -234
- pgsui/example_data/trees/test.iqtree +0 -376
- pgsui/example_data/trees/test.qmat +0 -5
- pgsui/example_data/trees/test.rate +0 -2033
- pgsui/example_data/trees/test.tre +0 -1
- pgsui/example_data/trees/test_n10.rate +0 -19
- pgsui/example_data/trees/test_n100.rate +0 -109
- pgsui/example_data/trees/test_n500.rate +0 -509
- pgsui/example_data/trees/test_siterates.txt +0 -2024
- pgsui/example_data/trees/test_siterates_n10.txt +0 -10
- pgsui/example_data/trees/test_siterates_n100.txt +0 -100
- pgsui/example_data/trees/test_siterates_n500.txt +0 -500
- pgsui/example_data/vcf_files/test.vcf +0 -244
- pgsui/example_data/vcf_files/test.vcf.gz +0 -0
- pgsui/example_data/vcf_files/test.vcf.gz.tbi +0 -0
- pgsui/impute/estimators.py +0 -1268
- pgsui/impute/impute.py +0 -1463
- pgsui/impute/simple_imputers.py +0 -1431
- pgsui/impute/supervised/iterative_imputer_fixedparams.py +0 -782
- pgsui/impute/supervised/iterative_imputer_gridsearch.py +0 -1024
- pgsui/impute/unsupervised/keras_classifiers.py +0 -697
- pgsui/impute/unsupervised/models/in_development/cnn_model.py +0 -486
- pgsui/impute/unsupervised/neural_network_imputers.py +0 -1440
- pgsui/impute/unsupervised/neural_network_methods.py +0 -1395
- pgsui/pg_sui.py +0 -261
- pgsui/utils/sequence_tools.py +0 -407
- simulation/sim_benchmarks.py +0 -333
- simulation/sim_treeparams.py +0 -475
- test/__init__.py +0 -0
- test/pg_sui_simtest.py +0 -215
- test/pg_sui_testing.py +0 -523
- test/test.py +0 -151
- test/test_pgsui.py +0 -374
- test/test_tkc.py +0 -185
pgsui/impute/unsupervised/models/nlpca_model.py

@@ -1,445 +1,206 @@
-import
-import os
-import sys
-import warnings
+from typing import List, Literal

-
-
-
-
+import numpy as np
+import torch
+import torch.nn as nn
+from snpio.utils.logging import LoggerManager

-
-
+from pgsui.impute.unsupervised.loss_functions import MaskedFocalLoss
+from pgsui.utils.logging_utils import configure_logger

-# Disable can't find cuda .dll errors. Also turns off GPU support.
-tf.config.set_visible_devices([], "GPU")

-
+class NLPCAModel(nn.Module):
+    r"""A non-linear Principal Component Analysis (NLPCA) decoder for genotypes.

-
-
-
+    This module maps a low-dimensional latent vector to logits over genotype states
+    (two classes for haploids or three for diploids) at every locus. It is a fully
+    connected network with optional batch normalization and dropout layers and is
+    used as the decoder inside the NLPCA imputer.

+    **Model Architecture**

-
-
-def deprecated(
-    date, instructions, warn_once=True
-):  # pylint: disable=unused-argument
-    def deprecated_wrapper(func):
-        return func
+    Let :math:`z \in \mathbb{R}^{d_{\text{latent}}}` be the latent vector. For a
+    network with :math:`L` hidden layers, the transformations are

-
+    .. math::

+        h_1 = f(W_1 z + b_1)

-
+    .. math::

-
-    Dropout,
-    Dense,
-    Reshape,
-    LeakyReLU,
-    PReLU,
-)
+        h_2 = f(W_2 h_1 + b_2)

-
+    .. math::

-
-try:
-    from ..neural_network_methods import NeuralNetworkMethods
-except (ModuleNotFoundError, ValueError, ImportError):
-    from impute.unsupervised.neural_network_methods import NeuralNetworkMethods
+        \vdots

+    .. math::

-
-    """NLPCA model to train and use to predict imputations.
+        h_L = f(W_L h_{L-1} + b_L)

-
+    The final layer produces logits of shape ``(batch_size, n_features, num_classes)``
+    by reshaping a linear projection back to the (loci, genotype-state) grid.

-
-        V (numpy.ndarray(float)): V should have been randomly initialized and will be used as the input data that gets refined during training. Defaults to None.
-
-        y (numpy.ndarray): Target values to predict. Actual input data. Defaults to None.
-
-        batch_size (int, optional): Batch size per epoch. Defaults to 32.
-
-        missing_mask (numpy.ndarray): Missing data mask for y. Defaults to None.
-
-        output_shape (int): Output units for n_features dimension. Output will be of shape (batch_size, n_features). Defaults to None.
-
-        n_components (int, optional): Number of features in input V to use. Defaults to 3.
-
-        weights_initializer (str, optional): Kernel initializer to use for initializing model weights. Defaults to "glorot_normal".
-
-        hidden_layer_sizes (List[int], optional): Output units for each hidden layer. List should be of same length as the number of hidden layers. Defaults to "midpoint".
-
-        num_hidden_layers (int, optional): Number of hidden layers to use. Defaults to 1.
-
-        hidden_activation (str, optional): Activation function to use for hidden layers. Defaults to "elu".
-
-        l1_penalty (float, optional): L1 regularization penalty to use to reduce overfitting. Defaults to 0.01.
-
-        l2_penalty (float, optional): L2 regularization penalty to use to reduce overfitting. Defaults to 0.01.
-
-        dropout_rate (float, optional): Dropout rate during training to reduce overfitting. Must be a float between 0 and 1. Defaults to 0.2.
-
-        num_classes (int, optional): Number of classes in output. Corresponds to the 3rd dimension of the output shape (batch_size, n_features, num_classes). Defaults to 1.
-
-        phase (NoneType): Here for compatibility with UBP.
-
-        sample_weight (numpy.ndarray, optional): 2D sample weights of shape (n_samples, n_features). Should have values for each class weighted. Defaults to None.
-
-    Example:
-        >>>model = NLPCAModel(V=V, y=y, batch_size=32, missing_mask=missing_mask, output_shape, n_components, weights_initializer, hidden_layer_sizes, num_hidden_layers, hidden_activation, l1_penalty, l2_penalty, dropout_rate, num_classes=3)
-        >>>
-        >>>model.compile(optimizer=optimizer, loss=loss_func, metrics=[my_metrics], run_eagerly=True)
-        >>>
-        >>>history = model.fit(X, y, batch_size=batch_size, epochs=epochs, callbacks=[MyCallback()], validation_split=validation_split, shuffle=False)
-
-    Raises:
-        TypeError: V, y, missing_mask, output_shape must not be NoneType.
-        ValueError: Maximum of 5 hidden layers.
+    **Loss Function**

+    Training minimizes ``MaskedFocalLoss``, which extends cross-entropy with class
+    weighting, focal re-weighting, and masking so that only observed genotypes
+    contribute to the objective.
     """

     def __init__(
         self,
-
-
-
-
-
-
-
-
-
-
-
-
-        dropout_rate=0.2,
-        num_classes=3,
-        phase=None,
-        sample_weight=None,
+        n_features: int,
+        prefix: str,
+        *,
+        num_classes: int = 4,
+        hidden_layer_sizes: List[int] | np.ndarray = [128, 64],
+        latent_dim: int = 2,
+        dropout_rate: float = 0.2,
+        activation: Literal["relu", "elu", "selu", "leaky_relu"] = "relu",
+        gamma: float = 2.0,
+        device: Literal["gpu", "cpu", "mps"] = "cpu",
+        verbose: bool = False,
+        debug: bool = False,
     ):
+        """Initializes the NLPCAModel.
+
+        Args:
+            n_features (int): The number of features (SNPs) in the input data.
+            prefix (str): A prefix used for logging.
+            num_classes (int): Number of genotype states per locus (2 for haploid, 3 for diploid in practice). Defaults to 4 for backward compatibility.
+            hidden_layer_sizes (list[int] | np.ndarray): A list of integers specifying the number of units in each hidden layer. Defaults to [128, 64].
+            latent_dim (int): The dimensionality of the latent space (the size of the bottleneck layer). Defaults to 2.
+            dropout_rate (float): The dropout rate applied to each hidden layer for regularization. Defaults to 0.2.
+            activation (Literal["relu", "elu", "selu", "leaky_relu"]): The non-linear activation function to use in hidden layers. Defaults to 'relu'.
+            gamma (float): The focusing parameter for the focal loss function, which down-weights well-classified examples. Defaults to 2.0.
+            device (Literal["gpu", "cpu", "mps"]): The PyTorch device to run the model on. Defaults to 'cpu'.
+            verbose (bool): If True, enables detailed logging. Defaults to False.
+            debug (bool): If True, enables debug mode. Defaults to False.
+        """
         super(NLPCAModel, self).__init__()

-
-
-
-        if V is None:
-            self._V = nn.init_weights(y.shape[0], n_components)
-        elif isinstance(V, dict):
-            self._V = V[n_components]
-        else:
-            self._V = V
-
-        self._y = y
-
-        hidden_layer_sizes = nn.validate_hidden_layers(
-            hidden_layer_sizes, num_hidden_layers
-        )
-
-        hidden_layer_sizes = nn.get_hidden_layer_sizes(
-            y.shape[1], self._V.shape[1], hidden_layer_sizes
-        )
-
-        nn.validate_model_inputs(y, missing_mask, output_shape)
-
-        self._missing_mask = missing_mask
-        self.weights_initializer = weights_initializer
-        self.phase = phase
-        self.dropout_rate = dropout_rate
-        self._sample_weight = sample_weight
-
-        ### NOTE: I tried using just _V as the input to be refined, but it
-        # wasn't getting updated. So I copy it here and it works.
-        # V_latent is refined during train_step.
-        self.V_latent_ = self._V.copy()
-
-        # Initialize parameters used during train_step.
-        self._batch_idx = 0
-        self._batch_size = batch_size
-        self.n_components = n_components
-
-        if l1_penalty == 0.0 and l2_penalty == 0.0:
-            kernel_regularizer = None
-        else:
-            kernel_regularizer = l1_l2(l1_penalty, l2_penalty)
-
-        self.kernel_regularizer = kernel_regularizer
-        kernel_initializer = weights_initializer
-
-        if hidden_activation.lower() == "leaky_relu":
-            activation = LeakyReLU(alpha=0.01)
-
-        elif hidden_activation.lower() == "prelu":
-            activation = PReLU()
-
-        elif hidden_activation.lower() == "selu":
-            activation = "selu"
-            kernel_initializer = "lecun_normal"
-
-        else:
-            activation = hidden_activation
-
-        if num_hidden_layers > 5:
-            raise ValueError(
-                f"The maximum number of hidden layers is 5, but got "
-                f"{num_hidden_layers}"
-            )
-
-        self.dense2 = None
-        self.dense3 = None
-        self.dense4 = None
-        self.dense5 = None
-
-        # Construct multi-layer perceptron.
-        # Add hidden layers dynamically.
-        self.dense1 = Dense(
-            hidden_layer_sizes[0],
-            input_shape=(n_components,),
-            activation=activation,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
+        logman = LoggerManager(
+            name=__name__, prefix=prefix, verbose=verbose, debug=debug
         )
-
-
-        self.dense2 = Dense(
-            hidden_layer_sizes[1],
-            activation=activation,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
-        )
-
-        if num_hidden_layers >= 3:
-            self.dense3 = Dense(
-                hidden_layer_sizes[2],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-            )
-
-        if num_hidden_layers >= 4:
-            self.dense4 = Dense(
-                hidden_layer_sizes[3],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-            )
-
-        if num_hidden_layers == 5:
-            self.dense5 = Dense(
-                hidden_layer_sizes[4],
-                activation=activation,
-                kernel_initializer=kernel_initializer,
-                kernel_regularizer=kernel_regularizer,
-            )
-
-        self.output1 = Dense(
-            output_shape * num_classes,
-            kernel_initializer=kernel_initializer,
-            kernel_regularizer=kernel_regularizer,
+        self.logger = configure_logger(
+            logman.get_logger(), verbose=verbose, debug=debug
         )

-        self.
-
-        self.
-
-
-        x = self.dense1(inputs)
-        x = self.dropout_layer(x, training=training)
-        if self.dense2 is not None:
-            x = self.dense2(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense3 is not None:
-            x = self.dense3(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense4 is not None:
-            x = self.dense4(x)
-            x = self.dropout_layer(x, training=training)
-        if self.dense5 is not None:
-            x = self.dense5(x)
-            x = self.dropout_layer(x, training=training)
-
-        x = self.output1(x)
-        return self.rshp(x)
-
-    def model(self):
-        x = tf.keras.Input(shape=(self.n_components,))
-        return tf.keras.Model(inputs=[x], outputs=self.call(x))
-
-    def set_model_outputs(self):
-        x = tf.keras.Input(shape=(self.n_components,))
-        model = tf.keras.Model(inputs=[x], outputs=self.call(x))
-        self.outputs = model.outputs
-
-    def train_step(self, data):
-        """Train step function. Parameters are set in UBPCallbacks callback."""
-        y = self._y
-
-        (
-            v,
-            y_true,
-            sample_weight,
-            missing_mask,
-            batch_start,
-            batch_end,
-        ) = self.nn.prepare_training_batches(
-            self.V_latent_,
-            y,
-            self._batch_size,
-            self._batch_idx,
-            True,
-            self.n_components,
-            self._sample_weight,
-            self._missing_mask,
-        )
+        self.n_features = n_features
+        self.num_classes = num_classes
+        self.latent_dim = latent_dim
+        self.gamma = gamma
+        self.device = device

-
+        if isinstance(hidden_layer_sizes, np.ndarray):
+            hidden_layer_sizes = hidden_layer_sizes.tolist()

-
-
-
-        )
-
-
-
-
-            tf.convert_to_tensor(y_true, dtype=tf.float32),
-            tf.reduce_any(tf.not_equal(y_true, -1), axis=2),
-        )
+        layers = []
+        input_dim = latent_dim
+        for size in hidden_layer_sizes:
+            layers.append(nn.Linear(input_dim, size))
+            layers.append(nn.BatchNorm1d(size))
+            layers.append(nn.Dropout(dropout_rate))
+            layers.append(self._resolve_activation(activation))
+            input_dim = size

-        #
-
-
-        # the UBP manuscript.
-        with tf.GradientTape(persistent=True) as tape:
-            # Forward pass. Watch input tensor v.
-            tape.watch(v)
-            y_pred = self(v, training=True)
-            y_pred_masked = tf.boolean_mask(
-                y_pred, tf.reduce_any(tf.not_equal(y_true, -1), axis=2)
-            )
-            ### NOTE: If you get the error, "'tuple' object has no attribute
-            ### 'rank'", then convert y_true to a tensor object."
-            loss = self.compiled_loss(
-                y_true_masked,
-                y_pred_masked,
-                sample_weight=sample_weight_masked,
-                regularization_losses=self.losses,
-            )
-
-            # Refine the watched variables with
-            # gradient descent backpropagation
-            gradients = tape.gradient(loss, self.trainable_variables)
-            self.optimizer.apply_gradients(
-                zip(gradients, self.trainable_variables)
-            )
+        # Final layer output size is now n_features * num_classes
+        final_output_size = self.n_features * self.num_classes
+        layers.append(nn.Linear(hidden_layer_sizes[-1], final_output_size))

-
-        vgrad = tape.gradient(loss, src)
-        self.optimizer.apply_gradients(zip(vgrad, src))
+        self.phase23_decoder = nn.Sequential(*layers)

-
+        # Reshape tuple reflects the output structure
+        self.reshape = (self.n_features, self.num_classes)

-
-
-
-
-            y_pred_masked,
-            sample_weight=sample_weight_masked,
-        )
+    def _resolve_activation(
+        self, activation: Literal["relu", "elu", "selu", "leaky_relu"]
+    ) -> nn.Module:
+        """Resolves an activation function from a string name.

-
-        # to work. Otherwise it can't convert a Tensor object to a numpy array.
-        # There is really no other way to set v back to V_latent_ in graph
-        # mode as far as I know. eager execution is slower, so it would be nice
-        # to find a way to do this without converting to numpy.
-        self.V_latent_[batch_start:batch_end, :] = v.numpy()
+        This method acts as a factory, returning the correct PyTorch activation function module based on the provided name.

-
-
+        Args:
+            activation (Literal["relu", "elu", "selu", "leaky_relu"]): The name of the activation function.

-
-
-        """Randomly initialized input that gets refined during training.
-        :noindex:
-        """
-        return self.V_latent_
+        Returns:
+            nn.Module: The corresponding PyTorch activation function module.

-
-
-        """Batch (=step) size per epoch.
-        :noindex:
+        Raises:
+            ValueError: If the provided activation name is not supported.
         """
-
+        act: str = activation.lower()
+
+        if act == "relu":
+            return nn.ReLU()
+        elif act == "elu":
+            return nn.ELU()
+        elif act == "leaky_relu":
+            return nn.LeakyReLU()
+        elif act == "selu":
+            return nn.SELU()
+        else:
+            msg = f"Activation function {act} not supported."
+            self.logger.error(msg)
+            raise ValueError(msg)

-
-
-        """Current batch (=step) index.
-        :noindex:
-        """
-        return self._batch_idx
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Performs the forward pass of the model.

-
-
-        """Full dataset y.
-        :noindex:
-        """
-        return self._y
+        The input tensor is passed through the decoder network to produce logits,
+        which are reshaped to align with the locus-by-class grid used by the loss.

-
-
-        """Missing mask of shape (y.shape[0], y.shape[1])
-        :noindex:
-        """
-        return self._missing_mask
+        Args:
+            x (torch.Tensor): The input tensor, which should represent the latent space vector.

-
-
-        """Sample weights of shape (y.shape[0], y.shape[1])
-        :noindex:
+        Returns:
+            torch.Tensor: The reconstructed output tensor of shape `(batch_size, n_features, num_classes)`.
         """
-
+        x = self.phase23_decoder(x)

-
-
-        """Set randomly initialized input. Refined during training.
-        :noindex:
-        """
-        self.V_latent_ = value
+        # Reshape to (batch, features, num_classes)
+        return x.view(-1, *self.reshape)

-
-
-
-        :
+    def compute_loss(
+        self,
+        y: torch.Tensor,
+        outputs: torch.Tensor,
+        mask: torch.Tensor | None = None,
+        class_weights: torch.Tensor | None = None,
+        gamma: float = 2.0,
+    ) -> torch.Tensor:
+        """Computes the masked focal loss between model outputs and ground truth.
+
+        This method calculates the loss value, handling class imbalance with weights and ignoring masked (missing) values.
+
+        Args:
+            y (torch.Tensor): Integer ground-truth genotypes of shape `(batch_size, n_features)`.
+            outputs (torch.Tensor): Logits of shape `(batch_size, n_features, num_classes)`.
+            mask (torch.Tensor | None): An optional boolean mask indicating which elements should be included in the loss calculation. Defaults to None.
+            class_weights (torch.Tensor | None): An optional tensor of weights for each class to address imbalance. Defaults to None.
+            gamma (float): The focusing parameter for the focal loss. Defaults to 2.0.
+
+        Returns:
+            torch.Tensor: The computed scalar loss value.
         """
-
+        if class_weights is None:
+            class_weights = torch.ones(self.num_classes, device=outputs.device)

-
-
-        """Set current batch (=step) index.
-        :noindex:
-        """
-        self._batch_idx = int(value)
+        if mask is None:
+            mask = torch.ones_like(y, dtype=torch.bool)

-
-
-
-
-
-
+        # Explicitly flatten all tensors to the (N, C) and (N,) format.
+        # This creates a clear contract with the new MaskedFocalLoss function.
+        n_classes = outputs.shape[-1]
+        logits_flat = outputs.reshape(-1, n_classes)
+        targets_flat = y.reshape(-1)
+        mask_flat = mask.reshape(-1)

-
-    def missing_mask(self, value):
-        """Set missing_mask after each epoch.
-        :noindex:
-        """
-        self._missing_mask = value
+        criterion = MaskedFocalLoss(gamma=gamma, alpha=class_weights)

-
-
-
-
-
-        self._sample_weight = value
+        return criterion(
+            logits_flat.to(self.device),
+            targets_flat.to(self.device),
+            valid_mask=mask_flat.to(self.device),
+        )
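For orientation, here is a minimal usage sketch of the new PyTorch decoder introduced by this diff. It is hypothetical: it assumes pg-sui 1.6.14.dev9 (with its snpio dependency) is installed and importable, and the tensor sizes, prefix, and argument values are illustrative only, not package defaults.

    # Hypothetical usage sketch of the NLPCAModel decoder shown in the diff above.
    # Assumes pg-sui 1.6.14.dev9+ is importable; values below are illustrative only.
    import torch

    from pgsui.impute.unsupervised.models.nlpca_model import NLPCAModel

    n_samples, n_features, num_classes, latent_dim = 8, 100, 3, 2

    model = NLPCAModel(
        n_features=n_features,
        prefix="example",          # used only for logging
        num_classes=num_classes,
        hidden_layer_sizes=[128, 64],
        latent_dim=latent_dim,
        dropout_rate=0.2,
        activation="relu",
        device="cpu",
    )

    # NLPCA has no encoder: the latent vectors are free parameters that the
    # imputer refines jointly with the network weights during training.
    z = torch.randn(n_samples, latent_dim, requires_grad=True)
    logits = model(z)  # shape: (n_samples, n_features, num_classes)

    y = torch.randint(0, num_classes, (n_samples, n_features))  # genotype classes
    mask = torch.rand(n_samples, n_features) > 0.2               # True = observed
    loss = model.compute_loss(y, logits, mask=mask)              # masked focal loss
    loss.backward()  # gradients flow to both the weights and the latent z

Because only observed entries enter the loss (via the mask), missing genotypes influence neither the gradients nor the refined latent vectors.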