pertpy-0.7.0-py3-none-any.whl → pertpy-0.9.1-py3-none-any.whl

Files changed (56)
  1. pertpy/__init__.py +2 -1
  2. pertpy/data/__init__.py +61 -0
  3. pertpy/data/_dataloader.py +27 -23
  4. pertpy/data/_datasets.py +58 -0
  5. pertpy/metadata/__init__.py +2 -0
  6. pertpy/metadata/_cell_line.py +39 -70
  7. pertpy/metadata/_compound.py +3 -4
  8. pertpy/metadata/_drug.py +2 -6
  9. pertpy/metadata/_look_up.py +38 -51
  10. pertpy/metadata/_metadata.py +7 -10
  11. pertpy/metadata/_moa.py +2 -6
  12. pertpy/plot/__init__.py +0 -5
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +6 -7
  15. pertpy/tools/__init__.py +67 -6
  16. pertpy/tools/_augur.py +14 -15
  17. pertpy/tools/_cinemaot.py +2 -2
  18. pertpy/tools/_coda/_base_coda.py +118 -142
  19. pertpy/tools/_coda/_sccoda.py +16 -15
  20. pertpy/tools/_coda/_tasccoda.py +21 -22
  21. pertpy/tools/_dialogue.py +18 -23
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +21 -16
  32. pertpy/tools/_distances/_distances.py +406 -70
  33. pertpy/tools/_enrichment.py +10 -15
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +77 -54
  36. pertpy/tools/_mixscape.py +15 -11
  37. pertpy/tools/_perturbation_space/_clustering.py +5 -2
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +21 -23
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
  41. pertpy/tools/_perturbation_space/_simple.py +3 -3
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +33 -28
  45. pertpy/tools/_scgen/_utils.py +2 -2
  46. {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/METADATA +32 -14
  47. pertpy-0.9.1.dist-info/RECORD +57 -0
  48. {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/WHEEL +1 -1
  49. pertpy/plot/_augur.py +0 -171
  50. pertpy/plot/_coda.py +0 -601
  51. pertpy/plot/_guide_rna.py +0 -64
  52. pertpy/plot/_milopy.py +0 -209
  53. pertpy/plot/_mixscape.py +0 -355
  54. pertpy/tools/_differential_gene_expression.py +0 -325
  55. pertpy-0.7.0.dist-info/RECORD +0 -53
  56. {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/licenses/LICENSE +0 -0
pertpy/tools/_perturbation_space/_comparison.py

@@ -0,0 +1,112 @@
+ from typing import TYPE_CHECKING
+
+ import numpy as np
+ import pynndescent
+ from scipy.sparse import issparse
+ from scipy.sparse import vstack as sp_vstack
+ from sklearn.base import ClassifierMixin
+ from sklearn.linear_model import LogisticRegression
+
+ if TYPE_CHECKING:
+     from numpy.typing import NDArray
+
+
+ class PerturbationComparison:
+     """Comparison between real and simulated perturbations."""
+
+     def compare_classification(
+         self,
+         real: np.ndarray,
+         simulated: np.ndarray,
+         control: np.ndarray,
+         clf: ClassifierMixin | None = None,
+     ) -> float:
+         """Compare classification accuracy between real and simulated perturbations.
+
+         Trains a classifier on the real perturbation data + the control data and reports a normalized
+         classification accuracy on the simulated perturbation.
+
+         Args:
+             real: Real perturbed data.
+             simulated: Simulated perturbed data.
+             control: Control data.
+             clf: sklearn classifier to use, `sklearn.linear_model.LogisticRegression` if not provided.
+         """
+         assert real.shape[1] == simulated.shape[1] == control.shape[1]
+         if clf is None:
+             clf = LogisticRegression()
+         n_x = real.shape[0]
+         data = sp_vstack((real, control)) if issparse(real) else np.vstack((real, control))
+         labels = np.concatenate([np.full(real.shape[0], "comp"), np.full(control.shape[0], "ctrl")])
+
+         clf.fit(data, labels)
+         norm_score = clf.score(simulated, np.full(simulated.shape[0], "comp")) / clf.score(real, labels[:n_x])
+         norm_score = min(1.0, norm_score)
+
+         return norm_score
+
+     def compare_knn(
+         self,
+         real: np.ndarray,
+         simulated: np.ndarray,
+         control: np.ndarray | None = None,
+         use_simulated_for_knn: bool = False,
+         n_neighbors: int = 20,
+         random_state: int = 0,
+         n_jobs: int = 1,
+     ) -> dict[str, float]:
+         """Calculate proportions of real perturbed and control data points for simulated data.
+
+         Computes proportions of real perturbed, control and simulated (if `use_simulated_for_knn=True`)
+         data points for simulated data. If `control` is not provided, builds the knn graph from
+         real perturbed + simulated perturbed.
+
+         Args:
+             real: Real perturbed data.
+             simulated: Simulated perturbed data.
+             control: Control data.
+             use_simulated_for_knn: Include simulated perturbed data (`simulated`) in the knn graph. Only valid when
+                 `control` is provided.
+             n_neighbors: Number of neighbors to use in k-neighbor graph.
+             random_state: Random state used for k-neighbor graph construction.
+             n_jobs: Number of cores to use.
+
+         """
+         assert real.shape[1] == simulated.shape[1]
+         if control is not None:
+             assert real.shape[1] == control.shape[1]
+
+         n_y = simulated.shape[0]
+
+         if control is None:
+             index_data = sp_vstack((simulated, real)) if issparse(real) else np.vstack((simulated, real))
+         else:
+             datas = (simulated, real, control) if use_simulated_for_knn else (real, control)
+             index_data = sp_vstack(datas) if issparse(real) else np.vstack(datas)
+
+         y_in_index = use_simulated_for_knn or control is None
+         c_in_index = control is not None
+         label_groups = ["comp"]
+         labels: NDArray[np.str_] = np.full(index_data.shape[0], "comp")
+         if y_in_index:
+             labels[:n_y] = "siml"
+             label_groups.append("siml")
+         if c_in_index:
+             labels[-control.shape[0] :] = "ctrl"
+             label_groups.append("ctrl")
+
+         index = pynndescent.NNDescent(
+             index_data,
+             n_neighbors=max(50, n_neighbors),
+             random_state=random_state,
+             n_jobs=n_jobs,
+         )
+         indices = index.query(simulated, k=n_neighbors)[0]
+
+         uq, uq_counts = np.unique(labels[indices], return_counts=True)
+         uq_counts_norm = uq_counts / uq_counts.sum()
+         counts = dict(zip(label_groups, [0.0] * len(label_groups), strict=False))
+         for group, count_norm in zip(uq, uq_counts_norm, strict=False):
+             counts[group] = count_norm
+
+         return counts
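
A minimal usage sketch of the new comparison API with random data (the arrays, their shapes, and the values are purely illustrative; the import path follows the file location listed above):

    import numpy as np
    from pertpy.tools._perturbation_space._comparison import PerturbationComparison

    rng = np.random.default_rng(0)
    real = rng.normal(loc=1.0, size=(100, 50))                # real perturbed cells x features
    simulated = real + rng.normal(scale=0.1, size=(100, 50))  # simulated counterpart
    control = rng.normal(loc=0.0, size=(100, 50))             # control cells

    comparison = PerturbationComparison()
    accuracy = comparison.compare_classification(real, simulated, control)  # normalized, capped at 1.0
    fractions = comparison.compare_knn(real, simulated, control)            # e.g. {"comp": 0.9, "ctrl": 0.1}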
pertpy/tools/_perturbation_space/_discriminator_classifiers.py

@@ -1,7 +1,6 @@
  from __future__ import annotations

  import warnings
- from typing import TYPE_CHECKING, Literal

  import anndata
  import numpy as np
@@ -42,12 +41,12 @@ class LRClassifierSpace(PerturbationSpace):

     Args:
         adata: AnnData object of size cells x genes
-        target_col: .obs column that stores the perturbations. Defaults to "perturbations".
-        layer_key: Layer in adata to use. Defaults to None.
+        target_col: .obs column that stores the perturbations.
+        layer_key: Layer in adata to use.
         embedding_key: Key of the embedding in obsm to be used as data for the logistic regression classifier.
-            Can only be specified if layer_key is None. Defaults to None.
-        test_split_size: Fraction of data to put in the test set. Default to 0.2.
-        max_iter: Maximum number of iterations taken for the solvers to converge. Defaults to 1000.
+            Can only be specified if layer_key is None.
+        test_split_size: Fraction of data to put in the test set.
+        max_iter: Maximum number of iterations taken for the solvers to converge.

     Returns:
         AnnData object with the logistic regression coefficients as the embedding in X and the perturbations as .obs['perturbations'].
@@ -163,24 +162,23 @@ class MLPClassifierSpace(PerturbationSpace):

     Args:
         adata: AnnData object of size cells x genes
-        target_col: .obs column that stores the perturbations. Defaults to "perturbations".
-        layer_key: Layer in adata to use. Defaults to None.
+        target_col: .obs column that stores the perturbations.
+        layer_key: Layer in adata to use.
         hidden_dim: List of number of neurons in each hidden layers of the neural network. For instance, [512, 256]
             will create a neural network with two hidden layers, the first with 512 neurons and the second with 256 neurons.
-            Defaults to [512].
-        dropout: Amount of dropout applied, constant for all layers. Defaults to 0.
-        batch_norm: Whether to apply batch normalization. Defaults to True.
-        batch_size: The batch size, i.e. the number of datapoints to use in one forward/backward pass. Defaults to 256.
+        dropout: Amount of dropout applied, constant for all layers.
+        batch_norm: Whether to apply batch normalization.
+        batch_size: The batch size, i.e. the number of datapoints to use in one forward/backward pass.
         test_split_size: Fraction of data to put in the test set. Default to 0.2.
         validation_split_size: Fraction of data to put in the validation set of the resultant train set.
            E.g. a test_split_size of 0.2 and a validation_split_size of 0.25 means that 25% of 80% of the data
-           will be used for validation. Defaults to 0.25.
-        max_epochs: Maximum number of epochs for training. Defaults to 20.
+           will be used for validation.
+        max_epochs: Maximum number of epochs for training.
        val_epochs_check: Test performance on validation dataset after every val_epochs_check training epochs.
            Note that this affects early stopping, as the model will be stopped if the validation performance does not
-           improve for patience epochs. Defaults to 2.
+           improve for patience epochs.
        patience: Number of validation performance checks without improvement, after which the early stopping flag
-           is activated and training is therefore stopped. Defaults to 2.
+           is activated and training is therefore stopped.

     Returns:
         AnnData whose `X` attribute is the perturbation embedding and whose .obs['perturbations'] are the names of the perturbations.
@@ -325,10 +323,10 @@ class MLP(torch.nn.Module):
         """
         Args:
             sizes: size of layers.
-            dropout: Dropout probability. Defaults to 0.0.
-            batch_norm: specifies if batch norm should be applied. Defaults to True.
-            layer_norm: specifies if layer norm should be applied, as commonly used in Transformers. Defaults to False.
-            last_layer_act: activation function of last layer. Defaults to "linear".
+            dropout: Dropout probability.
+            batch_norm: specifies if batch norm should be applied.
+            layer_norm: specifies if layer norm should be applied, as commonly used in Transformers.
+            last_layer_act: activation function of last layer.
         """
         super().__init__()
         layers = []
@@ -392,8 +390,8 @@ class PLDataset(Dataset):
         """
         Args:
             adata: AnnData object with observations and labels.
-            target_col: key with the perturbation labels numerically encoded. Defaults to 'perturbations'.
-            label_col: key with the perturbation labels. Defaults to 'perturbations'.
+            target_col: key with the perturbation labels numerically encoded.
+            label_col: key with the perturbation labels.
             layer_key: key of the layer to be used as data, otherwise .X
         """
@@ -410,7 +408,7 @@ class PLDataset(Dataset):

     def __getitem__(self, idx):
         """Returns a sample and corresponding perturbations applied (labels)"""
-        sample = self.data[idx].A.squeeze() if scipy.sparse.issparse(self.data) else self.data[idx]
+        sample = self.data[idx].toarray().squeeze() if scipy.sparse.issparse(self.data) else self.data[idx]
         num_label = self.labels.iloc[idx]
         str_label = self.pert_labels.iloc[idx]

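
The `__getitem__` change replaces the `.A` shorthand with an explicit `.toarray()` call; `.A` is being phased out for SciPy sparse arrays while `.toarray()` remains stable. A self-contained sketch of the same densification pattern:

    import numpy as np
    import scipy.sparse

    X = scipy.sparse.random(10, 5, density=0.3, format="csr", random_state=0)
    idx = 3
    # densify a single row: explicit replacement for the old `X[idx].A`
    sample = X[idx].toarray().squeeze() if scipy.sparse.issparse(X) else X[idx]
    assert isinstance(sample, np.ndarray) and sample.shape == (5,)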
pertpy/tools/_perturbation_space/_perturbation_space.py

@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING
  import numpy as np
  import pandas as pd
  from anndata import AnnData
- from pynndescent import NNDescent
+ from lamin_utils import logger
  from rich import print

  if TYPE_CHECKING:
@@ -40,13 +40,13 @@ class PerturbationSpace:

     Args:
         adata: Anndata object of size cells x genes.
-        target_col: .obs column name that stores the label of the perturbation applied to each cell. Defaults to 'perturbations'.
-        group_col: .obs column name that stores the label of the group of eah cell. If None, ignore groups. Defaults to 'perturbations'.
-        reference_key: The key of the control values. Defaults to 'control'.
-        layer_key: Key of the AnnData layer to use for computation. Defaults to the `X` matrix otherwise.
-        new_layer_key: the results are stored in the given layer. Defaults to 'control_diff'.
-        embedding_key: `obsm` key of the AnnData embedding to use for computation. Defaults to the 'X' matrix otherwise.
-        new_embedding_key: Results are stored in a new embedding in `obsm` with this key. Defaults to 'control_diff'.
+        target_col: .obs column name that stores the label of the perturbation applied to each cell.
+        group_col: .obs column name that stores the label of the group of each cell. If None, ignore groups.
+        reference_key: The key of the control values.
+        layer_key: Key of the AnnData layer to use for computation.
+        new_layer_key: the results are stored in the given layer.
+        embedding_key: `obsm` key of the AnnData embedding to use for computation.
+        new_embedding_key: Results are stored in a new embedding in `obsm` with this key.
         all_data: if True, do the computation in all data representations (X, all layers and all embeddings)
         copy: If True returns a new Anndata of same size with the new column; otherwise it updates the initial AnnData object.
@@ -150,14 +150,14 @@ class PerturbationSpace:
         ensure_consistency: bool = False,
         target_col: str = "perturbation",
     ) -> tuple[AnnData, AnnData] | AnnData:
-        """Add perturbations linearly. Assumes input of size n_perts x dimensionality
+        """Add perturbations linearly. Assumes input of size n_perts x dimensionality.

         Args:
             adata: Anndata object of size n_perts x dim.
             perturbations: Perturbations to add.
-            reference_key: perturbation source from which the perturbation summation starts. Defaults to 'control'.
+            reference_key: perturbation source from which the perturbation summation starts.
             ensure_consistency: If True, runs differential expression on all data matrices to ensure consistency of linear space.
-            target_col: .obs column name that stores the label of the perturbation applied to each cell. Defaults to 'perturbation'.
+            target_col: .obs column name that stores the label of the perturbation applied to each cell.

         Returns:
             Anndata object of size (n_perts+1) x dim, where the last row is the addition of the specified perturbations.
@@ -182,8 +182,8 @@ class PerturbationSpace:
             new_pert_name += perturbation + "+"

         if not ensure_consistency:
-            print(
-                "[bold yellow]Operation might be done in non-consistent space (perturbation - perturbation != control). \n"
+            logger.warning(
+                "Operation might be done in non-consistent space (perturbation - perturbation != control). \n"
                 "Subtract control perturbation needed for consistency of space in all data representations. \n"
                 "Run with ensure_consistency=True"
             )
@@ -264,9 +264,9 @@ class PerturbationSpace:
         Args:
             adata: Anndata object of size n_perts x dim.
             perturbations: Perturbations to subtract.
-            reference_key: Perturbation source from which the perturbation subtraction starts. Defaults to 'control'.
+            reference_key: Perturbation source from which the perturbation subtraction starts.
             ensure_consistency: If True, runs differential expression on all data matrices to ensure consistency of linear space.
-            target_col: .obs column name that stores the label of the perturbation applied to each cell. Defaults to 'perturbations'.
+            target_col: .obs column name that stores the label of the perturbation applied to each cell.

         Returns:
             Anndata object of size (n_perts+1) x dim, where the last row is the subtraction of the specified perturbations.
@@ -291,8 +291,8 @@ class PerturbationSpace:
             new_pert_name += perturbation + "-"

         if not ensure_consistency:
-            print(
-                "[bold yellow]Operation might be done in non-consistent space (perturbation - perturbation != control).\n"
+            logger.warning(
+                "Operation might be done in non-consistent space (perturbation - perturbation != control).\n"
                 "Subtract control perturbation needed for consistency of space in all data representations.\n"
                 "Run with ensure_consistency=True"
             )
@@ -372,10 +372,10 @@ class PerturbationSpace:

         Args:
             adata: The AnnData object containing single-cell data.
-            column: The column name in AnnData object to perform imputation on. Defaults to "perturbation".
-            target_val: The target value to impute. Defaults to "unknown".
-            n_neighbors: Number of neighbors to use for imputation. Defaults to 5.
-            use_rep: The key in `adata.obsm` where the embedding (UMAP, PCA, etc.) is stored. Defaults to 'X_umap'.
+            column: The column name in AnnData object to perform imputation on.
+            target_val: The target value to impute.
+            n_neighbors: Number of neighbors to use for imputation.
+            use_rep: The key in `adata.obsm` where the embedding (UMAP, PCA, etc.) is stored.

         Examples:
             >>> import pertpy as pt
@@ -396,6 +396,8 @@ class PerturbationSpace:

         embedding = adata.obsm[use_rep]

+        from pynndescent import NNDescent
+
         nnd = NNDescent(embedding, n_neighbors=n_neighbors)
         indices, _ = nnd.query(embedding, k=n_neighbors)

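
The last hunk defers the pynndescent import to the method body, so it is only paid for when the knn imputation actually runs. A minimal sketch of the query-then-majority-vote step the surrounding method performs (the data and labels are illustrative, not the method's real internals):

    import numpy as np

    rng = np.random.default_rng(0)
    embedding = rng.normal(size=(200, 10))  # stands in for adata.obsm["X_umap"]
    labels = np.array(["pert_a", "pert_b"] * 90 + ["unknown"] * 20, dtype=object)

    from pynndescent import NNDescent  # deferred, as in the hunk above

    nnd = NNDescent(embedding, n_neighbors=5)
    indices, _ = nnd.query(embedding, k=5)

    # impute each "unknown" cell from the majority label of its neighbors
    for i in np.flatnonzero(labels == "unknown"):
        votes = labels[indices[i]]
        votes = votes[votes != "unknown"]
        if votes.size:
            values, counts = np.unique(votes, return_counts=True)
            labels[i] = values[np.argmax(counts)]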
pertpy/tools/_perturbation_space/_simple.py

@@ -28,7 +28,7 @@ class CentroidSpace(PerturbationSpace):
        layer_key: If specified pseudobulk computation is done by using the specified layer. Otherwise, computation is done with .X
        embedding_key: `obsm` key of the AnnData embedding to use for computation. Defaults to the 'X' matrix otherwise.
        keep_obs: Whether .obs columns in the input AnnData should be kept in the output pseudobulk AnnData. Only .obs columns with the same value for
-           each cell of one perturbation are kept. Defaults to True.
+           each cell of one perturbation are kept.

     Returns:
         AnnData object with one observation per perturbation, storing the embedding data of the
@@ -129,7 +129,7 @@ class PseudobulkSpace(PerturbationSpace):
        adata: Anndata object of size cells x genes
        target_col: .obs column that stores the label of the perturbation applied to each cell.
        groups_col: Optional .obs column that stores a grouping label to consider for pseudobulk computation.
-           The summarized expression per perturbation (target_col) and group (groups_col) is computed. Defaults to None.
+           The summarized expression per perturbation (target_col) and group (groups_col) is computed.
        layer_key: If specified pseudobulk computation is done by using the specified layer. Otherwise, computation is done with .X
        embedding_key: `obsm` key of the AnnData embedding to use for computation. Defaults to the 'X' matrix otherwise.
        **kwargs: Are passed to decoupler's get_pseudobulk.
@@ -254,7 +254,7 @@ class DBSCANSpace(ClusteringSpace):
        adata: Anndata object of size cells x genes
        layer_key: If specified and exists in the adata, the clustering is done by using it. Otherwise, clustering is done with .X
        embedding_key: if specified and exists in the adata, the clustering is done with that embedding. Otherwise, clustering is done with .X
-       cluster_key: name of the .obs column to store the cluster labels. Defaults to 'dbscan'
+       cluster_key: name of the .obs column to store the cluster labels.
        copy: if True returns a new Anndata of same size with the new column; otherwise it updates the initial adata
        return_object: if True returns the clustering object
        **kwargs: Are passed to sklearn's DBSCAN.
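
A hedged usage sketch of the pseudobulk workflow these docstrings describe (assuming `PseudobulkSpace` is exposed as `pt.tl.PseudobulkSpace`, with `kang_2018` standing in for any AnnData that carries a perturbation column; `mode` is forwarded to decoupler's get_pseudobulk):

    import pertpy as pt

    adata = pt.dt.kang_2018()        # .obs["label"] holds ctrl/stim
    ps = pt.tl.PseudobulkSpace()
    pb_adata = ps.compute(adata, target_col="label", mode="mean")
    print(pb_adata)                  # one observation per perturbation label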
pertpy/tools/_scgen/__init__.py

@@ -1 +1 @@
- from pertpy.tools._scgen._scgen import SCGEN
+ from pertpy.tools._scgen._scgen import Scgen
pertpy/tools/_scgen/_base_components.py

@@ -28,7 +28,7 @@ class FlaxEncoder(nn.Module):

        Args:
            x: The input data matrix.
-           training: Whether
+           training: Whether to use running training average.

        Returns:
            Mean and variance.
@@ -69,12 +69,11 @@ class FlaxDecoder(nn.Module):

        Args:
            x: Input data.
-           training:
+           training: Whether to use running training average.

        Returns:
            Decoded data.
        """
-
        training = nn.merge_param("training", self.training, training)

        for _ in range(self.n_layers):
pertpy/tools/_scgen/_scgen.py

@@ -10,6 +10,7 @@ import scanpy as sc
  from adjustText import adjust_text
  from anndata import AnnData
  from jax import Array
+ from lamin_utils import logger
  from scipy import stats
  from scvi import REGISTRY_KEYS
  from scvi.data import AnnDataManager
@@ -26,7 +27,7 @@ if TYPE_CHECKING:
  font = {"family": "Arial", "size": 14}


- class SCGEN(JaxTrainingMixin, BaseModelClass):
+ class Scgen(JaxTrainingMixin, BaseModelClass):
      """Jax Implementation of scGen model for batch removal and perturbation prediction."""

      def __init__(
@@ -49,7 +50,7 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
            **model_kwargs,
        )
        self._model_summary_string = (
-           f"SCGEN Model with the following params: \nn_hidden: {n_hidden}, n_latent: {n_latent}, n_layers: {n_layers}, dropout_rate: "
+           f"Scgen Model with the following params: \nn_hidden: {n_hidden}, n_latent: {n_latent}, n_layers: {n_layers}, dropout_rate: "
            f"{dropout_rate}"
        )
        self.init_params_ = self._get_init_params(locals())
@@ -79,8 +80,8 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
        Examples:
            >>> import pertpy as pt
            >>> data = pt.dt.kang_2018()
-           >>> pt.tl.SCGEN.setup_anndata(data, batch_key="label", labels_key="cell_type")
-           >>> model = pt.tl.SCGEN(data)
+           >>> pt.tl.Scgen.setup_anndata(data, batch_key="label", labels_key="cell_type")
+           >>> model = pt.tl.Scgen(data)
            >>> model.train(max_epochs=10, batch_size=64, early_stopping=True, early_stopping_patience=5)
            >>> pred, delta = model.predict(ctrl_key="ctrl", stim_key="stim", celltype_to_predict="CD4 T cells")
        """
@@ -166,8 +167,8 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
        Examples:
            >>> import pertpy as pt
            >>> data = pt.dt.kang_2018()
-           >>> pt.tl.SCGEN.setup_anndata(data, batch_key="label", labels_key="cell_type")
-           >>> model = pt.tl.SCGEN(data)
+           >>> pt.tl.Scgen.setup_anndata(data, batch_key="label", labels_key="cell_type")
+           >>> model = pt.tl.Scgen(data)
            >>> model.train(max_epochs=10, batch_size=64, early_stopping=True, early_stopping_patience=5)
            >>> decoded_X = model.get_decoded_expression()
        """
@@ -200,8 +201,8 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
        Examples:
            >>> import pertpy as pt
            >>> data = pt.dt.kang_2018()
-           >>> pt.tl.SCGEN.setup_anndata(data, batch_key="label", labels_key="cell_type")
-           >>> model = pt.tl.SCGEN(data)
+           >>> pt.tl.Scgen.setup_anndata(data, batch_key="label", labels_key="cell_type")
+           >>> model = pt.tl.Scgen(data)
            >>> model.train(max_epochs=10, batch_size=64, early_stopping=True, early_stopping_patience=5)
            >>> corrected_adata = model.batch_removal()
        """
@@ -304,7 +305,7 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
        Examples:
            >>> import pertpy as pt
            >>> data = pt.dt.kang_2018()
-           >>> pt.tl.SCGEN.setup_anndata(data, batch_key="label", labels_key="cell_type")
+           >>> pt.tl.Scgen.setup_anndata(data, batch_key="label", labels_key="cell_type")
        """
        setup_method_args = cls._get_setup_method_args(**locals())
        anndata_fields = [
@@ -345,8 +346,8 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
        Examples:
            >>> import pertpy as pt
            >>> data = pt.dt.kang_2018()
-           >>> pt.tl.SCGEN.setup_anndata(data, batch_key="label", labels_key="cell_type")
-           >>> model = pt.tl.SCGEN(data)
+           >>> pt.tl.Scgen.setup_anndata(data, batch_key="label", labels_key="cell_type")
+           >>> model = pt.tl.Scgen(data)
            >>> model.train(max_epochs=10, batch_size=64, early_stopping=True, early_stopping_patience=5)
            >>> latent_X = model.get_latent_representation()
        """
@@ -403,19 +404,19 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
            gene_list: list of gene names to be plotted.
            show: if `True`: will show to the plot after saving it.
            top_100_genes: List of the top 100 differentially expressed genes. Specify if you want the top 100 DEGs to be assessed extra.
-           verbose: Specify if you want information to be printed while creating the plot, defaults to `False`.
-           legend: if `True`: plots a legend, defaults to `True`.
+           verbose: Specify if you want information to be printed while creating the plot.
+           legend: Whether to plot a legend.
            title: Set if you want the plot to display a title.
-           x_coeff: Offset to print the R^2 value in x-direction, defaults to 0.3.
-           y_coeff: Offset to print the R^2 value in y-direction, defaults to 0.8.
-           fontsize: Fontsize used for text in the plot, defaults to 14.
+           x_coeff: Offset to print the R^2 value in x-direction.
+           y_coeff: Offset to print the R^2 value in y-direction.
+           fontsize: Fontsize used for text in the plot.
            **kwargs:

        Examples:
            >>> import pertpy as pt
            >>> data = pt.dt.kang_2018()
-           >>> pt.tl.SCGEN.setup_anndata(data, batch_key="label", labels_key="cell_type")
-           >>> scg = pt.tl.SCGEN(data)
+           >>> pt.tl.Scgen.setup_anndata(data, batch_key="label", labels_key="cell_type")
+           >>> scg = pt.tl.Scgen(data)
            >>> scg.train(max_epochs=10, batch_size=64, early_stopping=True, early_stopping_patience=5)
            >>> pred, delta = scg.predict(ctrl_key='ctrl', stim_key='stim', celltype_to_predict='CD4 T cells')
            >>> pred.obs['label'] = 'pred'
@@ -444,12 +445,12 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
            y_diff = np.asarray(np.mean(stim_diff.X, axis=0)).ravel()
            m, b, r_value_diff, p_value_diff, std_err_diff = stats.linregress(x_diff, y_diff)
            if verbose:
-               print("top_100 DEGs mean: ", r_value_diff**2)
+               logger.info("top_100 DEGs mean: ", r_value_diff**2)
            x = np.asarray(np.mean(ctrl.X, axis=0)).ravel()
            y = np.asarray(np.mean(stim.X, axis=0)).ravel()
            m, b, r_value, p_value, std_err = stats.linregress(x, y)
            if verbose:
-               print("All genes mean: ", r_value**2)
+               logger.info("All genes mean: ", r_value**2)
            df = pd.DataFrame({axis_keys["x"]: x, axis_keys["y"]: y})
            ax = sns.regplot(x=axis_keys["x"], y=axis_keys["y"], data=df)
            ax.tick_params(labelsize=fontsize)
@@ -540,12 +541,12 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
            gene_list: list of gene names to be plotted.
            show: if `True`: will show to the plot after saving it.
            top_100_genes: List of the top 100 differentially expressed genes. Specify if you want the top 100 DEGs to be assessed extra.
-           legend: if `True`: plots a legend, defaults to `True`.
+           legend: Whether to plot a legend.
            title: Set if you want the plot to display a title.
-           verbose: Specify if you want information to be printed while creating the plot, defaults to `False`.
-           x_coeff: Offset to print the R^2 value in x-direction, defaults to 0.3.
-           y_coeff: Offset to print the R^2 value in y-direction, defaults to 0.8.
-           fontsize: Fontsize used for text in the plot, defaults to 14.
+           verbose: Specify if you want information to be printed while creating the plot.
+           x_coeff: Offset to print the R^2 value in x-direction.
+           y_coeff: Offset to print the R^2 value in y-direction.
+           fontsize: Fontsize used for text in the plot.
        """
        import seaborn as sns

@@ -566,14 +567,14 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):
            y_diff = np.asarray(np.var(stim_diff.X, axis=0)).ravel()
            m, b, r_value_diff, p_value_diff, std_err_diff = stats.linregress(x_diff, y_diff)
            if verbose:
-               print("Top 100 DEGs var: ", r_value_diff**2)
+               logger.info("Top 100 DEGs var: ", r_value_diff**2)
            if "y1" in axis_keys.keys():
                real_stim = adata[adata.obs[condition_key] == axis_keys["y1"]]
            x = np.asarray(np.var(ctrl.X, axis=0)).ravel()
            y = np.asarray(np.var(stim.X, axis=0)).ravel()
            m, b, r_value, p_value, std_err = stats.linregress(x, y)
            if verbose:
-               print("All genes var: ", r_value**2)
+               logger.info("All genes var: ", r_value**2)
            df = pd.DataFrame({axis_keys["x"]: x, axis_keys["y"]: y})
            ax = sns.regplot(x=axis_keys["x"], y=axis_keys["y"], data=df)
            ax.tick_params(labelsize=fontsize)
@@ -637,7 +638,7 @@ class SCGEN(JaxTrainingMixin, BaseModelClass):

      def plot_binary_classifier(
          self,
-         scgen: SCGEN,
+         scgen: Scgen,
          adata: AnnData | None,
          delta: np.ndarray,
          ctrl_key: str,
@@ -699,3 +700,7 @@
            if not (show or save):
                return ax
            return None
+
+
+ # compatibility
+ SCGEN = Scgen
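
The trailing alias keeps code written against the old class name importable from the module; a quick check of what the alias guarantees (both names come straight from the diff above):

    from pertpy.tools._scgen._scgen import SCGEN, Scgen

    # the compatibility alias points at the renamed class, not a copy
    assert SCGEN is Scgen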
pertpy/tools/_scgen/_utils.py

@@ -27,7 +27,7 @@ def extractor(
      Example:
          .. code-block:: python

-             import SCGEN
+             import Scgen
              import anndata

              train_data = anndata.read("./data/train.h5ad")
@@ -58,7 +58,7 @@ def balancer(
      Example:
          .. code-block:: python

-             import SCGEN
+             import Scgen
              import anndata

              train_data = anndata.read("./train_kang.h5ad")