pertpy 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
- pertpy/__init__.py +4 -2
- pertpy/data/__init__.py +66 -1
- pertpy/data/_dataloader.py +28 -26
- pertpy/data/_datasets.py +261 -92
- pertpy/metadata/__init__.py +6 -0
- pertpy/metadata/_cell_line.py +795 -0
- pertpy/metadata/_compound.py +128 -0
- pertpy/metadata/_drug.py +238 -0
- pertpy/metadata/_look_up.py +569 -0
- pertpy/metadata/_metadata.py +70 -0
- pertpy/metadata/_moa.py +125 -0
- pertpy/plot/__init__.py +0 -13
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +89 -6
- pertpy/tools/__init__.py +48 -15
- pertpy/tools/_augur.py +329 -32
- pertpy/tools/_cinemaot.py +145 -6
- pertpy/tools/_coda/_base_coda.py +1237 -116
- pertpy/tools/_coda/_sccoda.py +66 -36
- pertpy/tools/_coda/_tasccoda.py +46 -39
- pertpy/tools/_dialogue.py +180 -77
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +29 -24
- pertpy/tools/_distances/_distances.py +584 -98
- pertpy/tools/_enrichment.py +460 -0
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +406 -49
- pertpy/tools/_mixscape.py +677 -55
- pertpy/tools/_perturbation_space/_clustering.py +10 -3
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +524 -0
- pertpy/tools/_perturbation_space/_perturbation_space.py +146 -52
- pertpy/tools/_perturbation_space/_simple.py +52 -11
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +706 -0
- pertpy/tools/_scgen/_utils.py +3 -5
- pertpy/tools/decoupler_LICENSE +674 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +48 -20
- pertpy-0.8.0.dist-info/RECORD +57 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -234
- pertpy/plot/_cinemaot.py +0 -81
- pertpy/plot/_coda.py +0 -1001
- pertpy/plot/_dialogue.py +0 -91
- pertpy/plot/_guide_rna.py +0 -82
- pertpy/plot/_milopy.py +0 -284
- pertpy/plot/_mixscape.py +0 -594
- pertpy/plot/_scgen.py +0 -337
- pertpy/tools/_differential_gene_expression.py +0 -99
- pertpy/tools/_metadata/__init__.py +0 -0
- pertpy/tools/_metadata/_cell_line.py +0 -613
- pertpy/tools/_metadata/_look_up.py +0 -342
- pertpy/tools/_perturbation_space/_discriminator_classifier.py +0 -381
- pertpy/tools/_scgen/_jax_scgen.py +0 -370
- pertpy-0.6.0.dist-info/RECORD +0 -50
- /pertpy/tools/_scgen/{_jax_scgenvae.py → _scgenvae.py} +0 -0
- {pertpy-0.6.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/tools/_perturbation_space/_perturbation_space.py
CHANGED
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING
 import numpy as np
 import pandas as pd
 from anndata import AnnData
+from lamin_utils import logger
 from rich import print

 if TYPE_CHECKING:
@@ -15,7 +16,7 @@ class PerturbationSpace:
     """Implements various ways of interacting with PerturbationSpaces.

     We differentiate between a cell space and a perturbation space.
-    Visually speaking, in cell spaces single
+    Visually speaking, in cell spaces single data points in an embeddings summarize a cell,
     whereas in a perturbation space, data points summarize whole perturbations.
     """

@@ -25,7 +26,8 @@ class PerturbationSpace:
     def compute_control_diff(  # type: ignore
         self,
         adata: AnnData,
-        target_col: str = "
+        target_col: str = "perturbation",
+        group_col: str = None,
         reference_key: str = "control",
         layer_key: str = None,
         new_layer_key: str = "control_diff",
@@ -33,26 +35,31 @@ class PerturbationSpace:
         new_embedding_key: str = "control_diff",
         all_data: bool = False,
         copy: bool = False,
-    ):
+    ) -> AnnData:
         """Subtract mean of the control from the perturbation.

         Args:
             adata: Anndata object of size cells x genes.
-            target_col: .obs column name that stores the label of the perturbation applied to each cell.
-
-
-
-
-
+            target_col: .obs column name that stores the label of the perturbation applied to each cell.
+            group_col: .obs column name that stores the label of the group of eah cell. If None, ignore groups.
+            reference_key: The key of the control values.
+            layer_key: Key of the AnnData layer to use for computation.
+            new_layer_key: the results are stored in the given layer.
+            embedding_key: `obsm` key of the AnnData embedding to use for computation.
+            new_embedding_key: Results are stored in a new embedding in `obsm` with this key.
             all_data: if True, do the computation in all data representations (X, all layers and all embeddings)
             copy: If True returns a new Anndata of same size with the new column; otherwise it updates the initial AnnData object.

+        Returns:
+            Updated AnnData object.
+
         Examples:
            Example usage with PseudobulkSpace:
+
            >>> import pertpy as pt
            >>> mdata = pt.dt.papalexi_2021()
            >>> ps = pt.tl.PseudobulkSpace()
-           >>> diff_adata = ps.compute_control_diff(mdata["rna"], target_col="gene_target", reference_key=
+           >>> diff_adata = ps.compute_control_diff(mdata["rna"], target_col="gene_target", reference_key="NT")
         """
         if reference_key not in adata.obs[target_col].unique():
             raise ValueError(
@@ -69,48 +76,67 @@ class PerturbationSpace:
             adata = adata.copy()

         control_mask = adata.obs[target_col] == reference_key
-
+        group_masks = (
+            [(adata.obs[group_col] == sample) for sample in adata.obs[group_col].unique()]
+            if group_col
+            else [[True] * adata.n_obs]
+        )

         if layer_key:
-
-
-
-
-
-
+            adata.layers[new_layer_key] = np.zeros((adata.n_obs, adata.n_vars))
+            for mask in group_masks:
+                num_control = (control_mask & mask).sum()
+                if num_control == 1:
+                    control_expression = adata.layers[layer_key][(control_mask & mask), :]
+                elif num_control > 1:
+                    control_expression = np.mean(adata.layers[layer_key][(control_mask & mask), :], axis=0)
+                else:
+                    control_expression = np.zeros((1, adata.n_vars))
+                adata.layers[new_layer_key][mask, :] = adata.layers[layer_key][mask, :] - control_expression

         if embedding_key:
-
-
-
-
-
-
+            adata.obsm[new_embedding_key] = np.zeros(adata.obsm[embedding_key].shape)
+            for mask in group_masks:
+                num_control = (control_mask & mask).sum()
+                if num_control == 1:
+                    control_expression = adata.obsm[embedding_key][(control_mask & mask), :]
+                elif num_control > 1:
+                    control_expression = np.mean(adata.obsm[embedding_key][(control_mask & mask), :], axis=0)
+                else:
+                    control_expression = np.zeros((1, adata.n_vars))
+                adata.obsm[new_embedding_key][mask, :] = adata.obsm[embedding_key][mask, :] - control_expression

         if (not layer_key and not embedding_key) or all_data:
-
-
-
-
-
-
+            adata_x = np.zeros((adata.n_obs, adata.n_vars))
+            for mask in group_masks:
+                num_control = (control_mask & mask).sum()
+                if num_control == 1:
+                    control_expression = adata.X[(control_mask & mask), :]
+                elif num_control > 1:
+                    control_expression = np.mean(adata.X[(control_mask & mask), :], axis=0)
+                else:
+                    control_expression = np.zeros((1, adata.n_vars))
+                adata_x[mask, :] = adata.X[mask, :] - control_expression
+            adata.X = adata_x

         if all_data:
             layers_keys = list(adata.layers.keys())
             for local_layer_key in layers_keys:
                 if local_layer_key != layer_key and local_layer_key != new_layer_key:
-
-
-
-
+                    adata.layers[local_layer_key + "_control_diff"] = np.zeros((adata.n_obs, adata.n_vars))
+                    for mask in group_masks:
+                        adata.layers[local_layer_key + "_control_diff"][mask, :] = adata.layers[local_layer_key][
+                            mask, :
+                        ] - np.mean(adata.layers[local_layer_key][(control_mask & mask), :], axis=0)

             embedding_keys = list(adata.obsm_keys())
             for local_embedding_key in embedding_keys:
                 if local_embedding_key != embedding_key and local_embedding_key != new_embedding_key:
-
-
-
-
+                    adata.obsm[local_embedding_key + "_control_diff"] = np.zeros(adata.obsm[local_embedding_key].shape)
+                    for mask in group_masks:
+                        adata.obsm[local_embedding_key + "_control_diff"][mask, :] = adata.obsm[local_embedding_key][
+                            mask, :
+                        ] - np.mean(adata.obsm[local_embedding_key][(control_mask & mask), :], axis=0)

         self.control_diff_computed = True

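The hunks above add a `group_col` argument to `compute_control_diff`, so the control mean is subtracted within each group rather than globally. A minimal usage sketch based on the signature in this diff; the `"replicate"` grouping column is illustrative and assumes such a column exists in the dataset:

```python
import pertpy as pt

mdata = pt.dt.papalexi_2021()
ps = pt.tl.PseudobulkSpace()

# Subtract the mean of the "NT" control cells separately within each
# (assumed) "replicate" group, writing the result back to .X of the copy.
diff_adata = ps.compute_control_diff(
    mdata["rna"],
    target_col="gene_target",
    group_col="replicate",
    reference_key="NT",
    copy=True,
)
```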
@@ -122,24 +148,30 @@ class PerturbationSpace:
         perturbations: Iterable[str],
         reference_key: str = "control",
         ensure_consistency: bool = False,
-        target_col: str = "
-    ):
-        """Add perturbations linearly. Assumes input of size n_perts x dimensionality
+        target_col: str = "perturbation",
+    ) -> tuple[AnnData, AnnData] | AnnData:
+        """Add perturbations linearly. Assumes input of size n_perts x dimensionality.

         Args:
             adata: Anndata object of size n_perts x dim.
             perturbations: Perturbations to add.
             reference_key: perturbation source from which the perturbation summation starts.
             ensure_consistency: If True, runs differential expression on all data matrices to ensure consistency of linear space.
-            target_col: .obs column name that stores the label of the perturbation applied to each cell.
+            target_col: .obs column name that stores the label of the perturbation applied to each cell.
+
+        Returns:
+            Anndata object of size (n_perts+1) x dim, where the last row is the addition of the specified perturbations.
+            If ensure_consistency is True, returns a tuple of (new_perturbation, adata) where adata is the AnnData object
+            provided as input but updated using compute_control_diff.

         Examples:
            Example usage with PseudobulkSpace:
+
            >>> import pertpy as pt
            >>> mdata = pt.dt.papalexi_2021()
            >>> ps = pt.tl.PseudobulkSpace()
            >>> ps_adata = ps.compute(mdata["rna"], target_col="gene_target", groups_col="gene_target")
-           >>> new_perturbation = ps.add(ps_adata, perturbations=["ATF2", "CD86"], reference_key=
+           >>> new_perturbation = ps.add(ps_adata, perturbations=["ATF2", "CD86"], reference_key="NT")
         """
         new_pert_name = ""
         for perturbation in perturbations:
@@ -150,8 +182,8 @@ class PerturbationSpace:
             new_pert_name += perturbation + "+"

         if not ensure_consistency:
-
-            "
+            logger.warning(
+                "Operation might be done in non-consistent space (perturbation - perturbation != control). \n"
                 "Subtract control perturbation needed for consistency of space in all data representations. \n"
                 "Run with ensure_consistency=True"
             )
@@ -212,6 +244,8 @@ class PerturbationSpace:
             key_name = key.removesuffix("_control_diff")
             new_perturbation.obsm[key_name] = data["embeddings"][key]

+        new_perturbation.obs[target_col] = new_perturbation.obs_names.astype("category")
+
         if ensure_consistency:
             return new_perturbation, adata

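`add` is now annotated to return either a single AnnData or, when `ensure_consistency=True`, a `(new_perturbation, adata)` tuple in which the input is re-expressed as control differences. A short sketch following the doctest in this hunk:

```python
import pertpy as pt

mdata = pt.dt.papalexi_2021()
ps = pt.tl.PseudobulkSpace()
ps_adata = ps.compute(mdata["rna"], target_col="gene_target", groups_col="gene_target")

# ensure_consistency=True returns the summed perturbation together with the
# input AnnData updated via compute_control_diff, keeping the linear space consistent.
new_perturbation, ps_adata = ps.add(
    ps_adata,
    perturbations=["ATF2", "CD86"],
    reference_key="NT",
    ensure_consistency=True,
)
```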
@@ -223,24 +257,30 @@ class PerturbationSpace:
         perturbations: Iterable[str],
         reference_key: str = "control",
         ensure_consistency: bool = False,
-        target_col: str = "
-    ):
+        target_col: str = "perturbation",
+    ) -> tuple[AnnData, AnnData] | AnnData:
         """Subtract perturbations linearly. Assumes input of size n_perts x dimensionality

         Args:
             adata: Anndata object of size n_perts x dim.
-            perturbations: Perturbations to subtract
-            reference_key: Perturbation source from which the perturbation subtraction starts
+            perturbations: Perturbations to subtract.
+            reference_key: Perturbation source from which the perturbation subtraction starts.
             ensure_consistency: If True, runs differential expression on all data matrices to ensure consistency of linear space.
-            target_col: .obs column name that stores the label of the perturbation applied to each cell.
+            target_col: .obs column name that stores the label of the perturbation applied to each cell.
+
+        Returns:
+            Anndata object of size (n_perts+1) x dim, where the last row is the subtraction of the specified perturbations.
+            If ensure_consistency is True, returns a tuple of (new_perturbation, adata) where adata is the AnnData object
+            provided as input but updated using compute_control_diff.

         Examples:
            Example usage with PseudobulkSpace:
+
            >>> import pertpy as pt
            >>> mdata = pt.dt.papalexi_2021()
            >>> ps = pt.tl.PseudobulkSpace()
            >>> ps_adata = ps.compute(mdata["rna"], target_col="gene_target", groups_col="gene_target")
-           >>> new_perturbation = ps.
+           >>> new_perturbation = ps.subtract(ps_adata, reference_key="ATF2", perturbations=["BRD4", "CUL3"])
         """
         new_pert_name = reference_key + "-"
         for perturbation in perturbations:
@@ -251,8 +291,8 @@ class PerturbationSpace:
             new_pert_name += perturbation + "-"

         if not ensure_consistency:
-
-            "
+            logger.warning(
+                "Operation might be done in non-consistent space (perturbation - perturbation != control).\n"
                 "Subtract control perturbation needed for consistency of space in all data representations.\n"
                 "Run with ensure_consistency=True"
             )
@@ -313,7 +353,61 @@ class PerturbationSpace:
             key_name = key.removesuffix("_control_diff")
             new_perturbation.obsm[key_name] = data["embeddings"][key]

+        new_perturbation.obs[target_col] = new_perturbation.obs_names.astype("category")
+
         if ensure_consistency:
             return new_perturbation, adata

         return new_perturbation
+
+    def label_transfer(
+        self,
+        adata: AnnData,
+        column: str = "perturbation",
+        target_val: str = "unknown",
+        n_neighbors: int = 5,
+        use_rep: str = "X_umap",
+    ) -> None:
+        """Impute missing values in the specified column using KNN imputation in the space defined by `use_rep`.
+
+        Args:
+            adata: The AnnData object containing single-cell data.
+            column: The column name in AnnData object to perform imputation on.
+            target_val: The target value to impute.
+            n_neighbors: Number of neighbors to use for imputation.
+            use_rep: The key in `adata.obsm` where the embedding (UMAP, PCA, etc.) is stored.
+
+        Examples:
+            >>> import pertpy as pt
+            >>> import scanpy as sc
+            >>> import numpy as np
+            >>> adata = sc.datasets.pbmc68k_reduced()
+            >>> rng = np.random.default_rng()
+            >>> adata.obs["perturbation"] = rng.choice(
+            ...     ["A", "B", "C", "unknown"], size=adata.n_obs, p=[0.33, 0.33, 0.33, 0.01]
+            ... )
+            >>> sc.pp.neighbors(adata)
+            >>> sc.tl.umap(adata)
+            >>> ps = pt.tl.PseudobulkSpace()
+            >>> ps.label_transfer(adata, n_neighbors=5, use_rep="X_umap")
+        """
+        if use_rep not in adata.obsm:
+            raise ValueError(f"Representation {use_rep} not found in the AnnData object.")
+
+        embedding = adata.obsm[use_rep]
+
+        from pynndescent import NNDescent
+
+        nnd = NNDescent(embedding, n_neighbors=n_neighbors)
+        indices, _ = nnd.query(embedding, k=n_neighbors)
+
+        perturbations = np.array(adata.obs[column])
+        missing_mask = perturbations == target_val
+
+        for idx in np.where(missing_mask)[0]:
+            neighbor_indices = indices[idx]
+            neighbor_categories = perturbations[neighbor_indices]
+            most_common = pd.Series(neighbor_categories).mode()[0]
+            perturbations[idx] = most_common
+
+        adata.obs[column] = perturbations
pertpy/tools/_perturbation_space/_simple.py
CHANGED
@@ -15,9 +15,10 @@ class CentroidSpace(PerturbationSpace):
     def compute(
         self,
         adata: AnnData,
-        target_col: str = "
+        target_col: str = "perturbation",
         layer_key: str = None,
         embedding_key: str = "X_umap",
+        keep_obs: bool = True,
     ) -> AnnData:  # type: ignore
         """Computes the centroids of a pre-computed embedding such as UMAP.

@@ -26,6 +27,12 @@ class CentroidSpace(PerturbationSpace):
             target_col: .obs column that stores the label of the perturbation applied to each cell.
             layer_key: If specified pseudobulk computation is done by using the specified layer. Otherwise, computation is done with .X
             embedding_key: `obsm` key of the AnnData embedding to use for computation. Defaults to the 'X' matrix otherwise.
+            keep_obs: Whether .obs columns in the input AnnData should be kept in the output pseudobulk AnnData. Only .obs columns with the same value for
+                each cell of one perturbation are kept.
+
+        Returns:
+            AnnData object with one observation per perturbation, storing the embedding data of the
+            centroid of the respective perturbation.

         Examples:
             Compute the centroids of a UMAP embedding of the papalexi_2021 dataset:
@@ -34,7 +41,7 @@ class CentroidSpace(PerturbationSpace):
            >>> import scanpy as sc
            >>> mdata = pt.dt.papalexi_2021()
            >>> sc.pp.pca(mdata["rna"])
-           >>> sc.pp.neighbors(mdata[
+           >>> sc.pp.neighbors(mdata["rna"])
            >>> sc.tl.umap(mdata["rna"])
            >>> cs = pt.tl.CentroidSpace()
            >>> cs_adata = cs.compute(mdata["rna"], target_col="gene_target")
@@ -84,6 +91,22 @@ class CentroidSpace(PerturbationSpace):

         ps_adata = AnnData(X=X)
         ps_adata.obs_names = index
+        ps_adata.obs[target_col] = index
+
+        if embedding_key is not None:
+            ps_adata.obsm[embedding_key] = X
+
+        if keep_obs:  # Save the values of the obs columns of interest in the ps_adata object
+            obs_df = adata.obs
+            obs_df = obs_df.groupby(target_col).agg(
+                lambda pert_group: np.nan if len(set(pert_group)) != 1 else list(set(pert_group))[0]
+            )
+            for obs_name in obs_df.columns:
+                if not obs_df[obs_name].isnull().values.any():
+                    mapping = {pert: obs_df.loc[pert][obs_name] for pert in index}
+                    ps_adata.obs[obs_name] = ps_adata.obs[target_col].map(mapping)
+
+        ps_adata.obs[target_col] = ps_adata.obs[target_col].astype("category")

         return ps_adata

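CentroidSpace now also writes the centroid coordinates into `.obsm[embedding_key]` and, with the new `keep_obs=True` default, carries over `.obs` columns that are constant within each perturbation. A sketch adapted from the docstring example above (the column carry-over behavior is exactly what the hunk implements):

```python
import pertpy as pt
import scanpy as sc

mdata = pt.dt.papalexi_2021()
sc.pp.pca(mdata["rna"])
sc.pp.neighbors(mdata["rna"])
sc.tl.umap(mdata["rna"])

cs = pt.tl.CentroidSpace()
# One observation per perturbation; .obs columns with a single value per
# perturbation survive the aggregation because keep_obs defaults to True.
cs_adata = cs.compute(mdata["rna"], target_col="gene_target", embedding_key="X_umap")
```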
@@ -94,7 +117,8 @@ class PseudobulkSpace(PerturbationSpace):
     def compute(
         self,
         adata: AnnData,
-        target_col: str = "
+        target_col: str = "perturbation",
+        groups_col: str = None,
         layer_key: str = None,
         embedding_key: str = None,
         **kwargs,
@@ -104,19 +128,21 @@ class PseudobulkSpace(PerturbationSpace):
         Args:
             adata: Anndata object of size cells x genes
             target_col: .obs column that stores the label of the perturbation applied to each cell.
+            groups_col: Optional .obs column that stores a grouping label to consider for pseudobulk computation.
+                The summarized expression per perturbation (target_col) and group (groups_col) is computed.
             layer_key: If specified pseudobulk computation is done by using the specified layer. Otherwise, computation is done with .X
             embedding_key: `obsm` key of the AnnData embedding to use for computation. Defaults to the 'X' matrix otherwise.
             **kwargs: Are passed to decoupler's get_pseuobulk.

+        Returns:
+            AnnData object with one observation per perturbation.
+
         Examples:
-           >>> import pertpy as
+           >>> import pertpy as pt
            >>> mdata = pt.dt.papalexi_2021()
            >>> ps = pt.tl.PseudobulkSpace()
-           >>> ps_adata = ps.compute(mdata["rna"], target_col="gene_target"
+           >>> ps_adata = ps.compute(mdata["rna"], target_col="gene_target")
         """
-        if "groups_col" not in kwargs:
-            kwargs["groups_col"] = "perturbations"
-
         if layer_key is not None and embedding_key is not None:
             raise ValueError("Please, select just either layer or embedding for computation.")

@@ -135,7 +161,10 @@ class PseudobulkSpace(PerturbationSpace):
             adata_emb.obs = adata.obs
             adata = adata_emb

-
+        adata.obs[target_col] = adata.obs[target_col].astype("category")
+        ps_adata = dc.get_pseudobulk(adata, sample_col=target_col, layer=layer_key, groups_col=groups_col, **kwargs)  # type: ignore
+
+        ps_adata.obs[target_col] = ps_adata.obs[target_col].astype("category")

         return ps_adata

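PseudobulkSpace no longer injects a default `groups_col` into `kwargs`; grouping is now an explicit argument forwarded to decoupler's `get_pseudobulk`. A sketch of the new call pattern; the `"replicate"` grouping column is illustrative, and the keyword arguments shown are standard decoupler `get_pseudobulk` options passed through `**kwargs`:

```python
import pertpy as pt

mdata = pt.dt.papalexi_2021()
ps = pt.tl.PseudobulkSpace()

# One pseudobulk profile per perturbation and (assumed) replicate group;
# mode/min_cells/min_counts are forwarded to decoupler's get_pseudobulk.
ps_adata = ps.compute(
    mdata["rna"],
    target_col="gene_target",
    groups_col="replicate",
    mode="sum",
    min_cells=0,
    min_counts=0,
)
```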
@@ -164,6 +193,11 @@ class KMeansSpace(ClusteringSpace):
             return_object: if True returns the clustering object
             **kwargs: Are passed to sklearn's KMeans.

+        Returns:
+            If return_object is True, the adata and the clustering object is returned.
+            Otherwise, only the adata is returned. The adata is updated with a new .obs column as specified in cluster_key,
+            that stores the cluster labels.
+
         Examples:
            >>> import pertpy as pt
            >>> mdata = pt.dt.papalexi_2021()
@@ -193,6 +227,7 @@ class KMeansSpace(ClusteringSpace):

         clustering = KMeans(**kwargs).fit(self.X)
         adata.obs[cluster_key] = clustering.labels_
+        adata.obs[cluster_key] = adata.obs[cluster_key].astype("category")

         if return_object:
             return adata, clustering
@@ -212,18 +247,23 @@ class DBSCANSpace(ClusteringSpace):
         copy: bool = True,
         return_object: bool = False,
         **kwargs,
-    ) -> tuple[AnnData, object | AnnData
+    ) -> tuple[AnnData, object] | AnnData:
         """Computes a clustering using Density-based spatial clustering of applications (DBSCAN).

         Args:
             adata: Anndata object of size cells x genes
             layer_key: If specified and exists in the adata, the clustering is done by using it. Otherwise, clustering is done with .X
             embedding_key: if specified and exists in the adata, the clustering is done with that embedding. Otherwise, clustering is done with .X
-            cluster_key: name of the .obs column to store the cluster labels.
+            cluster_key: name of the .obs column to store the cluster labels.
             copy: if True returns a new Anndata of same size with the new column; otherwise it updates the initial adata
             return_object: if True returns the clustering object
             **kwargs: Are passed to sklearn's DBSCAN.

+        Returns:
+            If return_object is True, the adata and the clustering object is returned.
+            Otherwise, only the adata is returned. The adata is updated with a new .obs column as specified in cluster_key,
+            that stores the cluster labels.
+
         Examples:
            >>> import pertpy as pt
            >>> mdata = pt.dt.papalexi_2021()
@@ -250,6 +290,7 @@ class DBSCANSpace(ClusteringSpace):

         clustering = DBSCAN(**kwargs).fit(self.X)
         adata.obs[cluster_key] = clustering.labels_
+        adata.obs[cluster_key] = adata.obs[cluster_key].astype("category")

         if return_object:
             return adata, clustering
pertpy/tools/_scgen/__init__.py
CHANGED
@@ -1 +1 @@
-from pertpy.tools._scgen.
+from pertpy.tools._scgen._scgen import Scgen
pertpy/tools/_scgen/_base_components.py
CHANGED
@@ -28,7 +28,7 @@ class FlaxEncoder(nn.Module):

         Args:
             x: The input data matrix.
-            training: Whether
+            training: Whether to use running training average.

         Returns:
             Mean and variance.
@@ -69,12 +69,11 @@ class FlaxDecoder(nn.Module):

         Args:
             x: Input data.
-            training:
+            training: Whether to use running training average.

         Returns:
             Decoded data.
         """
-
         training = nn.merge_param("training", self.training, training)

         for _ in range(self.n_layers):