PyPI - pertpy - Versions diffs - 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl - Mend

pertpy 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

pertpy/__init__.py +1 -1
pertpy/data/_dataloader.py +2 -2
pertpy/data/_datasets.py +62 -62
pertpy/metadata/_cell_line.py +9 -3
pertpy/metadata/_drug.py +4 -2
pertpy/preprocessing/_guide_rna.py +17 -10
pertpy/preprocessing/_guide_rna_mixture.py +9 -3
pertpy/tools/__init__.py +12 -2
pertpy/tools/_augur.py +37 -14
pertpy/tools/_coda/_sccoda.py +68 -101
pertpy/tools/_coda/_tasccoda.py +103 -85
pertpy/tools/_mixscape.py +48 -39
pertpy/tools/_perturbation_space/_comparison.py +3 -3
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +261 -353
pertpy/tools/_perturbation_space/_perturbation_space.py +22 -14
pertpy/tools/_perturbation_space/_simple.py +12 -6
pertpy/tools/_scgen/_scgenvae.py +2 -1
pertpy/tools/core.py +18 -0
{pertpy-1.0.1.dist-info → pertpy-1.0.3.dist-info}/METADATA +14 -2
{pertpy-1.0.1.dist-info → pertpy-1.0.3.dist-info}/RECORD +22 -21
{pertpy-1.0.1.dist-info → pertpy-1.0.3.dist-info}/WHEEL +0 -0
{pertpy-1.0.1.dist-info → pertpy-1.0.3.dist-info}/licenses/LICENSE +0 -0

pertpy/preprocessing/_guide_rna.py CHANGED Viewed

@@ -266,16 +266,23 @@ class GuideAssignment:
             res.loc[adata.obs_names[is_nonzero][assignments == "Positive"], gene] = 1
             # Add the parameters to the adata.var DataFrame
-            for params_name, param in mixture_model.params.items():
-                if param.ndim == 0:
-                    if params_name not in adata.var.columns:
-                        adata.var[params_name] = np.nan
-                    adata.var.loc[gene, params_name] = param.item()
-                else:
-                    for i, p in enumerate(param):
-                        if f"{params_name}_{i}" not in adata.var.columns:
-                            adata.var[f"{params_name}_{i}"] = np.nan
-                        adata.var.loc[gene, f"{params_name}_{i}"] = p
+            samples = mixture_model.mcmc.get_samples()
+            param_data = {}
+            for param_name in ["gaussian_mean", "gaussian_std", "poisson_rate", "mix_probs"]:
+                if param_name in samples:
+                    param_value = samples[param_name].mean(axis=0)
+                    if param_value.ndim == 0:
+                        param_data[param_name] = param_value.item()
+                    else:
+                        for i, p in enumerate(param_value):
+                            param_data[f"{param_name}_{i}"] = p.item()
+            # Add all columns at once
+            for col_name, value in param_data.items():
+                if col_name not in adata.var.columns:
+                    adata.var[col_name] = np.nan
+                adata.var.loc[gene, col_name] = value
         # Assign guides to cells
         # Some cells might have multiple guides assigned

pertpy/preprocessing/_guide_rna_mixture.py CHANGED Viewed

@@ -8,7 +8,7 @@ import numpy as np
 from jax.random import PRNGKey
 from jax.scipy.special import logsumexp
 from numpyro import factor, plate, sample
-from numpyro.distributions import Dirichlet, Exponential, HalfNormal, Normal, Poisson
+from numpyro.distributions import Categorical, Dirichlet, Exponential, HalfNormal, Normal, Poisson
 from numpyro.infer import MCMC, NUTS
 ParamsDict = Mapping[str, jnp.ndarray]
@@ -102,8 +102,14 @@ class MixtureModel(ABC):
         with plate("data", data.shape[0]):
             log_likelihoods = self.log_likelihood(data, params)
-            log_mixture_likelihood = logsumexp(log_likelihoods, axis=-1)
-            sample("obs", Normal(log_mixture_likelihood, 1.0), obs=data)
+            mixture_probs = jnp.exp(log_likelihoods - logsumexp(log_likelihoods, axis=-1, keepdims=True))
+            z = sample("z", Categorical(mixture_probs), infer={"enumerate": "parallel"})
+            # Observe under selected component
+            poisson_ll = Poisson(params["poisson_rate"]).log_prob(data)
+            gaussian_ll = Normal(params["gaussian_mean"], params["gaussian_std"]).log_prob(data)
+            obs_ll = jnp.where(z == 0, poisson_ll, gaussian_ll)
+            factor("obs", obs_ll)
     def assignment(self, samples: ParamsDict, data: jnp.ndarray) -> np.ndarray:
         """Assign data points to mixture components.

pertpy/tools/__init__.py CHANGED Viewed

@@ -21,7 +21,6 @@ from pertpy.tools._perturbation_space._simple import (
     KMeansSpace,
     PseudobulkSpace,
 )
-from pertpy.tools._scgen import Scgen
 def __getattr__(name: str):
@@ -35,14 +34,25 @@ def __getattr__(name: str):
             raise ImportError(
                 "Extra dependencies required: toytree, ete4. Please install with: pip install toytree ete4"
             ) from None
     elif name in ["EdgeR", "PyDESeq2", "Statsmodels", "TTest", "WilcoxonTest"]:
         module = import_module("pertpy.tools._differential_gene_expression")
         return getattr(module, name)
+    elif name == "Scgen":
+        try:
+            module = import_module("pertpy.tools._scgen")
+            return module.Scgen
+        except ImportError:
+            raise ImportError(
+                "Scgen requires scvi-tools to be installed. Please install with: pip install scvi-tools"
+            ) from None
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+def __dir__():
+    return __all__
 __all__ = [
     "Augur",
     "Cinemaot",

pertpy/tools/_augur.py CHANGED Viewed

@@ -36,6 +36,7 @@ from statsmodels.api import OLS
 from statsmodels.stats.multitest import fdrcorrection
 from pertpy._doc import _doc_params, doc_common_plot_args
+from pertpy.tools.core import _is_raw_counts
 if TYPE_CHECKING:
     from matplotlib.axes import Axes
@@ -87,6 +88,7 @@ class Augur:
         self,
         input: AnnData | pd.DataFrame,
         *,
+        layer: str | None = None,
         meta: pd.DataFrame | None = None,
         label_col: str = "label_col",
         cell_type_col: str = "cell_type_col",
@@ -98,6 +100,7 @@ class Augur:
         Args:
             input: Anndata or matrix containing gene expression values (genes in rows, cells in columns)
                 and optionally meta data about each cell.
+            layer: Layer in AnnData to use for expression data. If None, uses .X
             meta: Optional Pandas DataFrame containing meta data about each cell.
             label_col: column of the meta DataFrame or the Anndata or matrix containing the condition labels for each cell
                 in the cell-by-gene expression matrix
@@ -114,11 +117,11 @@ class Augur:
             >>> import pertpy as pt
             >>> adata = pt.dt.sc_sim_augur()
             >>> ag_rfc = pt.tl.Augur("random_forest_classifier")
-            >>> loaded_data = ag_rfc.load(adata)
+            >>> augur_adata = ag_rfc.load(adata)
         """
         if isinstance(input, AnnData):
-            input.obs = input.obs.rename(columns={cell_type_col: "cell_type", label_col: "label"})
             adata = input
+            obs_renamed = adata.obs.rename(columns={cell_type_col: "cell_type", label_col: "label"})
         elif isinstance(input, pd.DataFrame):
             if meta is None:
@@ -130,27 +133,47 @@ class Augur:
             label = input[label_col] if meta is None else meta[label_col]
             cell_type = input[cell_type_col] if meta is None else meta[cell_type_col]
-            x = input.drop([label_col, cell_type_col], axis=1) if meta is None else input
-            adata = AnnData(X=x, obs=pd.DataFrame({"cell_type": cell_type, "label": label}))
+            X = input.drop([label_col, cell_type_col], axis=1) if meta is None else input
+            adata = AnnData(X=X, obs=pd.DataFrame({"cell_type": cell_type, "label": label}))
+            obs_renamed = adata.obs
-        if len(adata.obs["label"].unique()) < 2:
+        if len(obs_renamed["label"].unique()) < 2:
             raise ValueError("Less than two unique labels in dataset. At least two are needed for the analysis.")
+        if isinstance(input, AnnData):
+            final_adata = AnnData(X=adata.X, obs=obs_renamed, var=adata.var, layers=adata.layers)
+        else:
+            final_adata = adata
         # dummy variables for categorical data
-        if adata.obs["label"].dtype.name == "category":
-            # filter samples according to label
+        if final_adata.obs["label"].dtype.name == "category":
+            label_encoder = LabelEncoder()
+            final_adata.obs["y_"] = label_encoder.fit_transform(final_adata.obs["label"])
             if condition_label is not None and treatment_label is not None:
                 logger.info(f"Filtering samples with {condition_label} and {treatment_label} labels.")
-                adata = ad.concat(
-                    [adata[adata.obs["label"] == condition_label], adata[adata.obs["label"] == treatment_label]]
+                final_adata = ad.concat(
+                    [
+                        final_adata[final_adata.obs["label"] == condition_label],
+                        final_adata[final_adata.obs["label"] == treatment_label],
+                    ]
                 )
-            label_encoder = LabelEncoder()
-            adata.obs["y_"] = label_encoder.fit_transform(adata.obs["label"])
         else:
-            y = adata.obs["label"].to_frame()
+            y = final_adata.obs["label"].to_frame()
             y = y.rename(columns={"label": "y_"})
-            adata.obs = pd.concat([adata.obs, y], axis=1)
+            final_adata.obs = pd.concat([final_adata.obs, y], axis=1)
-        return adata
+        if layer is not None:
+            if layer not in final_adata.layers:
+                raise ValueError(f"Layer '{layer}' not found in AnnData object")
+            X = final_adata.layers[layer]
+        else:
+            X = final_adata.X
+        if not _is_raw_counts(X):
+            logger.warning("Data does not appear to be raw counts. Augur developers recommend using raw counts.")
+        return final_adata
     def create_estimator(
         self,

pertpy/tools/_coda/_sccoda.py CHANGED Viewed

@@ -11,7 +11,6 @@ from jax import config, random
 from lamin_utils import logger
 from mudata import MuData
 from numpyro.infer import Predictive
-from rich import print
 from pertpy.tools._coda._base_coda import CompositionalModel2, from_scanpy
@@ -25,24 +24,6 @@ config.update("jax_enable_x64", True)
 class Sccoda(CompositionalModel2):
     r"""Statistical model for single-cell differential composition analysis with specification of a reference cell type.
-    This is the standard scCODA model and recommended for all uses.
-    The hierarchical formulation of the model for one sample is:
-    .. math::
-         y|x &\\sim DirMult(\\phi, \\bar{y}) \\\\
-         \\log(\\phi) &= \\alpha + x \\beta \\\\
-         \\alpha_k &\\sim N(0, 5) \\quad &\\forall k \\in [K] \\\\
-         \\beta_{m, \\hat{k}} &= 0 &\\forall m \\in [M]\\\\
-         \\beta_{m, k} &= \\tau_{m, k} \\tilde{\\beta}_{m, k} \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-         \\tau_{m, k} &= \\frac{\\exp(t_{m, k})}{1+ \\exp(t_{m, k})} \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-         \\frac{t_{m, k}}{50} &\\sim N(0, 1) \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-         \\tilde{\\beta}_{m, k} &= \\sigma_m^2 \\cdot \\gamma_{m, k} \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-         \\sigma_m^2 &\\sim HC(0, 1) \\quad &\\forall m \\in [M] \\\\
-         \\gamma_{m, k} &\\sim N(0,1) \\quad &\\forall m \\in [M], k \\in \\{[K] \\smallsetminus \\hat{k}\\} \\\\
-    with y being the cell counts and x the covariates.
     For further information, see `scCODA is a Bayesian model for compositional single-cell data analysis`
     (Büttner, Ostner et al., NatComms, 2021)
     """
@@ -303,7 +284,7 @@ class Sccoda(CompositionalModel2):
         self,
         data: AnnData | MuData,
         modality_key: str = "coda",
-        rng_key=None,
+        rng_key: int | None = None,
         num_prior_samples: int = 500,
         use_posterior_predictive: bool = True,
     ) -> az.InferenceData:
@@ -381,34 +362,9 @@ class Sccoda(CompositionalModel2):
         if rng_key is None:
             rng = np.random.default_rng()
             rng_key = random.key(rng.integers(0, 10000))
-        if use_posterior_predictive:
-            posterior_predictive = Predictive(self.model, self.mcmc.get_samples())(
-                rng_key,
-                counts=None,
-                covariates=numpyro_covariates,
-                n_total=numpyro_n_total,
-                ref_index=ref_index,
-                sample_adata=sample_adata,
-            )
-        else:
-            posterior_predictive = None
-        if num_prior_samples > 0:
-            prior = Predictive(self.model, num_samples=num_prior_samples)(
-                rng_key,
-                counts=None,
-                covariates=numpyro_covariates,
-                n_total=numpyro_n_total,
-                ref_index=ref_index,
-                sample_adata=sample_adata,
-            )
         else:
-            prior = None
+            rng_key = random.key(rng_key)
-        import arviz as az
-        # Create arviz object
         if use_posterior_predictive:
             posterior_predictive = Predictive(self.model, self.mcmc.get_samples())(
                 rng_key,
@@ -451,6 +407,9 @@ class Sccoda(CompositionalModel2):
         else:
             prior = None
+        import arviz as az
+        # Create arviz object
         arviz_data = az.from_numpyro(
             self.mcmc, prior=prior, posterior_predictive=posterior_predictive, dims=dims, coords=coords
         )
@@ -468,76 +427,84 @@ class Sccoda(CompositionalModel2):
         *args,
         **kwargs,
     ):
-        """Examples:
-        >>> import pertpy as pt
-        >>> haber_cells = pt.dt.haber_2017_regions()
-        >>> sccoda = pt.tl.Sccoda()
-        >>> mdata = sccoda.load(haber_cells,
-        >>>                     type="cell_level",
-        >>>                     generate_sample_level=True,
-        >>>                     cell_type_identifier="cell_label",
-        >>>                     sample_identifier="batch",
-        >>>                     covariate_obs=["condition"])
-        >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
-        >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42).
-        """  # noqa: D205
+        """
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells,
+            >>>                     type="cell_level",
+            >>>                     generate_sample_level=True,
+            >>>                     cell_type_identifier="cell_label",
+            >>>                     sample_identifier="batch",
+            >>>                     covariate_obs=["condition"])
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42).
+        """  # noqa: D205, D212
         return super().run_nuts(data, modality_key, num_samples, num_warmup, rng_key, copy, *args, **kwargs)
     run_nuts.__doc__ = CompositionalModel2.run_nuts.__doc__ + run_nuts.__doc__
     def credible_effects(self, data: AnnData | MuData, modality_key: str = "coda", est_fdr: float = None) -> pd.Series:
-        """Examples:
-        >>> import pertpy as pt
-        >>> haber_cells = pt.dt.haber_2017_regions()
-        >>> sccoda = pt.tl.Sccoda()
-        >>> mdata = sccoda.load(haber_cells,
-        >>>                     type="cell_level",
-        >>>                     generate_sample_level=True,
-        >>>                     cell_type_identifier="cell_label",
-        >>>                     sample_identifier="batch",
-        >>>                     covariate_obs=["condition"])
-        >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
-        >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
-        >>> credible_effects = sccoda.credible_effects(mdata).
-        """  # noqa: D205
+        """
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells,
+            >>>                     type="cell_level",
+            >>>                     generate_sample_level=True,
+            >>>                     cell_type_identifier="cell_label",
+            >>>                     sample_identifier="batch",
+            >>>                     covariate_obs=["condition"])
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
+            >>> credible_effects = sccoda.credible_effects(mdata).
+        """  # noqa: D205, D212
         return super().credible_effects(data, modality_key, est_fdr)
     credible_effects.__doc__ = CompositionalModel2.credible_effects.__doc__ + credible_effects.__doc__
     def summary(self, data: AnnData | MuData, extended: bool = False, modality_key: str = "coda", *args, **kwargs):
-        """Examples:
-        >>> import pertpy as pt
-        >>> haber_cells = pt.dt.haber_2017_regions()
-        >>> sccoda = pt.tl.Sccoda()
-        >>> mdata = sccoda.load(haber_cells,
-        >>>                     type="cell_level",
-        >>>                     generate_sample_level=True,
-        >>>                     cell_type_identifier="cell_label",
-        >>>                     sample_identifier="batch",
-        >>>                     covariate_obs=["condition"])
-        >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
-        >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
-        >>> sccoda.summary(mdata).
-        """  # noqa: D205
+        """
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells,
+            >>>                     type="cell_level",
+            >>>                     generate_sample_level=True,
+            >>>                     cell_type_identifier="cell_label",
+            >>>                     sample_identifier="batch",
+            >>>                     covariate_obs=["condition"])
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
+            >>> sccoda.summary(mdata).
+        """  # noqa: D205, D212
         return super().summary(data, extended, modality_key, *args, **kwargs)
     summary.__doc__ = CompositionalModel2.summary.__doc__ + summary.__doc__
     def set_fdr(self, data: AnnData | MuData, est_fdr: float, modality_key: str = "coda", *args, **kwargs):
-        """Examples:
-        >>> import pertpy as pt
-        >>> haber_cells = pt.dt.haber_2017_regions()
-        >>> sccoda = pt.tl.Sccoda()
-        >>> mdata = sccoda.load(haber_cells,
-        >>>                     type="cell_level",
-        >>>                     generate_sample_level=True,
-        >>>                     cell_type_identifier="cell_label",
-        >>>                     sample_identifier="batch",
-        >>>                     covariate_obs=["condition"])
-        >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
-        >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
-        >>> sccoda.set_fdr(mdata, est_fdr=0.4).
-        """  # noqa: D205
+        """
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells,
+            >>>                     type="cell_level",
+            >>>                     generate_sample_level=True,
+            >>>                     cell_type_identifier="cell_label",
+            >>>                     sample_identifier="batch",
+            >>>                     covariate_obs=["condition"])
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
+            >>> sccoda.set_fdr(mdata, est_fdr=0.4).
+        """  # noqa: D205, D212
         return super().set_fdr(data, est_fdr, modality_key, *args, **kwargs)
     set_fdr.__doc__ = CompositionalModel2.set_fdr.__doc__ + set_fdr.__doc__

pertpy 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

pertpy 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl