PyPI - pertpy - Versions diffs - 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

pertpy 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

pertpy/__init__.py +1 -1
pertpy/data/_dataloader.py +61 -58
pertpy/metadata/_cell_line.py +9 -3
pertpy/tools/__init__.py +18 -27
pertpy/tools/_coda/_base_coda.py +10 -4
pertpy/tools/_coda/_sccoda.py +84 -56
pertpy/tools/_coda/_tasccoda.py +91 -61
pertpy/tools/_dialogue.py +3 -3
pertpy/tools/_differential_gene_expression/__init__.py +45 -4
pertpy/tools/_differential_gene_expression/_base.py +2 -1
pertpy/tools/_differential_gene_expression/_edger.py +9 -12
pertpy/tools/_differential_gene_expression/_pydeseq2.py +0 -2
pertpy/tools/_distances/_distance_tests.py +2 -2
pertpy/tools/_distances/_distances.py +33 -8
pertpy/tools/_milo.py +3 -1
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +16 -25
pertpy/tools/_perturbation_space/_simple.py +8 -0
{pertpy-1.0.0.dist-info → pertpy-1.0.2.dist-info}/METADATA +51 -72
{pertpy-1.0.0.dist-info → pertpy-1.0.2.dist-info}/RECORD +21 -21
{pertpy-1.0.0.dist-info → pertpy-1.0.2.dist-info}/WHEEL +1 -1
{pertpy-1.0.0.dist-info → pertpy-1.0.2.dist-info}/licenses/LICENSE +0 -0

pertpy/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 __author__ = "Lukas Heumos"
 __email__ = "lukas.heumos@posteo.net"
-__version__ = "1.0.0"
+__version__ = "1.0.2"
 import warnings

pertpy/data/_dataloader.py CHANGED Viewed

@@ -49,66 +49,69 @@ def _download(  # pragma: no cover
     Path(output_path).mkdir(parents=True, exist_ok=True)
     lock_path = Path(output_path) / f"{output_file_name}.lock"
-    with FileLock(lock_path, timeout=300):
-        if Path(download_to_path).exists() and not overwrite:
-            logger.warning(f"File {download_to_path} already exists!")
-            return download_to_path
-        temp_file_name = Path(f"{download_to_path}.part")
-        retry_count = 0
-        while retry_count <= max_retries:
-            try:
-                head_response = requests.head(url, timeout=timeout)
-                head_response.raise_for_status()
-                content_length = int(head_response.headers.get("content-length", 0))
-                free_space = shutil.disk_usage(output_path).free
-                if content_length > free_space:
-                    raise OSError(
-                        f"Insufficient disk space. Need {content_length} bytes, but only {free_space} available."
-                    )
-                response = requests.get(url, stream=True)
-                response.raise_for_status()
-                total = int(response.headers.get("content-length", 0))
-                with Progress(refresh_per_second=5) as progress:
-                    task = progress.add_task("[red]Downloading...", total=total)
-                    with Path(temp_file_name).open("wb") as file:
-                        for data in response.iter_content(block_size):
-                            file.write(data)
-                            progress.update(task, advance=len(data))
-                        progress.update(task, completed=total, refresh=True)
-                Path(temp_file_name).replace(download_to_path)
-                if is_zip:
-                    with ZipFile(download_to_path, "r") as zip_obj:
-                        zip_obj.extractall(path=output_path)
-                    return Path(output_path)
+    try:
+        with FileLock(lock_path, timeout=300):
+            if Path(download_to_path).exists() and not overwrite:
+                logger.warning(f"File {download_to_path} already exists!")
                 return download_to_path
-            except (OSError, RequestException) as e:
-                retry_count += 1
-                if retry_count <= max_retries:
-                    logger.warning(
-                        f"Download attempt {retry_count}/{max_retries} failed: {str(e)}. Retrying in {retry_delay} seconds..."
-                    )
-                    time.sleep(retry_delay)
-                else:
-                    logger.error(f"Download failed after {max_retries} attempts: {str(e)}")
+            temp_file_name = Path(f"{download_to_path}.part")
+            retry_count = 0
+            while retry_count <= max_retries:
+                try:
+                    head_response = requests.head(url, timeout=timeout)
+                    head_response.raise_for_status()
+                    content_length = int(head_response.headers.get("content-length", 0))
+                    free_space = shutil.disk_usage(output_path).free
+                    if content_length > free_space:
+                        raise OSError(
+                            f"Insufficient disk space. Need {content_length} bytes, but only {free_space} available."
+                        )
+                    response = requests.get(url, stream=True)
+                    response.raise_for_status()
+                    total = int(response.headers.get("content-length", 0))
+                    with Progress(refresh_per_second=5) as progress:
+                        task = progress.add_task("[red]Downloading...", total=total)
+                        with Path(temp_file_name).open("wb") as file:
+                            for data in response.iter_content(block_size):
+                                file.write(data)
+                                progress.update(task, advance=len(data))
+                            progress.update(task, completed=total, refresh=True)
+                    Path(temp_file_name).replace(download_to_path)
+                    if is_zip:
+                        with ZipFile(download_to_path, "r") as zip_obj:
+                            zip_obj.extractall(path=output_path)
+                        return Path(output_path)
+                    return download_to_path
+                except (OSError, RequestException) as e:
+                    retry_count += 1
+                    if retry_count <= max_retries:
+                        logger.warning(
+                            f"Download attempt {retry_count}/{max_retries} failed: {str(e)}. Retrying in {retry_delay} seconds..."
+                        )
+                        time.sleep(retry_delay)
+                    else:
+                        logger.error(f"Download failed after {max_retries} attempts: {str(e)}")
+                        if Path(temp_file_name).exists():
+                            Path(temp_file_name).unlink(missing_ok=True)
+                        raise
+                except Exception as e:
+                    logger.error(f"Download failed: {str(e)}")
                     if Path(temp_file_name).exists():
                         Path(temp_file_name).unlink(missing_ok=True)
                     raise
+                finally:
+                    if Path(temp_file_name).exists():
+                        Path(temp_file_name).unlink(missing_ok=True)
+    finally:
+        lock_path.unlink(missing_ok=True)
-            except Exception as e:
-                logger.error(f"Download failed: {str(e)}")
-                if Path(temp_file_name).exists():
-                    Path(temp_file_name).unlink(missing_ok=True)
-                raise
-            finally:
-                if Path(temp_file_name).exists():
-                    Path(temp_file_name).unlink(missing_ok=True)
-        return Path(download_to_path)
+    return Path(download_to_path)

pertpy/metadata/_cell_line.py CHANGED Viewed

@@ -195,7 +195,9 @@ class CellLine(MetaData):
                 block_size=4096,
                 is_zip=False,
             )
-        df = pd.read_csv(drug_response_prism_file_path, index_col=0)[["depmap_id", "name", "ic50", "ec50", "auc"]]
+        df = pd.read_csv(
+            drug_response_prism_file_path, index_col=0, usecols=["broad_id", "depmap_id", "name", "ic50", "ec50", "auc"]
+        )
         df = df.dropna(subset=["depmap_id", "name"])
         df = df.groupby(["depmap_id", "name"]).mean().reset_index()
         self.drug_response_prism = df
@@ -568,7 +570,9 @@ class CellLine(MetaData):
             verbosity=verbosity,
         )
-        old_index_name = "index" if adata.obs.index.name is None else adata.obs.index.name
+        if adata.obs.index.name is None:
+            adata.obs.index.name = "original_index"
+        old_index_name = adata.obs.index.name
         adata.obs = (
             adata.obs.reset_index()
             .set_index([query_id, query_perturbation])
@@ -635,7 +639,9 @@ class CellLine(MetaData):
             verbosity=verbosity,
         )
-        old_index_name = "index" if adata.obs.index.name is None else adata.obs.index.name
+        if adata.obs.index.name is None:
+            adata.obs.index.name = "original_index"
+        old_index_name = adata.obs.index.name
         adata.obs = (
             adata.obs.reset_index()
             .set_index([query_id, "perturbation_lower"])

pertpy/tools/__init__.py CHANGED Viewed

@@ -1,24 +1,5 @@
 from importlib import import_module
-def lazy_import(module_path: str, class_name: str, extras: list[str]):
-    try:
-        for extra in extras:
-            import_module(extra)
-        module = import_module(module_path)
-        return getattr(module, class_name)
-    except ImportError:
-        class Placeholder:
-            def __init__(self, *args, **kwargs):
-                raise ImportError(
-                    f"Extra dependencies required: {', '.join(extras)}. "
-                    f"Please install with: pip install {' '.join(extras)}"
-                )
-        return Placeholder
 from pertpy.tools._augur import Augur
 from pertpy.tools._cinemaot import Cinemaot
 from pertpy.tools._coda._sccoda import Sccoda
@@ -42,15 +23,25 @@ from pertpy.tools._perturbation_space._simple import (
 )
 from pertpy.tools._scgen import Scgen
-CODA_EXTRAS = ["toytree", "ete4"]  # also "pyqt6" but it cannot be imported
-Tasccoda = lazy_import("pertpy.tools._coda._tasccoda", "Tasccoda", CODA_EXTRAS)
-DE_EXTRAS = ["formulaic", "pydeseq2"]
-EdgeR = lazy_import("pertpy.tools._differential_gene_expression", "EdgeR", DE_EXTRAS)  # edgeR will be imported via rpy2
-PyDESeq2 = lazy_import("pertpy.tools._differential_gene_expression", "PyDESeq2", DE_EXTRAS)
-Statsmodels = lazy_import("pertpy.tools._differential_gene_expression", "Statsmodels", DE_EXTRAS + ["statsmodels"])
-TTest = lazy_import("pertpy.tools._differential_gene_expression", "TTest", DE_EXTRAS)
-WilcoxonTest = lazy_import("pertpy.tools._differential_gene_expression", "WilcoxonTest", DE_EXTRAS)
+def __getattr__(name: str):
+    if name == "Tasccoda":
+        try:
+            for extra in ["toytree", "ete4"]:
+                import_module(extra)
+            module = import_module("pertpy.tools._coda._tasccoda")
+            return module.Tasccoda
+        except ImportError:
+            raise ImportError(
+                "Extra dependencies required: toytree, ete4. Please install with: pip install toytree ete4"
+            ) from None
+    elif name in ["EdgeR", "PyDESeq2", "Statsmodels", "TTest", "WilcoxonTest"]:
+        module = import_module("pertpy.tools._differential_gene_expression")
+        return getattr(module, name)
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 __all__ = [
     "Augur",

pertpy/tools/_coda/_base_coda.py CHANGED Viewed

@@ -1181,7 +1181,7 @@ class CompositionalModel2(ABC):
                 r,
                 bars,
                 bottom=cum_bars,
-                color=palette(n % palette.N),
+                color=palette(n % palette.N),  # type: ignore
                 width=barwidth,
                 label=type_names[n],
                 linewidth=0,
@@ -1377,6 +1377,7 @@ class CompositionalModel2(ABC):
         plot_df.columns = covariate_names
         plot_df = pd.melt(plot_df, ignore_index=False, var_name="Covariate")
+        plot_df.index.name = "Cell Type"
         plot_df = plot_df.reset_index()
         if len(covariate_names_zero) != 0 and plot_facets and plot_zero_covariate and not plot_zero_cell_type:
@@ -1472,6 +1473,7 @@ class CompositionalModel2(ABC):
         if return_fig and not plot_facets:
             return plt.gcf()
         plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1823,6 +1825,7 @@ class CompositionalModel2(ABC):
         if return_fig:
             return plt.gcf()
         plt.show()
         return None
     @_doc_params(common_plot_args=doc_common_plot_args)
@@ -1881,7 +1884,7 @@ class CompositionalModel2(ABC):
             from ete4.treeview import CircleFace, NodeStyle, TextFace, TreeStyle, faces
         except ImportError:
             raise ImportError(
-                "To use tasccoda please install additional dependencies with `pip install pertpy[coda]`"
+                "To use tasccoda please install additional dependencies: `pip install pertpy[coda]`"
             ) from None
         if isinstance(data, MuData):
@@ -1902,8 +1905,8 @@ class CompositionalModel2(ABC):
             tree.render(save, tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi)  # type: ignore
         if return_fig:
             return tree, tree_style
         return tree.render("%%inline", tree_style=tree_style, units=units, w=figsize[0], h=figsize[1], dpi=dpi)  # type: ignore
-        return None
     @_doc_params(common_plot_args=doc_common_plot_args)
     def plot_draw_effects(  # pragma: no cover # noqa: D417
@@ -1969,7 +1972,7 @@ class CompositionalModel2(ABC):
             from ete4.treeview import CircleFace, NodeStyle, TextFace, TreeStyle, faces
         except ImportError:
             raise ImportError(
-                "To use tasccoda please install additional dependencies as `pip install pertpy[coda]`"
+                "To use tasccoda please install additional dependencies: `pip install pertpy[coda]`"
             ) from None
         if isinstance(data, MuData):
@@ -2207,6 +2210,7 @@ class CompositionalModel2(ABC):
         if return_fig:
             return fig
         plt.show()
         return None
@@ -2325,6 +2329,7 @@ def df2newick(df: pd.DataFrame, levels: list[str], inner_label: bool = True) ->
     strs = [traverse(df_tax, a, 0, inner_label) for a in alevel]
     newick = f"({','.join(strs)});"
     return newick
@@ -2562,6 +2567,7 @@ def from_scanpy(
     covariate_obs = list(set(covariate_obs or []) | set(sample_identifier))
     if isinstance(sample_identifier, list):
+        adata.obs = adata.obs.copy()
         adata.obs["scCODA_sample_id"] = adata.obs[sample_identifier].agg("-".join, axis=1)
         sample_identifier = "scCODA_sample_id"

pertpy/tools/_coda/_sccoda.py CHANGED Viewed

@@ -303,7 +303,7 @@ class Sccoda(CompositionalModel2):
         self,
         data: AnnData | MuData,
         modality_key: str = "coda",
-        rng_key=None,
+        rng_key: int | None = None,
         num_prior_samples: int = 500,
         use_posterior_predictive: bool = True,
     ) -> az.InferenceData:
@@ -381,6 +381,8 @@ class Sccoda(CompositionalModel2):
         if rng_key is None:
             rng = np.random.default_rng()
             rng_key = random.key(rng.integers(0, 10000))
+        else:
+            rng_key = random.key(rng_key)
         if use_posterior_predictive:
             posterior_predictive = Predictive(self.model, self.mcmc.get_samples())(
@@ -391,6 +393,15 @@ class Sccoda(CompositionalModel2):
                 ref_index=ref_index,
                 sample_adata=sample_adata,
             )
+            # Remove problematic posterior predictive arrays with wrong dimensions
+            if posterior_predictive and "counts" in posterior_predictive:
+                counts_shape = posterior_predictive["counts"].shape
+                expected_dims = 2  # ['sample', 'cell_type']
+                if len(counts_shape) != expected_dims:
+                    posterior_predictive = {k: v for k, v in posterior_predictive.items() if k != "counts"}
+                    logger.warning(
+                        f"Removed 'counts' from posterior_predictive due to dimension mismatch: got {len(counts_shape)}D, expected {expected_dims}D"
+                    )
         else:
             posterior_predictive = None
@@ -403,6 +414,15 @@ class Sccoda(CompositionalModel2):
                 ref_index=ref_index,
                 sample_adata=sample_adata,
             )
+            # Remove problematic prior arrays with wrong dimensions
+            if prior and "counts" in prior:
+                counts_shape = prior["counts"].shape
+                expected_dims = 2  # ['sample', 'cell_type']
+                if len(counts_shape) != expected_dims:
+                    prior = {k: v for k, v in prior.items() if k != "counts"}
+                    logger.warning(
+                        f"Removed 'counts' from prior due to dimension mismatch: got {len(counts_shape)}D, expected {expected_dims}D"
+                    )
         else:
             prior = None
@@ -426,76 +446,84 @@ class Sccoda(CompositionalModel2):
         *args,
         **kwargs,
     ):
-        """Examples:
-        >>> import pertpy as pt
-        >>> haber_cells = pt.dt.haber_2017_regions()
-        >>> sccoda = pt.tl.Sccoda()
-        >>> mdata = sccoda.load(haber_cells,
-        >>>                     type="cell_level",
-        >>>                     generate_sample_level=True,
-        >>>                     cell_type_identifier="cell_label",
-        >>>                     sample_identifier="batch",
-        >>>                     covariate_obs=["condition"])
-        >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
-        >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42).
-        """  # noqa: D205
+        """
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells,
+            >>>                     type="cell_level",
+            >>>                     generate_sample_level=True,
+            >>>                     cell_type_identifier="cell_label",
+            >>>                     sample_identifier="batch",
+            >>>                     covariate_obs=["condition"])
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42).
+        """  # noqa: D205, D212
         return super().run_nuts(data, modality_key, num_samples, num_warmup, rng_key, copy, *args, **kwargs)
     run_nuts.__doc__ = CompositionalModel2.run_nuts.__doc__ + run_nuts.__doc__
     def credible_effects(self, data: AnnData | MuData, modality_key: str = "coda", est_fdr: float = None) -> pd.Series:
-        """Examples:
-        >>> import pertpy as pt
-        >>> haber_cells = pt.dt.haber_2017_regions()
-        >>> sccoda = pt.tl.Sccoda()
-        >>> mdata = sccoda.load(haber_cells,
-        >>>                     type="cell_level",
-        >>>                     generate_sample_level=True,
-        >>>                     cell_type_identifier="cell_label",
-        >>>                     sample_identifier="batch",
-        >>>                     covariate_obs=["condition"])
-        >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
-        >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
-        >>> credible_effects = sccoda.credible_effects(mdata).
-        """  # noqa: D205
+        """
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells,
+            >>>                     type="cell_level",
+            >>>                     generate_sample_level=True,
+            >>>                     cell_type_identifier="cell_label",
+            >>>                     sample_identifier="batch",
+            >>>                     covariate_obs=["condition"])
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
+            >>> credible_effects = sccoda.credible_effects(mdata).
+        """  # noqa: D205, D212
         return super().credible_effects(data, modality_key, est_fdr)
     credible_effects.__doc__ = CompositionalModel2.credible_effects.__doc__ + credible_effects.__doc__
     def summary(self, data: AnnData | MuData, extended: bool = False, modality_key: str = "coda", *args, **kwargs):
-        """Examples:
-        >>> import pertpy as pt
-        >>> haber_cells = pt.dt.haber_2017_regions()
-        >>> sccoda = pt.tl.Sccoda()
-        >>> mdata = sccoda.load(haber_cells,
-        >>>                     type="cell_level",
-        >>>                     generate_sample_level=True,
-        >>>                     cell_type_identifier="cell_label",
-        >>>                     sample_identifier="batch",
-        >>>                     covariate_obs=["condition"])
-        >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
-        >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
-        >>> sccoda.summary(mdata).
-        """  # noqa: D205
+        """
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells,
+            >>>                     type="cell_level",
+            >>>                     generate_sample_level=True,
+            >>>                     cell_type_identifier="cell_label",
+            >>>                     sample_identifier="batch",
+            >>>                     covariate_obs=["condition"])
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
+            >>> sccoda.summary(mdata).
+        """  # noqa: D205, D212
         return super().summary(data, extended, modality_key, *args, **kwargs)
     summary.__doc__ = CompositionalModel2.summary.__doc__ + summary.__doc__
     def set_fdr(self, data: AnnData | MuData, est_fdr: float, modality_key: str = "coda", *args, **kwargs):
-        """Examples:
-        >>> import pertpy as pt
-        >>> haber_cells = pt.dt.haber_2017_regions()
-        >>> sccoda = pt.tl.Sccoda()
-        >>> mdata = sccoda.load(haber_cells,
-        >>>                     type="cell_level",
-        >>>                     generate_sample_level=True,
-        >>>                     cell_type_identifier="cell_label",
-        >>>                     sample_identifier="batch",
-        >>>                     covariate_obs=["condition"])
-        >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
-        >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
-        >>> sccoda.set_fdr(mdata, est_fdr=0.4).
-        """  # noqa: D205
+        """
+        Examples:
+            >>> import pertpy as pt
+            >>> haber_cells = pt.dt.haber_2017_regions()
+            >>> sccoda = pt.tl.Sccoda()
+            >>> mdata = sccoda.load(haber_cells,
+            >>>                     type="cell_level",
+            >>>                     generate_sample_level=True,
+            >>>                     cell_type_identifier="cell_label",
+            >>>                     sample_identifier="batch",
+            >>>                     covariate_obs=["condition"])
+            >>> mdata = sccoda.prepare(mdata, formula="condition", reference_cell_type="Endocrine")
+            >>> sccoda.run_nuts(mdata, num_warmup=100, num_samples=1000, rng_key=42)
+            >>> sccoda.set_fdr(mdata, est_fdr=0.4).
+        """  # noqa: D205, D212
         return super().set_fdr(data, est_fdr, modality_key, *args, **kwargs)
     set_fdr.__doc__ = CompositionalModel2.set_fdr.__doc__ + set_fdr.__doc__

pertpy 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

pertpy 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl