PyPI - pertpy - Versions diffs - 0.7.0__py3-none-any.whl → 0.9.1__py3-none-any.whl - Mend

pertpy-0.7.0-py3-none-any.whl → pertpy-0.9.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56)

pertpy/__init__.py +2 -1
pertpy/data/__init__.py +61 -0
pertpy/data/_dataloader.py +27 -23
pertpy/data/_datasets.py +58 -0
pertpy/metadata/__init__.py +2 -0
pertpy/metadata/_cell_line.py +39 -70
pertpy/metadata/_compound.py +3 -4
pertpy/metadata/_drug.py +2 -6
pertpy/metadata/_look_up.py +38 -51
pertpy/metadata/_metadata.py +7 -10
pertpy/metadata/_moa.py +2 -6
pertpy/plot/__init__.py +0 -5
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +6 -7
pertpy/tools/__init__.py +67 -6
pertpy/tools/_augur.py +14 -15
pertpy/tools/_cinemaot.py +2 -2
pertpy/tools/_coda/_base_coda.py +118 -142
pertpy/tools/_coda/_sccoda.py +16 -15
pertpy/tools/_coda/_tasccoda.py +21 -22
pertpy/tools/_dialogue.py +18 -23
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +21 -16
pertpy/tools/_distances/_distances.py +406 -70
pertpy/tools/_enrichment.py +10 -15
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +77 -54
pertpy/tools/_mixscape.py +15 -11
pertpy/tools/_perturbation_space/_clustering.py +5 -2
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +21 -23
pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
pertpy/tools/_perturbation_space/_simple.py +3 -3
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +33 -28
pertpy/tools/_scgen/_utils.py +2 -2
{pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/METADATA +32 -14
pertpy-0.9.1.dist-info/RECORD +57 -0
{pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -171
pertpy/plot/_coda.py +0 -601
pertpy/plot/_guide_rna.py +0 -64
pertpy/plot/_milopy.py +0 -209
pertpy/plot/_mixscape.py +0 -355
pertpy/tools/_differential_gene_expression.py +0 -325
pertpy-0.7.0.dist-info/RECORD +0 -53
{pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/licenses/LICENSE +0 -0

pertpy/metadata/_look_up.py CHANGED Viewed

@@ -3,11 +3,11 @@ from __future__ import annotations
 from collections import namedtuple
 from typing import TYPE_CHECKING, Literal
+from lamin_utils import logger
 if TYPE_CHECKING:
     from collections.abc import Sequence
-from rich import print
 if TYPE_CHECKING:
     import pandas as pd
@@ -24,10 +24,9 @@ class LookUp:
     ):
         """
         Args:
-            type: Metadata type for annotation. One of 'cell_line', 'compound', 'moa' or 'drug. Defaults to cell_line.
+            type: Metadata type for annotation. One of 'cell_line', 'compound', 'moa' or 'drug.
             transfer_metadata: DataFrames used to generate Lookup object.
                                This is currently set to None for CompoundMetaData which does not require any dataframes for transfer.
-                               Defaults to 'cell_line'.
         """
         self.type = type
         if type == "cell_line":
@@ -285,12 +284,11 @@ class LookUp:
         """A brief summary of cell line metadata.
         Args:
-            cell_line_source: the source of cell line annotation, DepMap or Cancerrxgene. Defaults to "DepMap".
+            cell_line_source: the source of cell line annotation, DepMap or Cancerrxgene.
             reference_id: The type of cell line identifier in the meta data, e.g. ModelID, CellLineName	or StrippedCellLineName.
-                If fetch cell line metadata from Cancerrxgene, it is recommended to choose
-                "stripped_cell_line_name". Defaults to "ModelID".
+                If fetch cell line metadata from Cancerrxgene, it is recommended to choose "stripped_cell_line_name".
             query_id_list: Unique cell line identifiers to test the number of matched ids present in the
-                metadata. If set to None, the query of metadata identifiers will be disabled. Defaults to None.
+                metadata. If set to None, the query of metadata identifiers will be disabled.
         """
         if self.type != "cell_line":
             raise ValueError("This is not a LookUp object specifically for CellLineMetaData!")
@@ -313,8 +311,8 @@ class LookUp:
                     )
                 not_matched_identifiers = list(set(query_id_list) - set(self.cl_cancer_project_meta[reference_id]))
-            print(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
-            print(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
+            logger.info(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
+            logger.info(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
     def available_bulk_rna(
         self,
@@ -324,9 +322,9 @@ class LookUp:
         """A brief summary of bulk RNA expression data.
         Args:
-            cell_line_source: the source of RNA-seq data, broad or sanger. Defaults to "sanger".
+            cell_line_source: the source of RNA-seq data, broad or sanger.
             query_id_list: Unique cell line identifiers to test the number of matched ids present in the
-                metadata. If set to None, the query of metadata identifiers will be disabled. Defaults to None.
+                metadata. If set to None, the query of metadata identifiers will be disabled.
         """
         if self.type != "cell_line":
             raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
@@ -340,8 +338,8 @@ class LookUp:
             identifier_num_all = len(query_id_list)
             not_matched_identifiers = list(set(query_id_list) - set(bulk_rna.index))
-            print(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
-            print(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
+            logger.info(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
+            logger.info(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
     def available_protein_expression(
         self,
@@ -352,9 +350,8 @@ class LookUp:
         Args:
             reference_id: The type of cell line identifier in the meta data, model_name or model_id.
-                Defaults to "model_name".
             query_id_list: Unique cell line identifiers to test the number of matched ids present in the
-                metadata. If set to None, the query of metadata identifiers will be disabled. Defaults to None.
+                metadata. If set to None, the query of metadata identifiers will be disabled.
         """
         if self.type != "cell_line":
             raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
@@ -367,8 +364,8 @@ class LookUp:
                     f"The specified `reference_id` {reference_id} is not available in the proteomics data. "
                 )
             not_matched_identifiers = list(set(query_id_list) - set(self.proteomics_data[reference_id]))
-            print(f"[bold blue]{len(not_matched_identifiers)} cell lines are not found in the metadata.")
-            print(f"[bold yellow]{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
+            logger.info(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
+            logger.info(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
     def available_drug_response(
         self,
@@ -381,20 +378,16 @@ class LookUp:
         """A brief summary of drug response data.
         Args:
-            gdsc_dataset: The GDSC dataset, 1 or 2. Defaults to 1.
+            gdsc_dataset: The GDSC dataset, 1 or 2.
                           The GDSC1 dataset updates previous releases with additional drug screening data from the Wellcome Sanger Institute and Massachusetts General Hospital.
                           It covers 970 Cell lines and 403 Compounds with 333292 IC50s.
                           GDSC2 is new and has 243,466 IC50 results from the latest screening at the Wellcome Sanger Institute using improved experimental procedures.
             reference_id: The type of cell line identifier in the meta data, cell_line_name, sanger_model_id or cosmic_id.
-                          Defaults to 'cell_line_name'.
             query_id_list: Unique cell line identifiers to test the number of matched ids present in the metadata.
                            If set to None, the query of metadata identifiers will be disabled.
-                           Defaults to None.
             reference_perturbation: The perturbation information in the meta data, drug_name or drug_id.
-                                    Defaults to 'drug_name'.
             query_perturbation_list: Unique perturbation types to test the number of matched ones present in the metadata.
                                      If set to None, the query of perturbation types will be disabled.
-                                     Defaults to None.
         """
         if self.type != "cell_line":
             raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
@@ -410,8 +403,8 @@ class LookUp:
                 )
             identifier_num_all = len(query_id_list)
             not_matched_identifiers = list(set(query_id_list) - set(gdsc_data[reference_id]))
-            print(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
-            print(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
+            logger.info(f"{len(not_matched_identifiers)} cell lines are not found in the metadata.")
+            logger.info(f"{identifier_num_all - len(not_matched_identifiers)} cell lines are found! ")
         if query_perturbation_list is not None:
             if reference_perturbation not in gdsc_data.columns:
@@ -420,8 +413,8 @@ class LookUp:
                 )
             identifier_num_all = len(query_perturbation_list)
             not_matched_identifiers = list(set(query_perturbation_list) - set(gdsc_data[reference_perturbation]))
-            print(f"{len(not_matched_identifiers)} perturbation types are not found in the metadata.")
-            print(f"{identifier_num_all - len(not_matched_identifiers)} perturbation types are found! ")
+            logger.info(f"{len(not_matched_identifiers)} perturbation types are not found in the metadata.")
+            logger.info(f"{identifier_num_all - len(not_matched_identifiers)} perturbation types are found! ")
     def available_genes_annotation(
         self,
@@ -432,22 +425,20 @@ class LookUp:
         Args:
             reference_id: The type of gene identifier in the meta data, gene_id, ensembl_gene_id, hgnc_id, hgnc_symbol.
-                          Defaults to "ensembl_gene_id".
             query_id_list: Unique gene identifiers to test the number of matched ids present in the metadata.
-                           Defaults to None.
         """
         if self.type != "cell_line":
             raise ValueError("This is not a LookUp object specific for CellLineMetaData!")
-        print("To summarize: in the DepMap_Sanger gene annotation file, you can find: ")
-        print(f"{len(self.gene_annotation.index)} driver genes")
-        print(
+        logger.info("To summarize: in the DepMap_Sanger gene annotation file, you can find: ")
+        logger.info(f"{len(self.gene_annotation.index)} driver genes")
+        logger.info(
             f"{len(self.gene_annotation.columns)} meta data including: ",
             *list(self.gene_annotation.columns.values),
             sep="\n- ",
         )
-        print("Overview of gene annotation: ")
-        print(self.gene_annotation.head().to_string())
+        logger.info("Overview of gene annotation: ")
+        logger.info(self.gene_annotation.head().to_string())
         """
         #not implemented yet
         print("Default parameters to annotate gene annotation: ")
@@ -472,26 +463,24 @@ class LookUp:
         Args:
             query_id_list: Unique perturbagens to test the number of matched ones present in the metadata.
                            If set to None, the query of metadata perturbagens will be disabled.
-                           Defaults to None.
             target_list: Unique molecular targets to test the number of matched ones present in the metadata.
                          If set to None, the comparison of molecular targets in the query of metadata perturbagens will be disabled.
-                         Defaults to None.
         """
-        if self.type != "moa":
-            raise ValueError("This is not a LookUp object specific for MoaMetaData!")
         if query_id_list is not None:
+            if self.type != "moa":
+                raise ValueError("This is not a LookUp object specific for MoaMetaData!")
             identifier_num_all = len(query_id_list)
             not_matched_identifiers = list(set(query_id_list) - set(self.moa_meta.pert_iname))
-            print(f"{len(not_matched_identifiers)} perturbagens are not found in the metadata.")
-            print(f"{identifier_num_all - len(not_matched_identifiers)} perturbagens are found! ")
+            logger.info(f"{len(not_matched_identifiers)} perturbagens are not found in the metadata.")
+            logger.info(f"{identifier_num_all - len(not_matched_identifiers)} perturbagens are found! ")
         if target_list is not None:
             targets = self.moa_meta.target.astype(str).apply(lambda x: x.split("|"))
             all_targets = [t for tl in targets for t in tl]
             identifier_num_all = len(target_list)
             not_matched_identifiers = list(set(target_list) - set(all_targets))
-            print(f"{len(not_matched_identifiers)} molecular targets are not found in the metadata.")
-            print(f"{identifier_num_all - len(not_matched_identifiers)} molecular targets are found! ")
+            logger.info(f"{len(not_matched_identifiers)} molecular targets are not found in the metadata.")
+            logger.info(f"{identifier_num_all - len(not_matched_identifiers)} molecular targets are found! ")
     def available_compounds(
         self,
@@ -503,8 +492,7 @@ class LookUp:
         Args:
             query_id_list: Unique compounds to test the number of matched ones present in the metadata.
                         If set to None, query of compound identifiers will be disabled.
-                        Defaults to None.
-            query_id_type: The type of compound identifiers, name or cid. Defaults to 'name'.
+            query_id_type: The type of compound identifiers, name or cid.
         """
         if self.type != "compound":
             raise ValueError("This is not a LookUp object specific for CompoundData!")
@@ -523,8 +511,8 @@ class LookUp:
                     except pcp.BadRequestError:
                         not_matched_identifiers.append(compound)
-            print(f"{len(not_matched_identifiers)} compounds are not found in the metadata.")
-            print(f"{identifier_num_all - len(not_matched_identifiers)} compounds are found! ")
+            logger.info(f"{len(not_matched_identifiers)} compounds are not found in the metadata.")
+            logger.info(f"{identifier_num_all - len(not_matched_identifiers)} compounds are found! ")
     def available_drug_annotation(
         self,
@@ -535,11 +523,10 @@ class LookUp:
         """A brief summary of drug annotation.
         Args:
-            drug_annotation_source: the source of drug annotation data, chembl, dgidb or pharmgkb. Defaults to "chembl".
+            drug_annotation_source: the source of drug annotation data, chembl, dgidb or pharmgkb.
             query_id_list: Unique target or compound names to test the number of matched ones present in the metadata.
                         If set to None, query of compound identifiers will be disabled.
-                        Defaults to None.
-            query_id_type: The type of identifiers, target, compound and disease(pharmgkb only). Defaults to 'target'.
+            query_id_type: The type of identifiers, target, compound and disease(pharmgkb only).
         """
         if self.type != "drug":
             raise ValueError("This is not a LookUp object specific for DrugMetaData!")
@@ -578,5 +565,5 @@ class LookUp:
                     diseases = self.pharmgkb[self.pharmgkb["Type"] == "Disease"]
                     not_matched_identifiers = list(set(query_id_list) - set(diseases["Compound|Disease"]))
-            print(f"{len(not_matched_identifiers)} {query_id_type}s are not found in the metadata.")
-            print(f"{identifier_num_all - len(not_matched_identifiers)} {query_id_type}s are found! ")
+            logger.info(f"{len(not_matched_identifiers)} {query_id_type}s are not found in the metadata.")
+            logger.info(f"{identifier_num_all - len(not_matched_identifiers)} {query_id_type}s are found! ")

pertpy/metadata/_metadata.py CHANGED Viewed

@@ -2,6 +2,8 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Literal
+from lamin_utils import logger
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -31,12 +33,10 @@ class MetaData:
             total_identifiers: The total number of identifiers in the `adata` object.
             unmatched_identifiers: Unmatched identifiers in the `adata` object.
             query_id: The column of `.obs` with cell line information.
-            reference_id: The type of cell line identifier in the meta data.
+            reference_id: The type of cell line identifier in the metadata.
             metadata_type: The type of metadata where some identifiers are not matched during annotation such as
                            cell line, protein expression, bulk RNA expression, drug response, moa or compound.
-                           Defaults to 'cell line'.
             verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
-                       Defaults to 5.
         """
         if isinstance(verbosity, str):
             if verbosity != "all":
@@ -60,14 +60,11 @@ class MetaData:
         if isinstance(verbosity, int) and verbosity >= 0:
             verbosity = min(verbosity, len(unmatched_identifiers))
             if verbosity > 0:
-                print(
-                    f"[bold blue]There are {total_identifiers} identifiers in `adata.obs`."
+                logger.info(
+                    f"There are {total_identifiers} identifiers in `adata.obs`."
                     f"However, {len(unmatched_identifiers)} identifiers can't be found in the {metadata_type} annotation,"
-                    "leading to the presence of NA values for their respective metadata.\n",
-                    "Please check again: ",
-                    *unmatched_identifiers[:verbosity],
-                    "...",
-                    sep="\n- ",
+                    "leading to the presence of NA values for their respective metadata.\n"
+                    f"Please check again: *unmatched_identifiers[:verbosity]..."
                 )
         else:
             raise ValueError("Only 'all' or a non-negative value is accepted.")

pertpy/metadata/_moa.py CHANGED Viewed

@@ -5,7 +5,6 @@ from typing import TYPE_CHECKING
 import numpy as np
 import pandas as pd
-from rich import print
 from scanpy import settings
 from pertpy.data._dataloader import _download
@@ -26,7 +25,6 @@ class Moa(MetaData):
     def _download_clue(self) -> None:
         clue_path = Path(settings.cachedir) / "repurposing_drugs_20200324.txt"
         if not Path(clue_path).exists():
-            print("[bold yellow]No metadata file was found for clue. Starting download now.")
             _download(
                 url="https://s3.amazonaws.com/data.clue.io/repurposing/downloads/repurposing_drugs_20200324.txt",
                 output_file_name="repurposing_drugs_20200324.txt",
@@ -51,12 +49,10 @@ class Moa(MetaData):
         Args:
             adata: The data object to annotate.
-            query_id: The column of `.obs` with the name of a perturbagen. Defaults to 'perturbation'.
+            query_id: The column of `.obs` with the name of a perturbagen.
             target: The column of `.obs` with target information. If set to None, all MoAs are retrieved without comparing molecular targets.
-                    Defaults to None.
             verbosity: The number of unmatched identifiers to print, can be either non-negative values or 'all'.
-                       Defaults to 5.
-            copy: Determines whether a copy of the `adata` is returned. Defaults to False.
+            copy: Determines whether a copy of the `adata` is returned.
         Returns:
             Returns an AnnData object with MoA annotation.

pertpy/plot/__init__.py CHANGED Viewed

@@ -1,5 +0,0 @@
-from pertpy.plot._augur import AugurpyPlot as ag
-from pertpy.plot._coda import CodaPlot as coda
-from pertpy.plot._guide_rna import GuideRnaPlot as guide
-from pertpy.plot._milopy import MilopyPlot as milo
-from pertpy.plot._mixscape import MixscapePlot as ms

pertpy/preprocessing/__init__.py CHANGED Viewed

@@ -1 +1,3 @@
 from ._guide_rna import GuideAssignment
+__all__ = ["GuideAssignment"]

pertpy/preprocessing/_guide_rna.py CHANGED Viewed

@@ -34,9 +34,8 @@ class GuideAssignment:
             assignment_threshold: The count threshold that is required for an assignment to be viable.
             layer: Key to the layer containing raw count values of the gRNAs.
                    adata.X is used if layer is None. Expects count data.
-            output_layer: Assigned guide will be saved on adata.layers[output_key]. Defaults to `assigned_guides`.
+            output_layer: Assigned guide will be saved on adata.layers[output_key].
             only_return_results: If True, input AnnData is not modified and the result is returned as an np.ndarray.
-                                 Defaults to False.
         Examples:
             Each cell is assigned to gRNA that occurs at least 5 times in the respective cell.
@@ -49,7 +48,7 @@ class GuideAssignment:
         """
         counts = adata.X if layer is None else adata.layers[layer]
         if scipy.sparse.issparse(counts):
-            counts = counts.A
+            counts = counts.toarray()
         assigned_grnas = np.where(counts >= assignment_threshold, 1, 0)
         assigned_grnas = scipy.sparse.csr_matrix(assigned_grnas)
@@ -93,7 +92,7 @@ class GuideAssignment:
         """
         counts = adata.X if layer is None else adata.layers[layer]
         if scipy.sparse.issparse(counts):
-            counts = counts.A
+            counts = counts.toarray()
         assigned_grna = np.where(
             counts.max(axis=1).squeeze() >= assignment_threshold,
@@ -127,7 +126,7 @@ class GuideAssignment:
             adata: Annotated data matrix containing gRNA values
             layer: Key to the layer containing log normalized count values of the gRNAs.
                    adata.X is used if layer is None.
-            order_by: The order of cells in y axis. Defaults to None.
+            order_by: The order of cells in y axis.
                       If None, cells will be reordered to have a nice sparse representation.
                       If a string is provided, adata.obs[order_by] will be used as the order.
                       If a numpy array is provided, the array will be used for ordering.
@@ -153,9 +152,9 @@ class GuideAssignment:
         if order_by is None:
             if scipy.sparse.issparse(data):
-                max_values = data.max(axis=1).A.squeeze()
+                max_values = data.max(axis=1).toarray().squeeze()
                 data_argmax = data.argmax(axis=1).A.squeeze()
-                max_guide_index = np.where(max_values != data.min(axis=1).A.squeeze(), data_argmax, -1)
+                max_guide_index = np.where(max_values != data.min(axis=1).toarray().squeeze(), data_argmax, -1)
             else:
                 max_guide_index = np.where(
                     data.max(axis=1).squeeze() != data.min(axis=1).squeeze(), data.argmax(axis=1).squeeze(), -1

pertpy/tools/__init__.py CHANGED Viewed

@@ -1,19 +1,80 @@
+from importlib import import_module
+def lazy_import(module_path, class_name, extras):
+    def _import():
+        try:
+            for extra in extras:
+                import_module(extra)
+        except ImportError as e:
+            raise ImportError(
+                f"Extra dependencies required: {', '.join(extras)}. "
+                f"Please install with: pip install {' '.join(extras)}"
+            ) from e
+        module = import_module(module_path)
+        return getattr(module, class_name)
+    return _import
 from pertpy.tools._augur import Augur
 from pertpy.tools._cinemaot import Cinemaot
-from pertpy.tools._coda._sccoda import Sccoda
-from pertpy.tools._coda._tasccoda import Tasccoda
 from pertpy.tools._dialogue import Dialogue
-from pertpy.tools._differential_gene_expression import DifferentialGeneExpression
 from pertpy.tools._distances._distance_tests import DistanceTest
 from pertpy.tools._distances._distances import Distance
 from pertpy.tools._enrichment import Enrichment
 from pertpy.tools._milo import Milo
 from pertpy.tools._mixscape import Mixscape
 from pertpy.tools._perturbation_space._clustering import ClusteringSpace
+from pertpy.tools._perturbation_space._comparison import PerturbationComparison
 from pertpy.tools._perturbation_space._discriminator_classifiers import (
-    DiscriminatorClassifierSpace,
     LRClassifierSpace,
     MLPClassifierSpace,
 )
-from pertpy.tools._perturbation_space._simple import CentroidSpace, DBSCANSpace, KMeansSpace, PseudobulkSpace
-from pertpy.tools._scgen import SCGEN
+from pertpy.tools._perturbation_space._simple import (
+    CentroidSpace,
+    DBSCANSpace,
+    KMeansSpace,
+    PseudobulkSpace,
+)
+from pertpy.tools._scgen import Scgen
+# from pertpy.tools._differential_gene_expression import DGEEVAL
+CODA_EXTRAS = ["toytree", "arviz", "ete3"]  # also pyqt5 technically
+Sccoda = lazy_import("pertpy.tools._coda._sccoda", "Sccoda", CODA_EXTRAS)
+Tasccoda = lazy_import("pertpy.tools._coda._tasccoda", "Tasccoda", CODA_EXTRAS)
+DE_EXTRAS = ["formulaic", "pydeseq2"]
+EdgeR = lazy_import("pertpy.tools._differential_gene_expression", "EdgeR", DE_EXTRAS + ["edger"])
+PyDESeq2 = lazy_import("pertpy.tools._differential_gene_expression", "PyDESeq2", DE_EXTRAS)
+Statsmodels = lazy_import("pertpy.tools._differential_gene_expression", "Statsmodels", DE_EXTRAS + ["statsmodels"])
+TTest = lazy_import("pertpy.tools._differential_gene_expression", "TTest", DE_EXTRAS)
+WilcoxonTest = lazy_import("pertpy.tools._differential_gene_expression", "WilcoxonTest", DE_EXTRAS)
+__all__ = [
+    "Augur",
+    "Cinemaot",
+    "Sccoda",
+    "Tasccoda",
+    "Dialogue",
+    "EdgeR",
+    "PyDESeq2",
+    "WilcoxonTest",
+    "TTest",
+    "Statsmodels",
+    "DistanceTest",
+    "Distance",
+    "Enrichment",
+    "Milo",
+    "Mixscape",
+    "ClusteringSpace",
+    "LRClassifierSpace",
+    "MLPClassifierSpace",
+    "CentroidSpace",
+    "DBSCANSpace",
+    "KMeansSpace",
+    "PseudobulkSpace",
+    "Scgen",
+    "DGEEVAL",
+]

pertpy/tools/_augur.py CHANGED Viewed

@@ -14,6 +14,7 @@ import scanpy as sc
 import statsmodels.api as sm
 from anndata import AnnData
 from joblib import Parallel, delayed
+from lamin_utils import logger
 from rich import print
 from rich.progress import track
 from scipy import sparse, stats
@@ -127,7 +128,7 @@ class Augur:
                     _ = input[cell_type_col]
                     _ = input[label_col]
                 except KeyError:
-                    print("[bold red]No column names matching cell_type_col and label_col.")
+                    logger.error("No column names matching cell_type_col and label_col.")
             label = input[label_col] if meta is None else meta[label_col]
             cell_type = input[cell_type_col] if meta is None else meta[cell_type_col]
@@ -140,7 +141,7 @@ class Augur:
         if adata.obs["label"].dtype.name == "category":
             # filter samples according to label
             if condition_label is not None and treatment_label is not None:
-                print(f"Filtering samples with {condition_label} and {treatment_label} labels.")
+                logger.info(f"Filtering samples with {condition_label} and {treatment_label} labels.")
                 adata = ad.concat(
                     [adata[adata.obs["label"] == condition_label], adata[adata.obs["label"] == treatment_label]]
                 )
@@ -556,7 +557,7 @@ class Augur:
             try:
                 sc.pp.highly_variable_genes(adata)
             except ValueError:
-                print("[bold yellow]Data not normalized. Normalizing now using scanpy log1p normalize.")
+                logger.warn("Data not normalized. Normalizing now using scanpy log1p normalize.")
                 sc.pp.log1p(adata)
                 sc.pp.highly_variable_genes(adata)
@@ -608,7 +609,7 @@ class Augur:
             var_quantile: The quantile below which features will be filtered, based on their residuals in a loess model.
             filter_negative_residuals: if `True`, filter residuals at a fixed threshold of zero, instead of `var_quantile`
             span: Smoothing factor, as a fraction of the number of points to take into account.
-                  Should be in the range (0, 1]. Defaults to 0.75
+                  Should be in the range (0, 1].
         Return:
             AnnData object with additional select_variance column in var.
@@ -700,13 +701,11 @@ class Augur:
             feature_perc: proportion of genes that are randomly selected as features for input to the classifier in each
                           subsample using the random gene filter
             var_quantile: The quantile below which features will be filtered, based on their residuals in a loess model.
-                          Defaults to 0.5.
             span: Smoothing factor, as a fraction of the number of points to take into account. Should be in the range (0, 1].
-                  Defaults to 0.75.
             filter_negative_residuals: if `True`, filter residuals at a fixed threshold of zero, instead of `var_quantile`
             n_threads: number of threads to use for parallelization
             select_variance_features: Whether to select genes based on the original Augur implementation (True)
-                                      or using scanpy's highly_variable_genes (False). Defaults to True.
+                                      or using scanpy's highly_variable_genes (False).
             key_added: Key to add results to in .uns
             augur_mode: One of 'default', 'velocity' or 'permute'. Setting augur_mode = "velocity" disables feature selection,
                         assuming feature selection has been performed by the RNA velocity procedure to produce the input matrix,
@@ -751,8 +750,8 @@ class Augur:
             "full_results": defaultdict(list),
         }
         if select_variance_features:
-            print("[bold yellow]Set smaller span value in the case of a `segmentation fault` error.")
-            print("[bold yellow]Set larger span in case of svddc or other near singularities error.")
+            logger.warning("Set smaller span value in the case of a `segmentation fault` error.")
+            logger.warning("Set larger span in case of svddc or other near singularities error.")
         adata.obs["augur_score"] = nan
         for cell_type in track(adata.obs["cell_type"].unique(), description="Processing data..."):
             cell_type_subsample = adata[adata.obs["cell_type"] == cell_type].copy()
@@ -768,8 +767,8 @@ class Augur:
                     )
                 )
             if len(cell_type_subsample) < min_cells:
-                print(
-                    f"[bold red]Skipping {cell_type} cell type - {len(cell_type_subsample)} samples is less than min_cells {min_cells}."
+                logger.warning(
+                    f"Skipping {cell_type} cell type - {len(cell_type_subsample)} samples is less than min_cells {min_cells}."
                 )
             elif (
                 cell_type_subsample.obs.groupby(
@@ -778,8 +777,8 @@ class Augur:
                 ).y_.count()
                 < subsample_size
             ).any():
-                print(
-                    f"[bold red]Skipping {cell_type} cell type - the number of samples for at least one class type is less than "
+                logger.warning(
+                    f"Skipping {cell_type} cell type - the number of samples for at least one class type is less than "
                     f"subsample size {subsample_size}."
                 )
             else:
@@ -821,7 +820,7 @@ class Augur:
                 results["full_results"]["cell_type"].extend([cell_type] * folds * n_subsamples)
         # make sure one cell type worked
         if len(results) <= 2:
-            print("[bold red]No cells types had more than min_cells needed. Please adjust data or min_cells parameter.")
+            logger.warning("No cells types had more than min_cells needed. Please adjust data or min_cells parameter.")
         results["summary_metrics"] = pd.DataFrame(results["summary_metrics"])
         results["feature_importances"] = pd.DataFrame(results["feature_importances"])
@@ -850,7 +849,7 @@ class Augur:
             augur2: Augurpy results from condition 2, obtained from `predict()[1]`
             permuted1: permuted Augurpy results from condition 1, obtained from `predict()` with argument `augur_mode=permute`
             permuted2: permuted Augurpy results from condition 2, obtained from `predict()` with argument `augur_mode=permute`
-            n_subsamples: number of subsamples to pool when calculating the mean augur score for each permutation; Defaults to 50.
+            n_subsamples: number of subsamples to pool when calculating the mean augur score for each permutation.
             n_permutations: the total number of mean augur scores to calculate from a background distribution
         Returns:

pertpy/tools/_cinemaot.py CHANGED Viewed

@@ -338,7 +338,7 @@ class Cinemaot:
             sc.tl.leiden(adata, resolution=cf_resolution)
             df["ct"] = adata.obs["leiden"].astype(str)
         df["ptb"] = "control"
-        df["ptb"][adata.obs[pert_key] != control] = de.obs["leiden"].astype(str)
+        df.loc[adata.obs[pert_key] != control, "ptb"] = de.obs["leiden"].astype(str)
         label_list.append("ptb")
         df = df.groupby(label_list).sum()
         new_index = df.index.map(lambda x: "_".join(map(str, x)))
@@ -432,7 +432,7 @@ class Cinemaot:
         expr_label = "control"
         adata_.obs["ct"] = ref_label
-        adata_.obs["ct"][adata_.obs[pert_key] == control] = expr_label
+        adata_.obs.loc[adata_.obs[pert_key] == control, "ct"] = expr_label
         pert_key = "ct"
         z = np.zeros(adata_.shape[0]) + 1

pertpy 0.7.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

pertpy 0.7.0__py3-none-any.whl → 0.9.1__py3-none-any.whl