PyPI - pertpy - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

pertpy-0.7.0-py3-none-any.whl → pertpy-0.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

pertpy/__init__.py +2 -1
pertpy/data/__init__.py +61 -0
pertpy/data/_dataloader.py +27 -23
pertpy/data/_datasets.py +58 -0
pertpy/metadata/__init__.py +2 -0
pertpy/metadata/_cell_line.py +39 -70
pertpy/metadata/_compound.py +3 -4
pertpy/metadata/_drug.py +2 -6
pertpy/metadata/_look_up.py +38 -51
pertpy/metadata/_metadata.py +7 -10
pertpy/metadata/_moa.py +2 -6
pertpy/plot/__init__.py +0 -5
pertpy/preprocessing/__init__.py +2 -0
pertpy/preprocessing/_guide_rna.py +2 -3
pertpy/tools/__init__.py +42 -4
pertpy/tools/_augur.py +14 -15
pertpy/tools/_cinemaot.py +2 -2
pertpy/tools/_coda/_base_coda.py +118 -142
pertpy/tools/_coda/_sccoda.py +16 -15
pertpy/tools/_coda/_tasccoda.py +21 -22
pertpy/tools/_dialogue.py +18 -23
pertpy/tools/_differential_gene_expression/__init__.py +20 -0
pertpy/tools/_differential_gene_expression/_base.py +657 -0
pertpy/tools/_differential_gene_expression/_checks.py +41 -0
pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
pertpy/tools/_differential_gene_expression/_edger.py +125 -0
pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
pertpy/tools/_distances/_distance_tests.py +21 -16
pertpy/tools/_distances/_distances.py +406 -70
pertpy/tools/_enrichment.py +10 -15
pertpy/tools/_kernel_pca.py +1 -1
pertpy/tools/_milo.py +76 -53
pertpy/tools/_mixscape.py +15 -11
pertpy/tools/_perturbation_space/_clustering.py +5 -2
pertpy/tools/_perturbation_space/_comparison.py +112 -0
pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
pertpy/tools/_perturbation_space/_simple.py +3 -3
pertpy/tools/_scgen/__init__.py +1 -1
pertpy/tools/_scgen/_base_components.py +2 -3
pertpy/tools/_scgen/_scgen.py +33 -28
pertpy/tools/_scgen/_utils.py +2 -2
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
pertpy-0.8.0.dist-info/RECORD +57 -0
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
pertpy/plot/_augur.py +0 -171
pertpy/plot/_coda.py +0 -601
pertpy/plot/_guide_rna.py +0 -64
pertpy/plot/_milopy.py +0 -209
pertpy/plot/_mixscape.py +0 -355
pertpy/tools/_differential_gene_expression.py +0 -325
pertpy-0.7.0.dist-info/RECORD +0 -53
{pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0

pertpy/tools/_coda/_base_coda.py CHANGED Viewed

@@ -15,6 +15,7 @@ import seaborn as sns
 from adjustText import adjust_text
 from anndata import AnnData
 from jax import config, random
+from lamin_utils import logger
 from matplotlib import cm, rcParams
 from matplotlib import image as mpimg
 from matplotlib.colors import ListedColormap
@@ -110,9 +111,9 @@ class CompositionalModel2(ABC):
                 Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
                 To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
             reference_cell_type: Column name that sets the reference cell type.
-                Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samlpes will be chosen. Defaults to "automatic".
+                Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
             automatic_reference_absence_threshold: If using reference_cell_type = "automatic", determine the maximum fraction of zero entries for a cell type
-                to be considered as a possible reference cell type. Defaults to 0.05.
+                to be considered as a possible reference cell type.
         Returns:
             AnnData object that is ready for CODA models.
@@ -148,7 +149,7 @@ class CompositionalModel2(ABC):
             ref_index = np.where(cell_type_disp == min_var)[0][0]
             ref_cell_type = cell_types[ref_index]
-            print(f"[bold blue]Automatic reference selection! Reference cell type set to {ref_cell_type}")
+            logger.info(f"Automatic reference selection! Reference cell type set to {ref_cell_type}")
         # Column name as reference cell type
         elif reference_cell_type in cell_types:
@@ -160,7 +161,7 @@ class CompositionalModel2(ABC):
         # Add pseudocount if zeroes are present.
         if np.count_nonzero(sample_adata.X) != np.size(sample_adata.X):
-            print("Zero counts encountered in data! Added a pseudocount of 0.5.")
+            logger.info("Zero counts encountered in data! Added a pseudocount of 0.5.")
             sample_adata.X[sample_adata.X == 0] = 0.5
         sample_adata.obsm["sample_counts"] = np.sum(sample_adata.X, axis=1)
@@ -201,7 +202,7 @@ class CompositionalModel2(ABC):
             sample_adata: anndata object with cell counts as sample_adata.X and covariates saved in sample_adata.obs.
             kernel: A `numpyro.infer.mcmc.MCMCKernel` object
             rng_key: The rng state used. If None, a random state will be selected
-            copy: Return a copy instead of writing to adata. Defaults to False.
+            copy: Return a copy instead of writing to adata.
             args: Passed to `numpyro.infer.mcmc.MCMC`
             kwargs: Passed to `numpyro.infer.mcmc.MCMC`
@@ -237,13 +238,13 @@ class CompositionalModel2(ABC):
         acc_rate = np.array(self.mcmc.last_state.mean_accept_prob)
         if acc_rate < 0.6:
-            print(
-                f"[bold red]Acceptance rate unusually low ({acc_rate} < 0.5)! Results might be incorrect! "
+            logger.warning(
+                f"Acceptance rate unusually low ({acc_rate} < 0.5)! Results might be incorrect! "
                 f"Please check feasibility of results and re-run the sampling step with a different rng_key if necessary."
             )
         if acc_rate > 0.95:
-            print(
-                f"[bold red]Acceptance rate unusually high ({acc_rate} > 0.95)! Results might be incorrect! "
+            logger.warning(
+                f"Acceptance rate unusually high ({acc_rate} > 0.95)! Results might be incorrect! "
                 f"Please check feasibility of results and re-run the sampling step with a different rng_key if necessary."
             )
@@ -286,11 +287,11 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
-            num_samples: Number of sampled values after burn-in. Defaults to 10000.
-            num_warmup: Number of burn-in (warmup) samples. Defaults to 1000.
-            rng_key: The rng state used. Defaults to 0.
-            copy: Return a copy instead of writing to adata. Defaults to False.
+            modality_key: If data is a MuData object, specify which modality to use.
+            num_samples: Number of sampled values after burn-in.
+            num_warmup: Number of burn-in (warmup) samples.
+            rng_key: The rng state used.
+            copy: Return a copy instead of writing to adata.
         Returns:
             Calls `self.__run_mcmc`
@@ -299,7 +300,7 @@ class CompositionalModel2(ABC):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data
@@ -339,11 +340,11 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
-            num_samples: Number of sampled values after burn-in. Defaults to 20000.
-            num_warmup: Number of burn-in (warmup) samples. Defaults to 5000.
-            rng_key: The rng state used. If None, a random state will be selected. Defaults to None.
-            copy: Return a copy instead of writing to adata. Defaults to False.
+            modality_key: If data is a MuData object, specify which modality to use.
+            num_samples: Number of sampled values after burn-in.
+            num_warmup: Number of burn-in (warmup) samples.
+            rng_key: The rng state used. If None, a random state will be selected.
+            copy: Return a copy instead of writing to adata.
         Examples:
             >>> import pertpy as pt
@@ -358,7 +359,7 @@ class CompositionalModel2(ABC):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data
@@ -397,7 +398,7 @@ class CompositionalModel2(ABC):
         Args:
             sample_adata: Anndata object with cell counts as sample_adata.X and covariates saved in sample_adata.obs.
-            est_fdr: Desired FDR value. Defaults to 0.05.
+            est_fdr: Desired FDR value.
             args: Passed to ``az.summary``
             kwargs: Passed to ``az.summary``
@@ -637,8 +638,8 @@ class CompositionalModel2(ABC):
             effect_df: Effect summary, see ``summary_prepare``
             model_type: String indicating the model type ("classic" or "tree_agg")
             select_type:  String indicating the type of spike_and_slab selection ("spikeslab" or "sslasso")
-            target_fdr: Desired FDR value. Defaults to 0.05.
-            node_df: If using tree aggregation, the node-level effect DataFrame must be passed. Defaults to None.
+            target_fdr: Desired FDR value.
+            node_df: If using tree aggregation, the node-level effect DataFrame must be passed.
         Returns:
             pd.DataFrame:  effect DataFrame with inclusion probability, final parameters, expected sample.
@@ -790,8 +791,8 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
-            extended: If True, return the extended summary with additional statistics. Defaults to False.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            extended: If True, return the extended summary with additional statistics.
+            modality_key: If data is a MuData object, specify which modality to use.
             args: Passed to az.summary
             kwargs: Passed to az.summary
@@ -809,7 +810,7 @@ class CompositionalModel2(ABC):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("[bold red]When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data
@@ -848,10 +849,10 @@ class CompositionalModel2(ABC):
         table.add_column("Name", justify="left", style="cyan")
         table.add_column("Value", justify="left")
         table.add_row("Data", "Data: %d samples, %d cell types" % data_dims)
-        table.add_row("Reference cell type", "%s" % str(sample_adata.uns["scCODA_params"]["reference_cell_type"]))
-        table.add_row("Formula", "%s" % sample_adata.uns["scCODA_params"]["formula"])
+        table.add_row("Reference cell type", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_cell_type"])))
+        table.add_row("Formula", "{}".format(sample_adata.uns["scCODA_params"]["formula"]))
         if extended:
-            table.add_row("Reference index", "%s" % str(sample_adata.uns["scCODA_params"]["reference_index"]))
+            table.add_row("Reference index", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_index"])))
             if select_type == "spikeslab":
                 table.add_row(
                     "Spike-and-slab threshold",
@@ -934,7 +935,7 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            modality_key: If data is a MuData object, specify which modality to use.
         Returns:
             pd.DataFrame: Intercept data frame.
@@ -953,7 +954,7 @@ class CompositionalModel2(ABC):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data
@@ -965,7 +966,7 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            modality_key: If data is a MuData object, specify which modality to use.
         Returns:
             pd.DataFrame: Effect data frame.
@@ -984,7 +985,7 @@ class CompositionalModel2(ABC):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data
@@ -1007,7 +1008,7 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            modality_key: If data is a MuData object, specify which modality to use.
         Returns:
             pd.DataFrame: Node effect data frame.
@@ -1032,7 +1033,7 @@ class CompositionalModel2(ABC):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data
@@ -1046,7 +1047,7 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
             est_fdr: Desired FDR value.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            modality_key: If data is a MuData object, specify which modality to use.
             args: passed to self.summary_prepare
             kwargs: passed to self.summary_prepare
@@ -1057,7 +1058,7 @@ class CompositionalModel2(ABC):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data
@@ -1080,8 +1081,8 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
-            est_fdr: Estimated false discovery rate. Must be between 0 and 1. Defaults to None.
+            modality_key: If data is a MuData object, specify which modality to use.
+            est_fdr: Estimated false discovery rate. Must be between 0 and 1.
         Returns:
             pd.Series: Credible effect decision series of boolean values indicating whether effects are credible under inc_prob_threshold.
@@ -1090,7 +1091,7 @@ class CompositionalModel2(ABC):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data
@@ -1143,10 +1144,10 @@ class CompositionalModel2(ABC):
             type_names: The names of all cell types
             title: Plot title, usually the covariate's name
             level_names: Names of the covariate's levels
-            figsize: Figure size. Defaults to None.
-            dpi: Dpi setting. Defaults to 100.
-            palette: The color map for the barplot. Defaults to cm.tab20.
-            show_legend: If True, adds a legend. Defaults to True.
+            figsize: Figure size (matplotlib).
+            dpi: Resolution in DPI (matplotlib).
+            palette: The color map for the barplot.
+            show_legend: If True, adds a legend.
         Returns:
             A :class:`~matplotlib.axes.Axes` object
@@ -1205,12 +1206,12 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
             feature_name: The name of the covariate to plot. If feature_name=="samples", one bar for every sample will be plotted
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
-            figsize: Figure size. Defaults to None.
-            dpi: Dpi setting. Defaults to 100.
-            palette: The matplotlib color map for the barplot. Defaults to cm.tab20.
-            show_legend: If True, adds a legend. Defaults to True.
-            level_order: Custom ordering of bars on the x-axis. Defaults to None.
+            modality_key: If data is a MuData object, specify which modality to use.
+            figsize: Figure size.
+            dpi: Dpi setting.
+            palette: The matplotlib color map for the barplot.
+            show_legend: If True, adds a legend.
+            level_order: Custom ordering of bars on the x-axis.
         Returns:
             A :class:`~matplotlib.axes.Axes` object
@@ -1311,20 +1312,17 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
-            covariates: The name of the covariates in data.obs to plot. Defaults to None.
-            parameter: The parameter in effect summary to plot. Defaults to "log2-fold change".
+            modality_key: If data is a MuData object, specify which modality to use.
+            covariates: The name of the covariates in data.obs to plot.
+            parameter: The parameter in effect summary to plot.
             plot_facets: If False, plot cell types on the x-axis. If True, plot as facets.
-                         Defaults to True.
             plot_zero_covariate: If True, plot covariate that have all zero effects. If False, do not plot.
-                                 Defaults to True.
             plot_zero_cell_type: If True, plot cell type that have zero effect. If False, do not plot.
-                                 Defaults to False.
-            figsize: Figure size. Defaults to None.
-            dpi: Figure size. Defaults to 100.
-            palette: The seaborn color map for the barplot. Defaults to cm.tab20.
-            level_order: Custom ordering of bars on the x-axis. Defaults to None.
-            args_barplot: Arguments passed to sns.barplot. Defaults to None.
+            figsize: Figure size.
+            dpi: Dpi setting.
+            palette: The seaborn color map for the barplot.
+            level_order: Custom ordering of bars on the x-axis.
+            args_barplot: Arguments passed to sns.barplot.
         Returns:
             Depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
@@ -1519,20 +1517,19 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object
             feature_name: The name of the feature in data.obs to plot
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
+            modality_key: If data is a MuData object, specify which modality to use.
             y_scale: Transformation of cell counts. Options: "relative" - Relative abundance, "log" - log(count),
                      "log10" - log10(count), "count" - absolute abundance (cell counts).
-                     Defaults to "relative".
-            plot_facets: If False, plot cell types on the x-axis. If True, plot as facets. Defaults to False.
-            add_dots: If True, overlay a scatterplot with one dot for each data point. Defaults to False.
-            cell_types: Subset of cell types that should be plotted. Defaults to None.
-            args_boxplot: Arguments passed to sns.boxplot. Defaults to {}.
-            args_swarmplot: Arguments passed to sns.swarmplot. Defaults to {}.
-            figsize: Figure size. Defaults to None.
-            dpi: Dpi setting. Defaults to 100.
-            palette: The seaborn color map for the barplot. Defaults to "Blues".
-            show_legend: If True, adds a legend. Defaults to True.
-            level_order: Custom ordering of bars on the x-axis. Defaults to None.
+            plot_facets: If False, plot cell types on the x-axis. If True, plot as facets.
+            add_dots: If True, overlay a scatterplot with one dot for each data point.
+            cell_types: Subset of cell types that should be plotted.
+            args_boxplot: Arguments passed to sns.boxplot.
+            args_swarmplot: Arguments passed to sns.swarmplot.
+            figsize: Figure size.
+            dpi: Dpi setting.
+            palette: The seaborn color map for the barplot.
+            show_legend: If True, adds a legend.
+            level_order: Custom ordering of bars on the x-axis.
         Returns:
             Depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
@@ -1758,16 +1755,14 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
-                          Defaults to "coda".
-            abundant_threshold: Presence threshold for abundant cell types. Defaults to 0.9.
-            default_color: Bar color for all non-minimal cell types. Defaults to "Grey".
+            modality_key: If data is a MuData object, specify which modality to use.
+            abundant_threshold: Presence threshold for abundant cell types.
+            default_color: Bar color for all non-minimal cell types.
             abundant_color: Bar color for cell types with abundant percentage larger than abundant_threshold.
-                            Defaults to "Red".
-            label_cell_types: Label dots with cell type names. Defaults to True.
-            figsize: Figure size. Defaults to None.
-            dpi: Dpi setting. Defaults to 100.
-            ax: A matplotlib axes object. Only works if plotting a single component. Defaults to None.
+            label_cell_types: Label dots with cell type names.
+            figsize: Figure size.
+            dpi: Dpi setting.
+            ax: A matplotlib axes object. Only works if plotting a single component.
         Returns:
             A :class:`~matplotlib.axes.Axes` object
@@ -1882,22 +1877,16 @@ class CompositionalModel2(ABC):
         Args:
             data: AnnData object or MuData object.
             modality_key: If data is a MuData object, specify which modality to use.
-                          Defaults to "coda".
             tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
-                  Defaults to "tree".
             tight_text: When False, boundaries of the text are approximated according to general font metrics,
                         producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
-                        Default to False.
             show_scale: Include the scale legend in the tree image or not.
-                        Defaults to False.
             show: If True, plot the tree inline. If false, return tree and tree_style objects.
-                  Defaults to True.
             file_name: Path to the output image file. Valid extensions are .SVG, .PDF, .PNG.
                        Output image can be saved whether show is True or not.
-                       Defaults to None.
-            units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches. Defaults to "px".
-            figsize: Figure size. Defaults to None.
-            dpi: Dots per inches. Defaults to 100.
+            units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches.
+            figsize: Figure size.
+            dpi: Dots per inches.
         Returns:
             Depending on `show`, returns :class:`ete3.TreeNode` and :class:`ete3.TreeStyle` (`show = False`) or plot the tree inline (`show = True`)
@@ -1972,23 +1961,18 @@ class CompositionalModel2(ABC):
             data: AnnData object or MuData object.
             covariate: The covariate, whose effects should be plotted.
             modality_key: If data is a MuData object, specify which modality to use.
-                          Defaults to "coda".
             tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
-                  Defaults to "tree".
             show_legend: If show legend of nodes significant effects or not.
                          Defaults to False if show_leaf_effects is True.
             show_leaf_effects: If True, plot bar plots which indicate leaf-level significant effects.
-                               Defaults to False.
             tight_text: When False, boundaries of the text are approximated according to general font metrics,
                         producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
-                        Defaults to False.
-            show_scale: Include the scale legend in the tree image or not. Defaults to False.
-            show: If True, plot the tree inline. If false, return tree and tree_style objects. Defaults to True.
+            show_scale: Include the scale legend in the tree image or not.
+            show: If True, plot the tree inline. If false, return tree and tree_style objects.
             file_name: Path to the output image file. valid extensions are .SVG, .PDF, .PNG. Output image can be saved whether show is True or not.
-                       Defaults to None.
-            units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches. Defaults to "px".
-            figsize: Figure size. Defaults to None.
-            dpi: Dots per inches. Defaults to 100.
+            units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches.
+            figsize: Figure size.
+            dpi: Dots per inches.
         Returns:
             Depending on `show`, returns :class:`ete3.TreeNode` and :class:`ete3.TreeStyle` (`show = False`)
@@ -2026,7 +2010,7 @@ class CompositionalModel2(ABC):
         if show_legend is None:
             show_legend = not show_leaf_effects
         elif show_legend:
-            print("Tree leaves and leaf effect bars won't be aligned when legend is shown!")
+            logger.info("Tree leaves and leaf effect bars won't be aligned when legend is shown!")
         if isinstance(tree, str):
             tree = data.uns[tree]
@@ -2171,12 +2155,10 @@ class CompositionalModel2(ABC):
             effect_name: The name of the effect results in .varm of aggregated sample-level AnnData to plot
             cluster_key: The cluster information in .obs of cell-level AnnData (default is data['rna']).
                          To assign cell types' effects to original cells.
-            modality_key_1: Key to the cell-level AnnData in the MuData object. Defaults to "rna".
+            modality_key_1: Key to the cell-level AnnData in the MuData object.
             modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
-                            Defaults to "coda".
-            show: Whether to display the figure or return axis. Defaults to None.
+            show: Whether to display the figure or return axis.
             ax: A matplotlib axes object. Only works if plotting a single component.
-                Defaults to None.
             **kwargs: All other keyword arguments are passed to `scanpy.plot.umap()`
         Returns:
@@ -2254,7 +2236,7 @@ class CompositionalModel2(ABC):
 def get_a(
-    tree: tt.tree,
+    tree: tt.core.ToyTree,
 ) -> tuple[np.ndarray, int]:
     """Calculate ancestor matrix from a toytree tree
@@ -2293,7 +2275,7 @@ def get_a(
     return A, n_nodes - 1
-def collapse_singularities(tree: tt.tree) -> tt.tree:
+def collapse_singularities(tree: tt.core.ToyTree) -> tt.core.ToyTree:
     """Collapses (deletes) nodes in a toytree tree that are singularities (have only one child).
     Args:
@@ -2509,15 +2491,14 @@ def import_tree(
     Args:
         data: A tascCODA-compatible data object.
-        modality_1: If `data` is MuData, specify the modality name to the original cell level anndata object. Defaults to None.
-        modality_2: If `data` is MuData, specify the modality name to the aggregated level anndata object. Defaults to None.
-        dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object. Defaults to None.
-        levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
-        levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
+        modality_1: If `data` is MuData, specify the modality name to the original cell level anndata object.
+        modality_2: If `data` is MuData, specify the modality name to the aggregated level anndata object.
+        dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
+        levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
+        levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
         add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
-                        Defaults to True.
         key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`.
-                   If `data` is MuData, save tree in data[modality_2]. Defaults to "tree".
+                   If `data` is MuData, save tree in data[modality_2].
     Returns:
         Updates data with the following:
@@ -2538,10 +2519,10 @@ def import_tree(
             data_1 = data[modality_1]
             data_2 = data[modality_2]
         except KeyError as name:
-            print(f"No {name} slot in MuData")
+            logger.error(f"No {name} slot in MuData")
             raise
         except IndexError:
-            print("Please specify modality_1 and modality_2 to indicate modalities in MuData")
+            logger.error("Please specify modality_1 and modality_2 to indicate modalities in MuData")
             raise
     else:
         data_1 = data
@@ -2613,43 +2594,38 @@ def from_scanpy(
     Returns:
         AnnData: A data set with cells aggregated to the (sample x cell type) level
     """
-    if isinstance(sample_identifier, str):
-        sample_identifier = [sample_identifier]
+    sample_identifier = [sample_identifier] if isinstance(sample_identifier, str) else sample_identifier
+    covariate_obs = list(set(covariate_obs or []) | set(sample_identifier))
-    if len(sample_identifier) > 1:
+    if isinstance(sample_identifier, list):
         adata.obs["scCODA_sample_id"] = adata.obs[sample_identifier].agg("-".join, axis=1)
         sample_identifier = "scCODA_sample_id"
-    else:
-        sample_identifier = sample_identifier[0]
-    # get cell type counts
-    ct_count_data = pd.crosstab(adata.obs[sample_identifier], adata.obs[cell_type_identifier])
-    ct_count_data = ct_count_data.fillna(0)
-    # get covariates from different sources
+    groups = adata.obs.value_counts([sample_identifier, cell_type_identifier])
+    ct_count_data = groups.unstack(level=cell_type_identifier).fillna(0)
     covariate_df_ = pd.DataFrame(index=ct_count_data.index)
     if covariate_uns is not None:
         covariate_df_uns = pd.DataFrame(adata.uns[covariate_uns], index=ct_count_data.index)
-        covariate_df_ = covariate_df_.join(covariate_df_uns, how="left")
+        covariate_df_ = pd.concat([covariate_df_, covariate_df_uns], axis=1)
     if covariate_obs:
-        is_unique = adata.obs.groupby(sample_identifier, observed=True).transform(lambda x: x.nunique() == 1)
-        unique_covariates = is_unique.columns[is_unique.all()].tolist()
-        if len(unique_covariates) < len(covariate_obs):
-            skipped = set(covariate_obs) - set(unique_covariates)
-            print(f"[bold yellow]Covariates {skipped} have non-unique values! Skipping...")
-        if unique_covariates:
-            covariate_df_obs = adata.obs.groupby(sample_identifier, observed=True).first()[unique_covariates]
-            covariate_df_ = covariate_df_.join(covariate_df_obs, how="left")
+        unique_check = adata.obs.groupby(sample_identifier).nunique()
+        for c in covariate_obs.copy():
+            if unique_check[c].max() != 1:
+                logger.warning(f"Covariate {c} has non-unique values for batch! Skipping...")
+                covariate_obs.remove(c)
+        if covariate_obs:
+            covariate_df_obs = adata.obs.groupby(sample_identifier).first()[covariate_obs]
+            covariate_df_ = pd.concat([covariate_df_, covariate_df_obs], axis=1)
     if covariate_df is not None:
-        if not covariate_df.index.equals(ct_count_data.index):
-            raise ValueError("AnnData sample names and covariate_df index do not have the same elements!")
-        covariate_df_ = covariate_df_.join(covariate_df, how="left")
+        if set(covariate_df.index) != set(ct_count_data.index):
+            raise ValueError("Mismatch between sample names in anndata and covariate_df!")
+        covariate_df_ = pd.concat([covariate_df_, covariate_df.reindex(ct_count_data.index)], axis=1)
-    var_dat = ct_count_data.sum(axis=0).rename("n_cells").to_frame()
+    var_dat = ct_count_data.sum().rename("n_cells").to_frame()
     var_dat.index = var_dat.index.astype(str)
+    covariate_df_.index = covariate_df_.index.astype(str)
     return AnnData(X=ct_count_data.values, var=var_dat, obs=covariate_df_)

pertpy/tools/_coda/_sccoda.py CHANGED Viewed

@@ -9,6 +9,7 @@ import numpyro as npy
 import numpyro.distributions as npd
 from anndata import AnnData
 from jax import config, random
+from lamin_utils import logger
 from mudata import MuData
 from numpyro.infer import Predictive
 from rich import print
@@ -73,13 +74,13 @@ class Sccoda(CompositionalModel2):
             adata: AnnData object.
             type: Specify the input adata type, which could be either a cell-level AnnData or an aggregated sample-level AnnData.
             generate_sample_level: Whether to generate an AnnData object on the sample level or create an empty AnnData object.
-            cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types. Defaults to None.
-            sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample. Defaults to None.
-            covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored. Defaults to None.
-            covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored. Defaults to None.
-            covariate_df: If type is "cell_level", specify dataFrame with covariates. Defaults to None.
-            modality_key_1: Key to the cell-level AnnData in the MuData object. Defaults to "rna".
-            modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
+            cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
+            sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
+            covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
+            covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
+            covariate_df: If type is "cell_level", specify dataFrame with covariates.
+            modality_key_1: Key to the cell-level AnnData in the MuData object.
+            modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
         Returns:
             MuData: MuData object with cell-level AnnData (`mudata[modality_key_1]`) and aggregated sample-level AnnData (`mudata[modality_key_2]`).
@@ -127,10 +128,10 @@ class Sccoda(CompositionalModel2):
                 Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
                 To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
             reference_cell_type: Column name that sets the reference cell type.
-                Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen. Defaults to "automatic".
+                Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
             automatic_reference_absence_threshold: If using reference_cell_type = "automatic", determine the maximum fraction of zero entries for a cell type
-                to be considered as a possible reference cell type. Defaults to 0.05.
-            modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
+                to be considered as a possible reference cell type.
+            modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
         Returns:
             Return an AnnData (if input data is an AnnData object) or return a MuData (if input data is a MuData object)
@@ -309,10 +310,10 @@ class Sccoda(CompositionalModel2):
         Args:
             data: AnnData object or MuData object.
-            modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
-            rng_key: The rng state used for the prior simulation. If None, a random state will be selected. Defaults to None.
-            num_prior_samples: Number of prior samples calculated. Defaults to 500.
-            use_posterior_predictive: If True, the posterior predictive will be calculated. Defaults to True.
+            modality_key: If data is a MuData object, specify which modality to use.
+            rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
+            num_prior_samples: Number of prior samples calculated.
+            use_posterior_predictive: If True, the posterior predictive will be calculated.
         Returns:
             az.InferenceData: arviz_data with all MCMC information
@@ -335,7 +336,7 @@ class Sccoda(CompositionalModel2):
             try:
                 sample_adata = data[modality_key]
             except IndexError:
-                print("When data is a MuData object, modality_key must be specified!")
+                logger.error("When data is a MuData object, modality_key must be specified!")
                 raise
         if isinstance(data, AnnData):
             sample_adata = data

pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

pertpy 0.7.0py3-none-any.whl → 0.8.0py3-none-any.whl