pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. pertpy/__init__.py +2 -1
  2. pertpy/data/__init__.py +61 -0
  3. pertpy/data/_dataloader.py +27 -23
  4. pertpy/data/_datasets.py +58 -0
  5. pertpy/metadata/__init__.py +2 -0
  6. pertpy/metadata/_cell_line.py +39 -70
  7. pertpy/metadata/_compound.py +3 -4
  8. pertpy/metadata/_drug.py +2 -6
  9. pertpy/metadata/_look_up.py +38 -51
  10. pertpy/metadata/_metadata.py +7 -10
  11. pertpy/metadata/_moa.py +2 -6
  12. pertpy/plot/__init__.py +0 -5
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +2 -3
  15. pertpy/tools/__init__.py +42 -4
  16. pertpy/tools/_augur.py +14 -15
  17. pertpy/tools/_cinemaot.py +2 -2
  18. pertpy/tools/_coda/_base_coda.py +118 -142
  19. pertpy/tools/_coda/_sccoda.py +16 -15
  20. pertpy/tools/_coda/_tasccoda.py +21 -22
  21. pertpy/tools/_dialogue.py +18 -23
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +21 -16
  32. pertpy/tools/_distances/_distances.py +406 -70
  33. pertpy/tools/_enrichment.py +10 -15
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +76 -53
  36. pertpy/tools/_mixscape.py +15 -11
  37. pertpy/tools/_perturbation_space/_clustering.py +5 -2
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
  41. pertpy/tools/_perturbation_space/_simple.py +3 -3
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +33 -28
  45. pertpy/tools/_scgen/_utils.py +2 -2
  46. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
  47. pertpy-0.8.0.dist-info/RECORD +57 -0
  48. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
  49. pertpy/plot/_augur.py +0 -171
  50. pertpy/plot/_coda.py +0 -601
  51. pertpy/plot/_guide_rna.py +0 -64
  52. pertpy/plot/_milopy.py +0 -209
  53. pertpy/plot/_mixscape.py +0 -355
  54. pertpy/tools/_differential_gene_expression.py +0 -325
  55. pertpy-0.7.0.dist-info/RECORD +0 -53
  56. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -10,9 +10,9 @@ import numpyro.distributions as npd
10
10
  import toytree as tt
11
11
  from anndata import AnnData
12
12
  from jax import config, random
13
+ from lamin_utils import logger
13
14
  from mudata import MuData
14
15
  from numpyro.infer import Predictive
15
- from rich import print
16
16
 
17
17
  from pertpy.tools._coda._base_coda import (
18
18
  CompositionalModel2,
@@ -85,18 +85,18 @@ class Tasccoda(CompositionalModel2):
85
85
  Args:
86
86
  adata: AnnData object.
87
87
  type: Specify the input adata type, which could be either a cell-level AnnData or an aggregated sample-level AnnData.
88
- cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types. Defaults to None.
89
- sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample. Defaults to None.
90
- covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored. Defaults to None.
91
- covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored. Defaults to None.
92
- covariate_df: If type is "cell_level", specify dataFrame with covariates. Defaults to None.
93
- dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object. Defaults to None.
94
- levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
95
- levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
96
- add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}. Defaults to False.
97
- key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2]. Defaults to "tree".
98
- modality_key_1: Key to the cell-level AnnData in the MuData object. Defaults to "rna".
99
- modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
88
+ cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
89
+ sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
90
+ covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
91
+ covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
92
+ covariate_df: If type is "cell_level", specify dataFrame with covariates.
93
+ dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
94
+ levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
95
+ levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
96
+ add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
97
+ key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
98
+ modality_key_1: Key to the cell-level AnnData in the MuData object.
99
+ modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
100
100
 
101
101
  Returns:
102
102
  MuData: MuData object with cell-level AnnData (`mudata[modality_key_1]`) and aggregated sample-level AnnData (`mudata[modality_key_2]`).
@@ -155,14 +155,13 @@ class Tasccoda(CompositionalModel2):
155
155
  To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
156
156
  reference_cell_type: Column name that sets the reference cell type.
157
157
  If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
158
- Defaults to "automatic".
159
158
  automatic_reference_absence_threshold: If using reference_cell_type = "automatic",
160
159
  determine the maximum fraction of zero entries for a cell type
161
- to be considered as a possible reference cell type. Defaults to 0.05.
160
+ to be considered as a possible reference cell type.
162
161
  tree_key: Key in `adata.uns` that contains the tree structure
163
162
  pen_args: Dictionary with penalty arguments. With `reg="scaled_3"`, the parameters phi (aggregation bias), lambda_1, lambda_0 can be set here.
164
163
  See the tascCODA paper for an explanation of these parameters. Default: lambda_0 = 50, lambda_1 = 5, phi = 0.
165
- modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
164
+ modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
166
165
 
167
166
  Returns:
168
167
  Return an AnnData (if input data is an AnnData object) or return a MuData (if input data is a MuData object)
@@ -208,7 +207,7 @@ class Tasccoda(CompositionalModel2):
208
207
  ) from None
209
208
 
210
209
  # toytree tree - only for legacy reasons, can be removed in the final version
211
- if isinstance(adata.uns[tree_key], tt.tree):
210
+ if isinstance(adata.uns[tree_key], tt.core.ToyTree):
212
211
  # Collapse singularities in the tree
213
212
  phy_tree = collapse_singularities(adata.uns[tree_key])
214
213
 
@@ -477,10 +476,10 @@ class Tasccoda(CompositionalModel2):
477
476
 
478
477
  Args:
479
478
  data: AnnData object or MuData object.
480
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
481
- rng_key: The rng state used for the prior simulation. If None, a random state will be selected. Defaults to None.
482
- num_prior_samples: Number of prior samples calculated. Defaults to 500.
483
- use_posterior_predictive: If True, the posterior predictive will be calculated. Defaults to True.
479
+ modality_key: If data is a MuData object, specify which modality to use.
480
+ rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
481
+ num_prior_samples: Number of prior samples calculated.
482
+ use_posterior_predictive: If True, the posterior predictive will be calculated.
484
483
 
485
484
  Returns:
486
485
  arviz.InferenceData: arviz_data
@@ -504,7 +503,7 @@ class Tasccoda(CompositionalModel2):
504
503
  try:
505
504
  sample_adata = data[modality_key]
506
505
  except IndexError:
507
- print("When data is a MuData object, modality_key must be specified!")
506
+ logger.error("When data is a MuData object, modality_key must be specified!")
508
507
  raise
509
508
  if isinstance(data, AnnData):
510
509
  sample_adata = data
pertpy/tools/_dialogue.py CHANGED
@@ -13,8 +13,8 @@ import seaborn as sns
13
13
  import statsmodels.formula.api as smf
14
14
  import statsmodels.stats.multitest as ssm
15
15
  from anndata import AnnData
16
+ from lamin_utils import logger
16
17
  from pandas import DataFrame
17
- from rich import print
18
18
  from rich.console import Group
19
19
  from rich.live import Live
20
20
  from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
@@ -291,7 +291,7 @@ class Dialogue:
291
291
  mcp_name: Name of mcp which was used for calculation of column value.
292
292
  max_length: Value needed to later decide at what index the threshold value should be extracted from column.
293
293
  min_threshold: Minimal threshold to select final scores by if it is smaller than calculated threshold.
294
- index: Column index to use to calculate the significant genes. Defaults to `z_score`.
294
+ index: Column index to use to calculate the significant genes.
295
295
 
296
296
  Returns:
297
297
  According to the values in a df column (default: zscore) the significant up and downregulated gene names
@@ -377,12 +377,6 @@ class Dialogue:
377
377
 
378
378
  `argmin|Ax - y|`
379
379
 
380
- Args:
381
- A_orig:
382
- y_orig:
383
- feature_ranks:
384
- n_iter: Passed to scipy.optimize.nnls. Defaults to 1000.
385
-
386
380
  Returns:
387
381
  Returns the aggregated coefficients from nnls.
388
382
  """
@@ -572,8 +566,8 @@ class Dialogue:
572
566
  Args:
573
567
  adata: AnnData object generate celltype objects for
574
568
  ct_order: The order of cell types
575
- agg_pca: Whether to aggregate pseudobulks with PCA or not. Defaults to True.
576
- normalize: Whether to mimic DIALOGUE behavior or not. Defaults to True.
569
+ agg_pca: Whether to aggregate pseudobulks with PCA or not.
570
+ normalize: Whether to mimic DIALOGUE behavior or not.
577
571
 
578
572
  Returns:
579
573
  A celltype_label:array dictionary.
@@ -613,7 +607,6 @@ class Dialogue:
613
607
  agg_pca: Whether to calculate cell-averaged PCA components.
614
608
  solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
615
609
  For differences between these to please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
616
- Defaults to 'bs'.
617
610
  normalize: Whether to mimic DIALOGUE as close as possible
618
611
 
619
612
  Returns:
@@ -640,9 +633,15 @@ class Dialogue:
640
633
 
641
634
  n_samples = mcca_in[0].shape[1]
642
635
  if penalties is None:
643
- penalties = multicca_permute(
644
- mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
645
- )["bestpenalties"]
636
+ try:
637
+ penalties = multicca_permute(
638
+ mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
639
+ )["bestpenalties"]
640
+ except ValueError as e:
641
+ if "matmul: input operand 1 has a mismatch in its core dimension" in str(e):
642
+ raise ValueError("Please ensure that every cell type is represented in every sample.") from e
643
+ else:
644
+ raise
646
645
  else:
647
646
  penalties = penalties
648
647
 
@@ -756,10 +755,10 @@ class Dialogue:
756
755
  mcps.append(mcp)
757
756
 
758
757
  if len(mcps) == 0:
759
- print(f"[bold red]No shared MCPs between {cell_type_1} and {cell_type_2}.")
758
+ logger.warning(f"No shared MCPs between {cell_type_1} and {cell_type_2}.")
760
759
  continue
761
760
 
762
- print(f"[bold blue]{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
761
+ logger.info(f"{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
763
762
 
764
763
  new_mcp_scores: dict[Any, list[Any]]
765
764
  cca_sig, new_mcp_scores = self._calculate_cca_sig(
@@ -912,9 +911,7 @@ class Dialogue:
912
911
  results: dl.MultilevelModeling result object.
913
912
  MCP: MCP key of the result object.
914
913
  threshold: Number between [0,1]. The fraction of cell types compared against which must have the associated MCP gene.
915
- Defaults to 0.70.
916
914
  focal_celltypes: None (compare against all cell types) or a list of other cell types which you want to compare against.
917
- Defaults to None.
918
915
 
919
916
  Returns:
920
917
  Dict with keys 'up_genes' and 'down_genes' and values of lists of genes
@@ -993,10 +990,8 @@ class Dialogue:
993
990
  Args:
994
991
  ct_subs: Dialogue output ct_subs dictionary
995
992
  mcp: The name of the marker gene expression column.
996
- Defaults to "mcp_0".
997
993
  fraction: Fraction of extreme cells to consider for gene ranking.
998
994
  Should be between 0 and 1.
999
- Defaults to 0.1.
1000
995
 
1001
996
  Returns:
1002
997
  Dictionary where keys are subpopulation names and values are Anndata
@@ -1035,7 +1030,7 @@ class Dialogue:
1035
1030
  Args:
1036
1031
  ct_subs: Dialogue output ct_subs dictionary
1037
1032
  fraction: Fraction of extreme cells to consider for gene ranking.
1038
- Should be between 0 and 1. Defaults to 0.1.
1033
+ Should be between 0 and 1.
1039
1034
 
1040
1035
  Returns:
1041
1036
  Nested dictionary where keys of the first level are MCPs (of the form "mcp_0" etc)
@@ -1085,7 +1080,7 @@ class Dialogue:
1085
1080
  split_key: Variable in adata.obs used to split the data.
1086
1081
  celltype_key: Key for cell type annotations.
1087
1082
  split_which: Which values of split_key to plot. Required if more than 2 values in split_key.
1088
- mcp: Key for MCP data. Defaults to "mcp_0".
1083
+ mcp: Key for MCP data.
1089
1084
 
1090
1085
  Returns:
1091
1086
  A :class:`~matplotlib.axes.Axes` object
@@ -1144,7 +1139,7 @@ class Dialogue:
1144
1139
  celltype_key: Key in `adata.obs` containing cell type annotations.
1145
1140
  color: Key in `adata.obs` for color annotations. This parameter is used as the hue
1146
1141
  sample_id: Key in `adata.obs` for the sample annotations.
1147
- mcp: Key in `adata.obs` for MCP feature values. Defaults to `"mcp_0"`.
1142
+ mcp: Key in `adata.obs` for MCP feature values.
1148
1143
 
1149
1144
  Returns:
1150
1145
  Seaborn Pairgrid object.
pertpy/tools/_differential_gene_expression/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ from ._base import ContrastType, LinearModelBase, MethodBase
2
+ from ._dge_comparison import DGEEVAL
3
+ from ._edger import EdgeR
4
+ from ._pydeseq2 import PyDESeq2
5
+ from ._simple_tests import SimpleComparisonBase, TTest, WilcoxonTest
6
+ from ._statsmodels import Statsmodels
7
+
8
+ __all__ = [
9
+ "MethodBase",
10
+ "LinearModelBase",
11
+ "EdgeR",
12
+ "PyDESeq2",
13
+ "Statsmodels",
14
+ "SimpleComparisonBase",
15
+ "WilcoxonTest",
16
+ "TTest",
17
+ "ContrastType",
18
+ ]
19
+
20
+ AVAILABLE_METHODS = [Statsmodels, EdgeR, PyDESeq2, WilcoxonTest, TTest]