pertpy 0.7.0__py3-none-any.whl → 0.9.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. pertpy/__init__.py +2 -1
  2. pertpy/data/__init__.py +61 -0
  3. pertpy/data/_dataloader.py +27 -23
  4. pertpy/data/_datasets.py +58 -0
  5. pertpy/metadata/__init__.py +2 -0
  6. pertpy/metadata/_cell_line.py +39 -70
  7. pertpy/metadata/_compound.py +3 -4
  8. pertpy/metadata/_drug.py +2 -6
  9. pertpy/metadata/_look_up.py +38 -51
  10. pertpy/metadata/_metadata.py +7 -10
  11. pertpy/metadata/_moa.py +2 -6
  12. pertpy/plot/__init__.py +0 -5
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +6 -7
  15. pertpy/tools/__init__.py +67 -6
  16. pertpy/tools/_augur.py +14 -15
  17. pertpy/tools/_cinemaot.py +2 -2
  18. pertpy/tools/_coda/_base_coda.py +118 -142
  19. pertpy/tools/_coda/_sccoda.py +16 -15
  20. pertpy/tools/_coda/_tasccoda.py +21 -22
  21. pertpy/tools/_dialogue.py +18 -23
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +21 -16
  32. pertpy/tools/_distances/_distances.py +406 -70
  33. pertpy/tools/_enrichment.py +10 -15
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +77 -54
  36. pertpy/tools/_mixscape.py +15 -11
  37. pertpy/tools/_perturbation_space/_clustering.py +5 -2
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +21 -23
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
  41. pertpy/tools/_perturbation_space/_simple.py +3 -3
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +33 -28
  45. pertpy/tools/_scgen/_utils.py +2 -2
  46. {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/METADATA +32 -14
  47. pertpy-0.9.1.dist-info/RECORD +57 -0
  48. {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/WHEEL +1 -1
  49. pertpy/plot/_augur.py +0 -171
  50. pertpy/plot/_coda.py +0 -601
  51. pertpy/plot/_guide_rna.py +0 -64
  52. pertpy/plot/_milopy.py +0 -209
  53. pertpy/plot/_mixscape.py +0 -355
  54. pertpy/tools/_differential_gene_expression.py +0 -325
  55. pertpy-0.7.0.dist-info/RECORD +0 -53
  56. {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/licenses/LICENSE +0 -0
@@ -10,9 +10,9 @@ import numpyro.distributions as npd
10
10
  import toytree as tt
11
11
  from anndata import AnnData
12
12
  from jax import config, random
13
+ from lamin_utils import logger
13
14
  from mudata import MuData
14
15
  from numpyro.infer import Predictive
15
- from rich import print
16
16
 
17
17
  from pertpy.tools._coda._base_coda import (
18
18
  CompositionalModel2,
@@ -85,18 +85,18 @@ class Tasccoda(CompositionalModel2):
85
85
  Args:
86
86
  adata: AnnData object.
87
87
  type: Specify the input adata type, which could be either a cell-level AnnData or an aggregated sample-level AnnData.
88
- cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types. Defaults to None.
89
- sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample. Defaults to None.
90
- covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored. Defaults to None.
91
- covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored. Defaults to None.
92
- covariate_df: If type is "cell_level", specify dataFrame with covariates. Defaults to None.
93
- dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object. Defaults to None.
94
- levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
95
- levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
96
- add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}. Defaults to False.
97
- key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2]. Defaults to "tree".
98
- modality_key_1: Key to the cell-level AnnData in the MuData object. Defaults to "rna".
99
- modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
88
+ cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
89
+ sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
90
+ covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
91
+ covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
92
+ covariate_df: If type is "cell_level", specify dataFrame with covariates.
93
+ dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
94
+ levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
95
+ levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
96
+ add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
97
+ key_added: If not specified, the tree is stored in .uns['tree']. If `data` is AnnData, save tree in `data`. If `data` is MuData, save tree in data[modality_2].
98
+ modality_key_1: Key to the cell-level AnnData in the MuData object.
99
+ modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
100
100
 
101
101
  Returns:
102
102
  MuData: MuData object with cell-level AnnData (`mudata[modality_key_1]`) and aggregated sample-level AnnData (`mudata[modality_key_2]`).
@@ -155,14 +155,13 @@ class Tasccoda(CompositionalModel2):
155
155
  To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
156
156
  reference_cell_type: Column name that sets the reference cell type.
157
157
  If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
158
- Defaults to "automatic".
159
158
  automatic_reference_absence_threshold: If using reference_cell_type = "automatic",
160
159
  determine the maximum fraction of zero entries for a cell type
161
- to be considered as a possible reference cell type. Defaults to 0.05.
160
+ to be considered as a possible reference cell type.
162
161
  tree_key: Key in `adata.uns` that contains the tree structure
163
162
  pen_args: Dictionary with penalty arguments. With `reg="scaled_3"`, the parameters phi (aggregation bias), lambda_1, lambda_0 can be set here.
164
163
  See the tascCODA paper for an explanation of these parameters. Default: lambda_0 = 50, lambda_1 = 5, phi = 0.
165
- modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
164
+ modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
166
165
 
167
166
  Returns:
168
167
  Return an AnnData (if input data is an AnnData object) or return a MuData (if input data is a MuData object)
@@ -208,7 +207,7 @@ class Tasccoda(CompositionalModel2):
208
207
  ) from None
209
208
 
210
209
  # toytree tree - only for legacy reasons, can be removed in the final version
211
- if isinstance(adata.uns[tree_key], tt.tree):
210
+ if isinstance(adata.uns[tree_key], tt.core.ToyTree):
212
211
  # Collapse singularities in the tree
213
212
  phy_tree = collapse_singularities(adata.uns[tree_key])
214
213
 
@@ -477,10 +476,10 @@ class Tasccoda(CompositionalModel2):
477
476
 
478
477
  Args:
479
478
  data: AnnData object or MuData object.
480
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
481
- rng_key: The rng state used for the prior simulation. If None, a random state will be selected. Defaults to None.
482
- num_prior_samples: Number of prior samples calculated. Defaults to 500.
483
- use_posterior_predictive: If True, the posterior predictive will be calculated. Defaults to True.
479
+ modality_key: If data is a MuData object, specify which modality to use.
480
+ rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
481
+ num_prior_samples: Number of prior samples calculated.
482
+ use_posterior_predictive: If True, the posterior predictive will be calculated.
484
483
 
485
484
  Returns:
486
485
  arviz.InferenceData: arviz_data
@@ -504,7 +503,7 @@ class Tasccoda(CompositionalModel2):
504
503
  try:
505
504
  sample_adata = data[modality_key]
506
505
  except IndexError:
507
- print("When data is a MuData object, modality_key must be specified!")
506
+ logger.error("When data is a MuData object, modality_key must be specified!")
508
507
  raise
509
508
  if isinstance(data, AnnData):
510
509
  sample_adata = data
pertpy/tools/_dialogue.py CHANGED
@@ -13,8 +13,8 @@ import seaborn as sns
13
13
  import statsmodels.formula.api as smf
14
14
  import statsmodels.stats.multitest as ssm
15
15
  from anndata import AnnData
16
+ from lamin_utils import logger
16
17
  from pandas import DataFrame
17
- from rich import print
18
18
  from rich.console import Group
19
19
  from rich.live import Live
20
20
  from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn
@@ -291,7 +291,7 @@ class Dialogue:
291
291
  mcp_name: Name of mcp which was used for calculation of column value.
292
292
  max_length: Value needed to later decide at what index the threshold value should be extracted from column.
293
293
  min_threshold: Minimal threshold to select final scores by if it is smaller than calculated threshold.
294
- index: Column index to use eto calculate the significant genes. Defaults to `z_score`.
294
+ index: Column index to use to calculate the significant genes.
295
295
 
296
296
  Returns:
297
297
  According to the values in a df column (default: zscore) the significant up and downregulated gene names
@@ -377,12 +377,6 @@ class Dialogue:
377
377
 
378
378
  `argmin|Ax - y|`
379
379
 
380
- Args:
381
- A_orig:
382
- y_orig:
383
- feature_ranks:
384
- n_iter: Passed to scipy.optimize.nnls. Defaults to 1000.
385
-
386
380
  Returns:
387
381
  Returns the aggregated coefficients from nnls.
388
382
  """
@@ -572,8 +566,8 @@ class Dialogue:
572
566
  Args:
573
567
  adata: AnnData object generate celltype objects for
574
568
  ct_order: The order of cell types
575
- agg_pca: Whether to aggregate pseudobulks with PCA or not. Defaults to True.
576
- normalize: Whether to mimic DIALOGUE behavior or not. Defaults to True.
569
+ agg_pca: Whether to aggregate pseudobulks with PCA or not.
570
+ normalize: Whether to mimic DIALOGUE behavior or not.
577
571
 
578
572
  Returns:
579
573
  A celltype_label:array dictionary.
@@ -613,7 +607,6 @@ class Dialogue:
613
607
  agg_pca: Whether to calculate cell-averaged PCA components.
614
608
  solver: Which solver to use for PMD. Must be one of "lp" (linear programming) or "bs" (binary search).
615
609
  For differences between these two, please refer to https://github.com/theislab/sparsecca/blob/main/examples/linear_programming_multicca.ipynb
616
- Defaults to 'bs'.
617
610
  normalize: Whether to mimic DIALOGUE as close as possible
618
611
 
619
612
  Returns:
@@ -640,9 +633,15 @@ class Dialogue:
640
633
 
641
634
  n_samples = mcca_in[0].shape[1]
642
635
  if penalties is None:
643
- penalties = multicca_permute(
644
- mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
645
- )["bestpenalties"]
636
+ try:
637
+ penalties = multicca_permute(
638
+ mcca_in, penalties=np.sqrt(n_samples) / 2, nperms=10, niter=50, standardize=True
639
+ )["bestpenalties"]
640
+ except ValueError as e:
641
+ if "matmul: input operand 1 has a mismatch in its core dimension" in str(e):
642
+ raise ValueError("Please ensure that every cell type is represented in every sample.") from e
643
+ else:
644
+ raise
646
645
  else:
647
646
  penalties = penalties
648
647
 
@@ -756,10 +755,10 @@ class Dialogue:
756
755
  mcps.append(mcp)
757
756
 
758
757
  if len(mcps) == 0:
759
- print(f"[bold red]No shared MCPs between {cell_type_1} and {cell_type_2}.")
758
+ logger.warning(f"No shared MCPs between {cell_type_1} and {cell_type_2}.")
760
759
  continue
761
760
 
762
- print(f"[bold blue]{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
761
+ logger.info(f"{len(mcps)} MCPs identified for {cell_type_1} and {cell_type_2}.")
763
762
 
764
763
  new_mcp_scores: dict[Any, list[Any]]
765
764
  cca_sig, new_mcp_scores = self._calculate_cca_sig(
@@ -912,9 +911,7 @@ class Dialogue:
912
911
  results: dl.MultilevelModeling result object.
913
912
  MCP: MCP key of the result object.
914
913
  threshold: Number between [0,1]. The fraction of cell types compared against which must have the associated MCP gene.
915
- Defaults to 0.70.
916
914
  focal_celltypes: None (compare against all cell types) or a list of other cell types which you want to compare against.
917
- Defaults to None.
918
915
 
919
916
  Returns:
920
917
  Dict with keys 'up_genes' and 'down_genes' and values of lists of genes
@@ -993,10 +990,8 @@ class Dialogue:
993
990
  Args:
994
991
  ct_subs: Dialogue output ct_subs dictionary
995
992
  mcp: The name of the marker gene expression column.
996
- Defaults to "mcp_0".
997
993
  fraction: Fraction of extreme cells to consider for gene ranking.
998
994
  Should be between 0 and 1.
999
- Defaults to 0.1.
1000
995
 
1001
996
  Returns:
1002
997
  Dictionary where keys are subpopulation names and values are Anndata
@@ -1035,7 +1030,7 @@ class Dialogue:
1035
1030
  Args:
1036
1031
  ct_subs: Dialogue output ct_subs dictionary
1037
1032
  fraction: Fraction of extreme cells to consider for gene ranking.
1038
- Should be between 0 and 1. Defaults to 0.1.
1033
+ Should be between 0 and 1.
1039
1034
 
1040
1035
  Returns:
1041
1036
  Nested dictionary where keys of the first level are MCPs (of the form "mcp_0" etc)
@@ -1085,7 +1080,7 @@ class Dialogue:
1085
1080
  split_key: Variable in adata.obs used to split the data.
1086
1081
  celltype_key: Key for cell type annotations.
1087
1082
  split_which: Which values of split_key to plot. Required if more than 2 values in split_key.
1088
- mcp: Key for MCP data. Defaults to "mcp_0".
1083
+ mcp: Key for MCP data.
1089
1084
 
1090
1085
  Returns:
1091
1086
  A :class:`~matplotlib.axes.Axes` object
@@ -1144,7 +1139,7 @@ class Dialogue:
1144
1139
  celltype_key: Key in `adata.obs` containing cell type annotations.
1145
1140
  color: Key in `adata.obs` for color annotations. This parameter is used as the hue
1146
1141
  sample_id: Key in `adata.obs` for the sample annotations.
1147
- mcp: Key in `adata.obs` for MCP feature values. Defaults to `"mcp_0"`.
1142
+ mcp: Key in `adata.obs` for MCP feature values.
1148
1143
 
1149
1144
  Returns:
1150
1145
  Seaborn Pairgrid object.
@@ -0,0 +1,20 @@
1
+ from ._base import ContrastType, LinearModelBase, MethodBase
2
+ from ._dge_comparison import DGEEVAL
3
+ from ._edger import EdgeR
4
+ from ._pydeseq2 import PyDESeq2
5
+ from ._simple_tests import SimpleComparisonBase, TTest, WilcoxonTest
6
+ from ._statsmodels import Statsmodels
7
+
8
+ __all__ = [
9
+ "MethodBase",
10
+ "LinearModelBase",
11
+ "EdgeR",
12
+ "PyDESeq2",
13
+ "Statsmodels",
14
+ "SimpleComparisonBase",
15
+ "WilcoxonTest",
16
+ "TTest",
17
+ "ContrastType",
18
+ ]
19
+
20
+ AVAILABLE_METHODS = [Statsmodels, EdgeR, PyDESeq2, WilcoxonTest, TTest]