pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. pertpy/__init__.py +2 -1
  2. pertpy/data/__init__.py +61 -0
  3. pertpy/data/_dataloader.py +27 -23
  4. pertpy/data/_datasets.py +58 -0
  5. pertpy/metadata/__init__.py +2 -0
  6. pertpy/metadata/_cell_line.py +39 -70
  7. pertpy/metadata/_compound.py +3 -4
  8. pertpy/metadata/_drug.py +2 -6
  9. pertpy/metadata/_look_up.py +38 -51
  10. pertpy/metadata/_metadata.py +7 -10
  11. pertpy/metadata/_moa.py +2 -6
  12. pertpy/plot/__init__.py +0 -5
  13. pertpy/preprocessing/__init__.py +2 -0
  14. pertpy/preprocessing/_guide_rna.py +2 -3
  15. pertpy/tools/__init__.py +42 -4
  16. pertpy/tools/_augur.py +14 -15
  17. pertpy/tools/_cinemaot.py +2 -2
  18. pertpy/tools/_coda/_base_coda.py +118 -142
  19. pertpy/tools/_coda/_sccoda.py +16 -15
  20. pertpy/tools/_coda/_tasccoda.py +21 -22
  21. pertpy/tools/_dialogue.py +18 -23
  22. pertpy/tools/_differential_gene_expression/__init__.py +20 -0
  23. pertpy/tools/_differential_gene_expression/_base.py +657 -0
  24. pertpy/tools/_differential_gene_expression/_checks.py +41 -0
  25. pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
  26. pertpy/tools/_differential_gene_expression/_edger.py +125 -0
  27. pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
  28. pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
  29. pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
  30. pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
  31. pertpy/tools/_distances/_distance_tests.py +21 -16
  32. pertpy/tools/_distances/_distances.py +406 -70
  33. pertpy/tools/_enrichment.py +10 -15
  34. pertpy/tools/_kernel_pca.py +1 -1
  35. pertpy/tools/_milo.py +76 -53
  36. pertpy/tools/_mixscape.py +15 -11
  37. pertpy/tools/_perturbation_space/_clustering.py +5 -2
  38. pertpy/tools/_perturbation_space/_comparison.py +112 -0
  39. pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
  40. pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
  41. pertpy/tools/_perturbation_space/_simple.py +3 -3
  42. pertpy/tools/_scgen/__init__.py +1 -1
  43. pertpy/tools/_scgen/_base_components.py +2 -3
  44. pertpy/tools/_scgen/_scgen.py +33 -28
  45. pertpy/tools/_scgen/_utils.py +2 -2
  46. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
  47. pertpy-0.8.0.dist-info/RECORD +57 -0
  48. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
  49. pertpy/plot/_augur.py +0 -171
  50. pertpy/plot/_coda.py +0 -601
  51. pertpy/plot/_guide_rna.py +0 -64
  52. pertpy/plot/_milopy.py +0 -209
  53. pertpy/plot/_mixscape.py +0 -355
  54. pertpy/tools/_differential_gene_expression.py +0 -325
  55. pertpy-0.7.0.dist-info/RECORD +0 -53
  56. {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
@@ -15,6 +15,7 @@ import seaborn as sns
15
15
  from adjustText import adjust_text
16
16
  from anndata import AnnData
17
17
  from jax import config, random
18
+ from lamin_utils import logger
18
19
  from matplotlib import cm, rcParams
19
20
  from matplotlib import image as mpimg
20
21
  from matplotlib.colors import ListedColormap
@@ -110,9 +111,9 @@ class CompositionalModel2(ABC):
110
111
  Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
111
112
  To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
112
113
  reference_cell_type: Column name that sets the reference cell type.
113
- Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen. Defaults to "automatic".
114
+ Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
114
115
  automatic_reference_absence_threshold: If using reference_cell_type = "automatic", determine the maximum fraction of zero entries for a cell type
115
- to be considered as a possible reference cell type. Defaults to 0.05.
116
+ to be considered as a possible reference cell type.
116
117
 
117
118
  Returns:
118
119
  AnnData object that is ready for CODA models.
@@ -148,7 +149,7 @@ class CompositionalModel2(ABC):
148
149
  ref_index = np.where(cell_type_disp == min_var)[0][0]
149
150
 
150
151
  ref_cell_type = cell_types[ref_index]
151
- print(f"[bold blue]Automatic reference selection! Reference cell type set to {ref_cell_type}")
152
+ logger.info(f"Automatic reference selection! Reference cell type set to {ref_cell_type}")
152
153
 
153
154
  # Column name as reference cell type
154
155
  elif reference_cell_type in cell_types:
@@ -160,7 +161,7 @@ class CompositionalModel2(ABC):
160
161
 
161
162
  # Add pseudocount if zeroes are present.
162
163
  if np.count_nonzero(sample_adata.X) != np.size(sample_adata.X):
163
- print("Zero counts encountered in data! Added a pseudocount of 0.5.")
164
+ logger.info("Zero counts encountered in data! Added a pseudocount of 0.5.")
164
165
  sample_adata.X[sample_adata.X == 0] = 0.5
165
166
 
166
167
  sample_adata.obsm["sample_counts"] = np.sum(sample_adata.X, axis=1)
@@ -201,7 +202,7 @@ class CompositionalModel2(ABC):
201
202
  sample_adata: anndata object with cell counts as sample_adata.X and covariates saved in sample_adata.obs.
202
203
  kernel: A `numpyro.infer.mcmc.MCMCKernel` object
203
204
  rng_key: The rng state used. If None, a random state will be selected
204
- copy: Return a copy instead of writing to adata. Defaults to False.
205
+ copy: Return a copy instead of writing to adata.
205
206
  args: Passed to `numpyro.infer.mcmc.MCMC`
206
207
  kwargs: Passed to `numpyro.infer.mcmc.MCMC`
207
208
 
@@ -237,13 +238,13 @@ class CompositionalModel2(ABC):
237
238
 
238
239
  acc_rate = np.array(self.mcmc.last_state.mean_accept_prob)
239
240
  if acc_rate < 0.6:
240
- print(
241
- f"[bold red]Acceptance rate unusually low ({acc_rate} < 0.5)! Results might be incorrect! "
241
+ logger.warning(
242
+ f"Acceptance rate unusually low ({acc_rate} < 0.5)! Results might be incorrect! "
242
243
  f"Please check feasibility of results and re-run the sampling step with a different rng_key if necessary."
243
244
  )
244
245
  if acc_rate > 0.95:
245
- print(
246
- f"[bold red]Acceptance rate unusually high ({acc_rate} > 0.95)! Results might be incorrect! "
246
+ logger.warning(
247
+ f"Acceptance rate unusually high ({acc_rate} > 0.95)! Results might be incorrect! "
247
248
  f"Please check feasibility of results and re-run the sampling step with a different rng_key if necessary."
248
249
  )
249
250
 
@@ -286,11 +287,11 @@ class CompositionalModel2(ABC):
286
287
 
287
288
  Args:
288
289
  data: AnnData object or MuData object.
289
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
290
- num_samples: Number of sampled values after burn-in. Defaults to 10000.
291
- num_warmup: Number of burn-in (warmup) samples. Defaults to 1000.
292
- rng_key: The rng state used. Defaults to 0.
293
- copy: Return a copy instead of writing to adata. Defaults to False.
290
+ modality_key: If data is a MuData object, specify which modality to use.
291
+ num_samples: Number of sampled values after burn-in.
292
+ num_warmup: Number of burn-in (warmup) samples.
293
+ rng_key: The rng state used.
294
+ copy: Return a copy instead of writing to adata.
294
295
 
295
296
  Returns:
296
297
  Calls `self.__run_mcmc`
@@ -299,7 +300,7 @@ class CompositionalModel2(ABC):
299
300
  try:
300
301
  sample_adata = data[modality_key]
301
302
  except IndexError:
302
- print("When data is a MuData object, modality_key must be specified!")
303
+ logger.error("When data is a MuData object, modality_key must be specified!")
303
304
  raise
304
305
  if isinstance(data, AnnData):
305
306
  sample_adata = data
@@ -339,11 +340,11 @@ class CompositionalModel2(ABC):
339
340
 
340
341
  Args:
341
342
  data: AnnData object or MuData object.
342
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
343
- num_samples: Number of sampled values after burn-in. Defaults to 20000.
344
- num_warmup: Number of burn-in (warmup) samples. Defaults to 5000.
345
- rng_key: The rng state used. If None, a random state will be selected. Defaults to None.
346
- copy: Return a copy instead of writing to adata. Defaults to False.
343
+ modality_key: If data is a MuData object, specify which modality to use.
344
+ num_samples: Number of sampled values after burn-in.
345
+ num_warmup: Number of burn-in (warmup) samples.
346
+ rng_key: The rng state used. If None, a random state will be selected.
347
+ copy: Return a copy instead of writing to adata.
347
348
 
348
349
  Examples:
349
350
  >>> import pertpy as pt
@@ -358,7 +359,7 @@ class CompositionalModel2(ABC):
358
359
  try:
359
360
  sample_adata = data[modality_key]
360
361
  except IndexError:
361
- print("When data is a MuData object, modality_key must be specified!")
362
+ logger.error("When data is a MuData object, modality_key must be specified!")
362
363
  raise
363
364
  if isinstance(data, AnnData):
364
365
  sample_adata = data
@@ -397,7 +398,7 @@ class CompositionalModel2(ABC):
397
398
 
398
399
  Args:
399
400
  sample_adata: Anndata object with cell counts as sample_adata.X and covariates saved in sample_adata.obs.
400
- est_fdr: Desired FDR value. Defaults to 0.05.
401
+ est_fdr: Desired FDR value.
401
402
  args: Passed to ``az.summary``
402
403
  kwargs: Passed to ``az.summary``
403
404
 
@@ -637,8 +638,8 @@ class CompositionalModel2(ABC):
637
638
  effect_df: Effect summary, see ``summary_prepare``
638
639
  model_type: String indicating the model type ("classic" or "tree_agg")
639
640
  select_type: String indicating the type of spike_and_slab selection ("spikeslab" or "sslasso")
640
- target_fdr: Desired FDR value. Defaults to 0.05.
641
- node_df: If using tree aggregation, the node-level effect DataFrame must be passed. Defaults to None.
641
+ target_fdr: Desired FDR value.
642
+ node_df: If using tree aggregation, the node-level effect DataFrame must be passed.
642
643
 
643
644
  Returns:
644
645
  pd.DataFrame: effect DataFrame with inclusion probability, final parameters, expected sample.
@@ -790,8 +791,8 @@ class CompositionalModel2(ABC):
790
791
 
791
792
  Args:
792
793
  data: AnnData object or MuData object.
793
- extended: If True, return the extended summary with additional statistics. Defaults to False.
794
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
794
+ extended: If True, return the extended summary with additional statistics.
795
+ modality_key: If data is a MuData object, specify which modality to use.
795
796
  args: Passed to az.summary
796
797
  kwargs: Passed to az.summary
797
798
 
@@ -809,7 +810,7 @@ class CompositionalModel2(ABC):
809
810
  try:
810
811
  sample_adata = data[modality_key]
811
812
  except IndexError:
812
- print("[bold red]When data is a MuData object, modality_key must be specified!")
813
+ logger.error("When data is a MuData object, modality_key must be specified!")
813
814
  raise
814
815
  if isinstance(data, AnnData):
815
816
  sample_adata = data
@@ -848,10 +849,10 @@ class CompositionalModel2(ABC):
848
849
  table.add_column("Name", justify="left", style="cyan")
849
850
  table.add_column("Value", justify="left")
850
851
  table.add_row("Data", "Data: %d samples, %d cell types" % data_dims)
851
- table.add_row("Reference cell type", "%s" % str(sample_adata.uns["scCODA_params"]["reference_cell_type"]))
852
- table.add_row("Formula", "%s" % sample_adata.uns["scCODA_params"]["formula"])
852
+ table.add_row("Reference cell type", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_cell_type"])))
853
+ table.add_row("Formula", "{}".format(sample_adata.uns["scCODA_params"]["formula"]))
853
854
  if extended:
854
- table.add_row("Reference index", "%s" % str(sample_adata.uns["scCODA_params"]["reference_index"]))
855
+ table.add_row("Reference index", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_index"])))
855
856
  if select_type == "spikeslab":
856
857
  table.add_row(
857
858
  "Spike-and-slab threshold",
@@ -934,7 +935,7 @@ class CompositionalModel2(ABC):
934
935
 
935
936
  Args:
936
937
  data: AnnData object or MuData object.
937
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
938
+ modality_key: If data is a MuData object, specify which modality to use.
938
939
 
939
940
  Returns:
940
941
  pd.DataFrame: Intercept data frame.
@@ -953,7 +954,7 @@ class CompositionalModel2(ABC):
953
954
  try:
954
955
  sample_adata = data[modality_key]
955
956
  except IndexError:
956
- print("When data is a MuData object, modality_key must be specified!")
957
+ logger.error("When data is a MuData object, modality_key must be specified!")
957
958
  raise
958
959
  if isinstance(data, AnnData):
959
960
  sample_adata = data
@@ -965,7 +966,7 @@ class CompositionalModel2(ABC):
965
966
 
966
967
  Args:
967
968
  data: AnnData object or MuData object.
968
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
969
+ modality_key: If data is a MuData object, specify which modality to use.
969
970
 
970
971
  Returns:
971
972
  pd.DataFrame: Effect data frame.
@@ -984,7 +985,7 @@ class CompositionalModel2(ABC):
984
985
  try:
985
986
  sample_adata = data[modality_key]
986
987
  except IndexError:
987
- print("When data is a MuData object, modality_key must be specified!")
988
+ logger.error("When data is a MuData object, modality_key must be specified!")
988
989
  raise
989
990
  if isinstance(data, AnnData):
990
991
  sample_adata = data
@@ -1007,7 +1008,7 @@ class CompositionalModel2(ABC):
1007
1008
 
1008
1009
  Args:
1009
1010
  data: AnnData object or MuData object.
1010
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
1011
+ modality_key: If data is a MuData object, specify which modality to use.
1011
1012
 
1012
1013
  Returns:
1013
1014
  pd.DataFrame: Node effect data frame.
@@ -1032,7 +1033,7 @@ class CompositionalModel2(ABC):
1032
1033
  try:
1033
1034
  sample_adata = data[modality_key]
1034
1035
  except IndexError:
1035
- print("When data is a MuData object, modality_key must be specified!")
1036
+ logger.error("When data is a MuData object, modality_key must be specified!")
1036
1037
  raise
1037
1038
  if isinstance(data, AnnData):
1038
1039
  sample_adata = data
@@ -1046,7 +1047,7 @@ class CompositionalModel2(ABC):
1046
1047
  Args:
1047
1048
  data: AnnData object or MuData object.
1048
1049
  est_fdr: Desired FDR value.
1049
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
1050
+ modality_key: If data is a MuData object, specify which modality to use.
1050
1051
  args: passed to self.summary_prepare
1051
1052
  kwargs: passed to self.summary_prepare
1052
1053
 
@@ -1057,7 +1058,7 @@ class CompositionalModel2(ABC):
1057
1058
  try:
1058
1059
  sample_adata = data[modality_key]
1059
1060
  except IndexError:
1060
- print("When data is a MuData object, modality_key must be specified!")
1061
+ logger.error("When data is a MuData object, modality_key must be specified!")
1061
1062
  raise
1062
1063
  if isinstance(data, AnnData):
1063
1064
  sample_adata = data
@@ -1080,8 +1081,8 @@ class CompositionalModel2(ABC):
1080
1081
 
1081
1082
  Args:
1082
1083
  data: AnnData object or MuData object.
1083
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
1084
- est_fdr: Estimated false discovery rate. Must be between 0 and 1. Defaults to None.
1084
+ modality_key: If data is a MuData object, specify which modality to use.
1085
+ est_fdr: Estimated false discovery rate. Must be between 0 and 1.
1085
1086
 
1086
1087
  Returns:
1087
1088
  pd.Series: Credible effect decision series of boolean values indicating whether effects are credible under inc_prob_threshold.
@@ -1090,7 +1091,7 @@ class CompositionalModel2(ABC):
1090
1091
  try:
1091
1092
  sample_adata = data[modality_key]
1092
1093
  except IndexError:
1093
- print("When data is a MuData object, modality_key must be specified!")
1094
+ logger.error("When data is a MuData object, modality_key must be specified!")
1094
1095
  raise
1095
1096
  if isinstance(data, AnnData):
1096
1097
  sample_adata = data
@@ -1143,10 +1144,10 @@ class CompositionalModel2(ABC):
1143
1144
  type_names: The names of all cell types
1144
1145
  title: Plot title, usually the covariate's name
1145
1146
  level_names: Names of the covariate's levels
1146
- figsize: Figure size. Defaults to None.
1147
- dpi: Dpi setting. Defaults to 100.
1148
- palette: The color map for the barplot. Defaults to cm.tab20.
1149
- show_legend: If True, adds a legend. Defaults to True.
1147
+ figsize: Figure size (matplotlib).
1148
+ dpi: Resolution in DPI (matplotlib).
1149
+ palette: The color map for the barplot.
1150
+ show_legend: If True, adds a legend.
1150
1151
 
1151
1152
  Returns:
1152
1153
  A :class:`~matplotlib.axes.Axes` object
@@ -1205,12 +1206,12 @@ class CompositionalModel2(ABC):
1205
1206
  Args:
1206
1207
  data: AnnData object or MuData object.
1207
1208
  feature_name: The name of the covariate to plot. If feature_name=="samples", one bar for every sample will be plotted
1208
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
1209
- figsize: Figure size. Defaults to None.
1210
- dpi: Dpi setting. Defaults to 100.
1211
- palette: The matplotlib color map for the barplot. Defaults to cm.tab20.
1212
- show_legend: If True, adds a legend. Defaults to True.
1213
- level_order: Custom ordering of bars on the x-axis. Defaults to None.
1209
+ modality_key: If data is a MuData object, specify which modality to use.
1210
+ figsize: Figure size.
1211
+ dpi: Dpi setting.
1212
+ palette: The matplotlib color map for the barplot.
1213
+ show_legend: If True, adds a legend.
1214
+ level_order: Custom ordering of bars on the x-axis.
1214
1215
 
1215
1216
  Returns:
1216
1217
  A :class:`~matplotlib.axes.Axes` object
@@ -1311,20 +1312,17 @@ class CompositionalModel2(ABC):
1311
1312
 
1312
1313
  Args:
1313
1314
  data: AnnData object or MuData object.
1314
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
1315
- covariates: The name of the covariates in data.obs to plot. Defaults to None.
1316
- parameter: The parameter in effect summary to plot. Defaults to "log2-fold change".
1315
+ modality_key: If data is a MuData object, specify which modality to use.
1316
+ covariates: The name of the covariates in data.obs to plot.
1317
+ parameter: The parameter in effect summary to plot.
1317
1318
  plot_facets: If False, plot cell types on the x-axis. If True, plot as facets.
1318
- Defaults to True.
1319
1319
  plot_zero_covariate: If True, plot covariate that have all zero effects. If False, do not plot.
1320
- Defaults to True.
1321
1320
  plot_zero_cell_type: If True, plot cell type that have zero effect. If False, do not plot.
1322
- Defaults to False.
1323
- figsize: Figure size. Defaults to None.
1324
- dpi: Figure size. Defaults to 100.
1325
- palette: The seaborn color map for the barplot. Defaults to cm.tab20.
1326
- level_order: Custom ordering of bars on the x-axis. Defaults to None.
1327
- args_barplot: Arguments passed to sns.barplot. Defaults to None.
1321
+ figsize: Figure size.
1322
+ dpi: Dpi setting.
1323
+ palette: The seaborn color map for the barplot.
1324
+ level_order: Custom ordering of bars on the x-axis.
1325
+ args_barplot: Arguments passed to sns.barplot.
1328
1326
 
1329
1327
  Returns:
1330
1328
  Depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
@@ -1519,20 +1517,19 @@ class CompositionalModel2(ABC):
1519
1517
  Args:
1520
1518
  data: AnnData object or MuData object
1521
1519
  feature_name: The name of the feature in data.obs to plot
1522
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
1520
+ modality_key: If data is a MuData object, specify which modality to use.
1523
1521
  y_scale: Transformation of cell counts. Options: "relative" - Relative abundance, "log" - log(count),
1524
1522
  "log10" - log10(count), "count" - absolute abundance (cell counts).
1525
- Defaults to "relative".
1526
- plot_facets: If False, plot cell types on the x-axis. If True, plot as facets. Defaults to False.
1527
- add_dots: If True, overlay a scatterplot with one dot for each data point. Defaults to False.
1528
- cell_types: Subset of cell types that should be plotted. Defaults to None.
1529
- args_boxplot: Arguments passed to sns.boxplot. Defaults to {}.
1530
- args_swarmplot: Arguments passed to sns.swarmplot. Defaults to {}.
1531
- figsize: Figure size. Defaults to None.
1532
- dpi: Dpi setting. Defaults to 100.
1533
- palette: The seaborn color map for the barplot. Defaults to "Blues".
1534
- show_legend: If True, adds a legend. Defaults to True.
1535
- level_order: Custom ordering of bars on the x-axis. Defaults to None.
1523
+ plot_facets: If False, plot cell types on the x-axis. If True, plot as facets.
1524
+ add_dots: If True, overlay a scatterplot with one dot for each data point.
1525
+ cell_types: Subset of cell types that should be plotted.
1526
+ args_boxplot: Arguments passed to sns.boxplot.
1527
+ args_swarmplot: Arguments passed to sns.swarmplot.
1528
+ figsize: Figure size.
1529
+ dpi: Dpi setting.
1530
+ palette: The seaborn color map for the barplot.
1531
+ show_legend: If True, adds a legend.
1532
+ level_order: Custom ordering of bars on the x-axis.
1536
1533
 
1537
1534
  Returns:
1538
1535
  Depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
@@ -1758,16 +1755,14 @@ class CompositionalModel2(ABC):
1758
1755
 
1759
1756
  Args:
1760
1757
  data: AnnData or MuData object.
1761
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
1762
- Defaults to "coda".
1763
- abundant_threshold: Presence threshold for abundant cell types. Defaults to 0.9.
1764
- default_color: Bar color for all non-minimal cell types. Defaults to "Grey".
1758
+ modality_key: If data is a MuData object, specify which modality to use.
1759
+ abundant_threshold: Presence threshold for abundant cell types.
1760
+ default_color: Bar color for all non-minimal cell types.
1765
1761
  abundant_color: Bar color for cell types with abundant percentage larger than abundant_threshold.
1766
- Defaults to "Red".
1767
- label_cell_types: Label dots with cell type names. Defaults to True.
1768
- figsize: Figure size. Defaults to None.
1769
- dpi: Dpi setting. Defaults to 100.
1770
- ax: A matplotlib axes object. Only works if plotting a single component. Defaults to None.
1762
+ label_cell_types: Label dots with cell type names.
1763
+ figsize: Figure size.
1764
+ dpi: Dpi setting.
1765
+ ax: A matplotlib axes object. Only works if plotting a single component.
1771
1766
 
1772
1767
  Returns:
1773
1768
  A :class:`~matplotlib.axes.Axes` object
@@ -1882,22 +1877,16 @@ class CompositionalModel2(ABC):
1882
1877
  Args:
1883
1878
  data: AnnData object or MuData object.
1884
1879
  modality_key: If data is a MuData object, specify which modality to use.
1885
- Defaults to "coda".
1886
1880
  tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
1887
- Defaults to "tree".
1888
1881
  tight_text: When False, boundaries of the text are approximated according to general font metrics,
1889
1882
  producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
1890
- Default to False.
1891
1883
  show_scale: Include the scale legend in the tree image or not.
1892
- Defaults to False.
1893
1884
  show: If True, plot the tree inline. If false, return tree and tree_style objects.
1894
- Defaults to True.
1895
1885
  file_name: Path to the output image file. Valid extensions are .SVG, .PDF, .PNG.
1896
1886
  Output image can be saved whether show is True or not.
1897
- Defaults to None.
1898
- units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches. Defaults to "px".
1899
- figsize: Figure size. Defaults to None.
1900
- dpi: Dots per inches. Defaults to 100.
1887
+ units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches.
1888
+ figsize: Figure size.
1889
+ dpi: Dots per inches.
1901
1890
 
1902
1891
  Returns:
1903
1892
  Depending on `show`, returns :class:`ete3.TreeNode` and :class:`ete3.TreeStyle` (`show = False`) or plot the tree inline (`show = True`)
@@ -1972,23 +1961,18 @@ class CompositionalModel2(ABC):
1972
1961
  data: AnnData object or MuData object.
1973
1962
  covariate: The covariate, whose effects should be plotted.
1974
1963
  modality_key: If data is a MuData object, specify which modality to use.
1975
- Defaults to "coda".
1976
1964
  tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
1977
- Defaults to "tree".
1978
1965
  show_legend: Whether to show a legend for the nodes' significant effects.
1979
1966
  Defaults to False if show_leaf_effects is True.
1980
1967
  show_leaf_effects: If True, plot bar plots which indicate leaf-level significant effects.
1981
- Defaults to False.
1982
1968
  tight_text: When False, boundaries of the text are approximated according to general font metrics,
1983
1969
  producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
1984
- Defaults to False.
1985
- show_scale: Include the scale legend in the tree image or not. Defaults to False.
1986
- show: If True, plot the tree inline. If false, return tree and tree_style objects. Defaults to True.
1970
+ show_scale: Include the scale legend in the tree image or not.
1971
+ show: If True, plot the tree inline. If false, return tree and tree_style objects.
1987
1972
  file_name: Path to the output image file. valid extensions are .SVG, .PDF, .PNG. Output image can be saved whether show is True or not.
1988
- Defaults to None.
1989
- units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches. Defaults to "px".
1990
- figsize: Figure size. Defaults to None.
1991
- dpi: Dots per inches. Defaults to 100.
1973
+ units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches.
1974
+ figsize: Figure size.
1975
+ dpi: Dots per inches.
1992
1976
 
1993
1977
  Returns:
1994
1978
  Depending on `show`, returns :class:`ete3.TreeNode` and :class:`ete3.TreeStyle` (`show = False`)
@@ -2026,7 +2010,7 @@ class CompositionalModel2(ABC):
2026
2010
  if show_legend is None:
2027
2011
  show_legend = not show_leaf_effects
2028
2012
  elif show_legend:
2029
- print("Tree leaves and leaf effect bars won't be aligned when legend is shown!")
2013
+ logger.info("Tree leaves and leaf effect bars won't be aligned when legend is shown!")
2030
2014
 
2031
2015
  if isinstance(tree, str):
2032
2016
  tree = data.uns[tree]
@@ -2171,12 +2155,10 @@ class CompositionalModel2(ABC):
2171
2155
  effect_name: The name of the effect results in .varm of aggregated sample-level AnnData to plot
2172
2156
  cluster_key: The cluster information in .obs of cell-level AnnData (default is data['rna']).
2173
2157
  To assign cell types' effects to original cells.
2174
- modality_key_1: Key to the cell-level AnnData in the MuData object. Defaults to "rna".
2158
+ modality_key_1: Key to the cell-level AnnData in the MuData object.
2175
2159
  modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
2176
- Defaults to "coda".
2177
- show: Whether to display the figure or return axis. Defaults to None.
2160
+ show: Whether to display the figure or return axis.
2178
2161
  ax: A matplotlib axes object. Only works if plotting a single component.
2179
- Defaults to None.
2180
2162
  **kwargs: All other keyword arguments are passed to `scanpy.plot.umap()`
2181
2163
 
2182
2164
  Returns:
@@ -2254,7 +2236,7 @@ class CompositionalModel2(ABC):
2254
2236
 
2255
2237
 
2256
2238
  def get_a(
2257
- tree: tt.tree,
2239
+ tree: tt.core.ToyTree,
2258
2240
  ) -> tuple[np.ndarray, int]:
2259
2241
  """Calculate ancestor matrix from a toytree tree
2260
2242
 
@@ -2293,7 +2275,7 @@ def get_a(
2293
2275
  return A, n_nodes - 1
2294
2276
 
2295
2277
 
2296
- def collapse_singularities(tree: tt.tree) -> tt.tree:
2278
+ def collapse_singularities(tree: tt.core.ToyTree) -> tt.core.ToyTree:
2297
2279
  """Collapses (deletes) nodes in a toytree tree that are singularities (have only one child).
2298
2280
 
2299
2281
  Args:
@@ -2509,15 +2491,14 @@ def import_tree(
2509
2491
 
2510
2492
  Args:
2511
2493
  data: A tascCODA-compatible data object.
2512
- modality_1: If `data` is MuData, specify the modality name to the original cell level anndata object. Defaults to None.
2513
- modality_2: If `data` is MuData, specify the modality name to the aggregated level anndata object. Defaults to None.
2514
- dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object. Defaults to None.
2515
- levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
2516
- levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level. Defaults to None.
2494
+ modality_1: If `data` is MuData, specify the modality name to the original cell level anndata object.
2495
+ modality_2: If `data` is MuData, specify the modality name to the aggregated level anndata object.
2496
+ dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
2497
+ levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
2498
+ levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
2517
2499
  add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
2518
- Defaults to True.
2519
2500
  key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`.
2520
- If `data` is MuData, save tree in data[modality_2]. Defaults to "tree".
2501
+ If `data` is MuData, save tree in data[modality_2].
2521
2502
 
2522
2503
  Returns:
2523
2504
  Updates data with the following:
@@ -2538,10 +2519,10 @@ def import_tree(
2538
2519
  data_1 = data[modality_1]
2539
2520
  data_2 = data[modality_2]
2540
2521
  except KeyError as name:
2541
- print(f"No {name} slot in MuData")
2522
+ logger.error(f"No {name} slot in MuData")
2542
2523
  raise
2543
2524
  except IndexError:
2544
- print("Please specify modality_1 and modality_2 to indicate modalities in MuData")
2525
+ logger.error("Please specify modality_1 and modality_2 to indicate modalities in MuData")
2545
2526
  raise
2546
2527
  else:
2547
2528
  data_1 = data
@@ -2613,43 +2594,38 @@ def from_scanpy(
2613
2594
  Returns:
2614
2595
  AnnData: A data set with cells aggregated to the (sample x cell type) level
2615
2596
  """
2616
- if isinstance(sample_identifier, str):
2617
- sample_identifier = [sample_identifier]
2597
+ sample_identifier = [sample_identifier] if isinstance(sample_identifier, str) else sample_identifier
2598
+ covariate_obs = list(set(covariate_obs or []) | set(sample_identifier))
2618
2599
 
2619
- if len(sample_identifier) > 1:
2600
+ if isinstance(sample_identifier, list):
2620
2601
  adata.obs["scCODA_sample_id"] = adata.obs[sample_identifier].agg("-".join, axis=1)
2621
2602
  sample_identifier = "scCODA_sample_id"
2622
- else:
2623
- sample_identifier = sample_identifier[0]
2624
2603
 
2625
- # get cell type counts
2626
- ct_count_data = pd.crosstab(adata.obs[sample_identifier], adata.obs[cell_type_identifier])
2627
- ct_count_data = ct_count_data.fillna(0)
2628
-
2629
- # get covariates from different sources
2604
+ groups = adata.obs.value_counts([sample_identifier, cell_type_identifier])
2605
+ ct_count_data = groups.unstack(level=cell_type_identifier).fillna(0)
2630
2606
  covariate_df_ = pd.DataFrame(index=ct_count_data.index)
2631
2607
 
2632
2608
  if covariate_uns is not None:
2633
2609
  covariate_df_uns = pd.DataFrame(adata.uns[covariate_uns], index=ct_count_data.index)
2634
- covariate_df_ = covariate_df_.join(covariate_df_uns, how="left")
2610
+ covariate_df_ = pd.concat([covariate_df_, covariate_df_uns], axis=1)
2635
2611
 
2636
2612
  if covariate_obs:
2637
- is_unique = adata.obs.groupby(sample_identifier, observed=True).transform(lambda x: x.nunique() == 1)
2638
- unique_covariates = is_unique.columns[is_unique.all()].tolist()
2639
-
2640
- if len(unique_covariates) < len(covariate_obs):
2641
- skipped = set(covariate_obs) - set(unique_covariates)
2642
- print(f"[bold yellow]Covariates {skipped} have non-unique values! Skipping...")
2643
- if unique_covariates:
2644
- covariate_df_obs = adata.obs.groupby(sample_identifier, observed=True).first()[unique_covariates]
2645
- covariate_df_ = covariate_df_.join(covariate_df_obs, how="left")
2613
+ unique_check = adata.obs.groupby(sample_identifier).nunique()
2614
+ for c in covariate_obs.copy():
2615
+ if unique_check[c].max() != 1:
2616
+ logger.warning(f"Covariate {c} has non-unique values for batch! Skipping...")
2617
+ covariate_obs.remove(c)
2618
+ if covariate_obs:
2619
+ covariate_df_obs = adata.obs.groupby(sample_identifier).first()[covariate_obs]
2620
+ covariate_df_ = pd.concat([covariate_df_, covariate_df_obs], axis=1)
2646
2621
 
2647
2622
  if covariate_df is not None:
2648
- if not covariate_df.index.equals(ct_count_data.index):
2649
- raise ValueError("AnnData sample names and covariate_df index do not have the same elements!")
2650
- covariate_df_ = covariate_df_.join(covariate_df, how="left")
2623
+ if set(covariate_df.index) != set(ct_count_data.index):
2624
+ raise ValueError("Mismatch between sample names in anndata and covariate_df!")
2625
+ covariate_df_ = pd.concat([covariate_df_, covariate_df.reindex(ct_count_data.index)], axis=1)
2651
2626
 
2652
- var_dat = ct_count_data.sum(axis=0).rename("n_cells").to_frame()
2627
+ var_dat = ct_count_data.sum().rename("n_cells").to_frame()
2653
2628
  var_dat.index = var_dat.index.astype(str)
2629
+ covariate_df_.index = covariate_df_.index.astype(str)
2654
2630
 
2655
2631
  return AnnData(X=ct_count_data.values, var=var_dat, obs=covariate_df_)
@@ -9,6 +9,7 @@ import numpyro as npy
9
9
  import numpyro.distributions as npd
10
10
  from anndata import AnnData
11
11
  from jax import config, random
12
+ from lamin_utils import logger
12
13
  from mudata import MuData
13
14
  from numpyro.infer import Predictive
14
15
  from rich import print
@@ -73,13 +74,13 @@ class Sccoda(CompositionalModel2):
73
74
  adata: AnnData object.
74
75
  type : Specify the input adata type, which could be either a cell-level AnnData or an aggregated sample-level AnnData.
75
76
  generate_sample_level: Whether to generate an AnnData object on the sample level or create an empty AnnData object.
76
- cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types. Defaults to None.
77
- sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample. Defaults to None.
78
- covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored. Defaults to None.
79
- covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored. Defaults to None.
80
- covariate_df: If type is "cell_level", specify dataFrame with covariates. Defaults to None.
81
- modality_key_1: Key to the cell-level AnnData in the MuData object. Defaults to "rna".
82
- modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
77
+ cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
78
+ sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
79
+ covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
80
+ covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
81
+ covariate_df: If type is "cell_level", specify dataFrame with covariates.
82
+ modality_key_1: Key to the cell-level AnnData in the MuData object.
83
+ modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
83
84
 
84
85
  Returns:
85
86
  MuData: MuData object with cell-level AnnData (`mudata[modality_key_1]`) and aggregated sample-level AnnData (`mudata[modality_key_2]`).
@@ -127,10 +128,10 @@ class Sccoda(CompositionalModel2):
127
128
  Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
128
129
  To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
129
130
  reference_cell_type: Column name that sets the reference cell type.
130
- Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen. Defaults to "automatic".
131
+ Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
131
132
  automatic_reference_absence_threshold: If using reference_cell_type = "automatic", determine the maximum fraction of zero entries for a cell type
132
- to be considered as a possible reference cell type. Defaults to 0.05.
133
- modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object. Defaults to "coda".
133
+ to be considered as a possible reference cell type.
134
+ modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
134
135
 
135
136
  Returns:
136
137
  Return an AnnData (if input data is an AnnData object) or return a MuData (if input data is a MuData object)
@@ -309,10 +310,10 @@ class Sccoda(CompositionalModel2):
309
310
 
310
311
  Args:
311
312
  data: AnnData object or MuData object.
312
- modality_key: If data is a MuData object, specify which modality to use. Defaults to "coda".
313
- rng_key: The rng state used for the prior simulation. If None, a random state will be selected. Defaults to None.
314
- num_prior_samples: Number of prior samples calculated. Defaults to 500.
315
- use_posterior_predictive: If True, the posterior predictive will be calculated. Defaults to True.
313
+ modality_key: If data is a MuData object, specify which modality to use.
314
+ rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
315
+ num_prior_samples: Number of prior samples calculated.
316
+ use_posterior_predictive: If True, the posterior predictive will be calculated.
316
317
 
317
318
  Returns:
318
319
  az.InferenceData: arviz_data with all MCMC information
@@ -335,7 +336,7 @@ class Sccoda(CompositionalModel2):
335
336
  try:
336
337
  sample_adata = data[modality_key]
337
338
  except IndexError:
338
- print("When data is a MuData object, modality_key must be specified!")
339
+ logger.error("When data is a MuData object, modality_key must be specified!")
339
340
  raise
340
341
  if isinstance(data, AnnData):
341
342
  sample_adata = data