pertpy 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- pertpy/__init__.py +2 -1
- pertpy/data/__init__.py +61 -0
- pertpy/data/_dataloader.py +27 -23
- pertpy/data/_datasets.py +58 -0
- pertpy/metadata/__init__.py +2 -0
- pertpy/metadata/_cell_line.py +39 -70
- pertpy/metadata/_compound.py +3 -4
- pertpy/metadata/_drug.py +2 -6
- pertpy/metadata/_look_up.py +38 -51
- pertpy/metadata/_metadata.py +7 -10
- pertpy/metadata/_moa.py +2 -6
- pertpy/plot/__init__.py +0 -5
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +2 -3
- pertpy/tools/__init__.py +42 -4
- pertpy/tools/_augur.py +14 -15
- pertpy/tools/_cinemaot.py +2 -2
- pertpy/tools/_coda/_base_coda.py +118 -142
- pertpy/tools/_coda/_sccoda.py +16 -15
- pertpy/tools/_coda/_tasccoda.py +21 -22
- pertpy/tools/_dialogue.py +18 -23
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +21 -16
- pertpy/tools/_distances/_distances.py +406 -70
- pertpy/tools/_enrichment.py +10 -15
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +76 -53
- pertpy/tools/_mixscape.py +15 -11
- pertpy/tools/_perturbation_space/_clustering.py +5 -2
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +20 -22
- pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
- pertpy/tools/_perturbation_space/_simple.py +3 -3
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +33 -28
- pertpy/tools/_scgen/_utils.py +2 -2
- {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/METADATA +22 -13
- pertpy-0.8.0.dist-info/RECORD +57 -0
- {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -171
- pertpy/plot/_coda.py +0 -601
- pertpy/plot/_guide_rna.py +0 -64
- pertpy/plot/_milopy.py +0 -209
- pertpy/plot/_mixscape.py +0 -355
- pertpy/tools/_differential_gene_expression.py +0 -325
- pertpy-0.7.0.dist-info/RECORD +0 -53
- {pertpy-0.7.0.dist-info → pertpy-0.8.0.dist-info}/licenses/LICENSE +0 -0
pertpy/tools/_coda/_base_coda.py
CHANGED
@@ -15,6 +15,7 @@ import seaborn as sns
|
|
15
15
|
from adjustText import adjust_text
|
16
16
|
from anndata import AnnData
|
17
17
|
from jax import config, random
|
18
|
+
from lamin_utils import logger
|
18
19
|
from matplotlib import cm, rcParams
|
19
20
|
from matplotlib import image as mpimg
|
20
21
|
from matplotlib.colors import ListedColormap
|
@@ -110,9 +111,9 @@ class CompositionalModel2(ABC):
|
|
110
111
|
Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
|
111
112
|
To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
|
112
113
|
reference_cell_type: Column name that sets the reference cell type.
|
113
|
-
Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
114
|
+
Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
114
115
|
automatic_reference_absence_threshold: If using reference_cell_type = "automatic", determine the maximum fraction of zero entries for a cell type
|
115
|
-
to be considered as a possible reference cell type.
|
116
|
+
to be considered as a possible reference cell type.
|
116
117
|
|
117
118
|
Returns:
|
118
119
|
AnnData object that is ready for CODA models.
|
@@ -148,7 +149,7 @@ class CompositionalModel2(ABC):
|
|
148
149
|
ref_index = np.where(cell_type_disp == min_var)[0][0]
|
149
150
|
|
150
151
|
ref_cell_type = cell_types[ref_index]
|
151
|
-
|
152
|
+
logger.info(f"Automatic reference selection! Reference cell type set to {ref_cell_type}")
|
152
153
|
|
153
154
|
# Column name as reference cell type
|
154
155
|
elif reference_cell_type in cell_types:
|
@@ -160,7 +161,7 @@ class CompositionalModel2(ABC):
|
|
160
161
|
|
161
162
|
# Add pseudocount if zeroes are present.
|
162
163
|
if np.count_nonzero(sample_adata.X) != np.size(sample_adata.X):
|
163
|
-
|
164
|
+
logger.info("Zero counts encountered in data! Added a pseudocount of 0.5.")
|
164
165
|
sample_adata.X[sample_adata.X == 0] = 0.5
|
165
166
|
|
166
167
|
sample_adata.obsm["sample_counts"] = np.sum(sample_adata.X, axis=1)
|
@@ -201,7 +202,7 @@ class CompositionalModel2(ABC):
|
|
201
202
|
sample_adata: anndata object with cell counts as sample_adata.X and covariates saved in sample_adata.obs.
|
202
203
|
kernel: A `numpyro.infer.mcmc.MCMCKernel` object
|
203
204
|
rng_key: The rng state used. If None, a random state will be selected
|
204
|
-
copy: Return a copy instead of writing to adata.
|
205
|
+
copy: Return a copy instead of writing to adata.
|
205
206
|
args: Passed to `numpyro.infer.mcmc.MCMC`
|
206
207
|
kwargs: Passed to `numpyro.infer.mcmc.MCMC`
|
207
208
|
|
@@ -237,13 +238,13 @@ class CompositionalModel2(ABC):
|
|
237
238
|
|
238
239
|
acc_rate = np.array(self.mcmc.last_state.mean_accept_prob)
|
239
240
|
if acc_rate < 0.6:
|
240
|
-
|
241
|
-
f"
|
241
|
+
logger.warning(
|
242
|
+
f"Acceptance rate unusually low ({acc_rate} < 0.5)! Results might be incorrect! "
|
242
243
|
f"Please check feasibility of results and re-run the sampling step with a different rng_key if necessary."
|
243
244
|
)
|
244
245
|
if acc_rate > 0.95:
|
245
|
-
|
246
|
-
f"
|
246
|
+
logger.warning(
|
247
|
+
f"Acceptance rate unusually high ({acc_rate} > 0.95)! Results might be incorrect! "
|
247
248
|
f"Please check feasibility of results and re-run the sampling step with a different rng_key if necessary."
|
248
249
|
)
|
249
250
|
|
@@ -286,11 +287,11 @@ class CompositionalModel2(ABC):
|
|
286
287
|
|
287
288
|
Args:
|
288
289
|
data: AnnData object or MuData object.
|
289
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
290
|
-
num_samples: Number of sampled values after burn-in.
|
291
|
-
num_warmup: Number of burn-in (warmup) samples.
|
292
|
-
rng_key: The rng state used.
|
293
|
-
copy: Return a copy instead of writing to adata.
|
290
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
291
|
+
num_samples: Number of sampled values after burn-in.
|
292
|
+
num_warmup: Number of burn-in (warmup) samples.
|
293
|
+
rng_key: The rng state used.
|
294
|
+
copy: Return a copy instead of writing to adata.
|
294
295
|
|
295
296
|
Returns:
|
296
297
|
Calls `self.__run_mcmc`
|
@@ -299,7 +300,7 @@ class CompositionalModel2(ABC):
|
|
299
300
|
try:
|
300
301
|
sample_adata = data[modality_key]
|
301
302
|
except IndexError:
|
302
|
-
|
303
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
303
304
|
raise
|
304
305
|
if isinstance(data, AnnData):
|
305
306
|
sample_adata = data
|
@@ -339,11 +340,11 @@ class CompositionalModel2(ABC):
|
|
339
340
|
|
340
341
|
Args:
|
341
342
|
data: AnnData object or MuData object.
|
342
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
343
|
-
num_samples: Number of sampled values after burn-in.
|
344
|
-
num_warmup: Number of burn-in (warmup) samples.
|
345
|
-
rng_key: The rng state used. If None, a random state will be selected.
|
346
|
-
copy: Return a copy instead of writing to adata.
|
343
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
344
|
+
num_samples: Number of sampled values after burn-in.
|
345
|
+
num_warmup: Number of burn-in (warmup) samples.
|
346
|
+
rng_key: The rng state used. If None, a random state will be selected.
|
347
|
+
copy: Return a copy instead of writing to adata.
|
347
348
|
|
348
349
|
Examples:
|
349
350
|
>>> import pertpy as pt
|
@@ -358,7 +359,7 @@ class CompositionalModel2(ABC):
|
|
358
359
|
try:
|
359
360
|
sample_adata = data[modality_key]
|
360
361
|
except IndexError:
|
361
|
-
|
362
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
362
363
|
raise
|
363
364
|
if isinstance(data, AnnData):
|
364
365
|
sample_adata = data
|
@@ -397,7 +398,7 @@ class CompositionalModel2(ABC):
|
|
397
398
|
|
398
399
|
Args:
|
399
400
|
sample_adata: Anndata object with cell counts as sample_adata.X and covariates saved in sample_adata.obs.
|
400
|
-
est_fdr: Desired FDR value.
|
401
|
+
est_fdr: Desired FDR value.
|
401
402
|
args: Passed to ``az.summary``
|
402
403
|
kwargs: Passed to ``az.summary``
|
403
404
|
|
@@ -637,8 +638,8 @@ class CompositionalModel2(ABC):
|
|
637
638
|
effect_df: Effect summary, see ``summary_prepare``
|
638
639
|
model_type: String indicating the model type ("classic" or "tree_agg")
|
639
640
|
select_type: String indicating the type of spike_and_slab selection ("spikeslab" or "sslasso")
|
640
|
-
target_fdr: Desired FDR value.
|
641
|
-
node_df: If using tree aggregation, the node-level effect DataFrame must be passed.
|
641
|
+
target_fdr: Desired FDR value.
|
642
|
+
node_df: If using tree aggregation, the node-level effect DataFrame must be passed.
|
642
643
|
|
643
644
|
Returns:
|
644
645
|
pd.DataFrame: effect DataFrame with inclusion probability, final parameters, expected sample.
|
@@ -790,8 +791,8 @@ class CompositionalModel2(ABC):
|
|
790
791
|
|
791
792
|
Args:
|
792
793
|
data: AnnData object or MuData object.
|
793
|
-
extended: If True, return the extended summary with additional statistics.
|
794
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
794
|
+
extended: If True, return the extended summary with additional statistics.
|
795
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
795
796
|
args: Passed to az.summary
|
796
797
|
kwargs: Passed to az.summary
|
797
798
|
|
@@ -809,7 +810,7 @@ class CompositionalModel2(ABC):
|
|
809
810
|
try:
|
810
811
|
sample_adata = data[modality_key]
|
811
812
|
except IndexError:
|
812
|
-
|
813
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
813
814
|
raise
|
814
815
|
if isinstance(data, AnnData):
|
815
816
|
sample_adata = data
|
@@ -848,10 +849,10 @@ class CompositionalModel2(ABC):
|
|
848
849
|
table.add_column("Name", justify="left", style="cyan")
|
849
850
|
table.add_column("Value", justify="left")
|
850
851
|
table.add_row("Data", "Data: %d samples, %d cell types" % data_dims)
|
851
|
-
table.add_row("Reference cell type", "
|
852
|
-
table.add_row("Formula", "
|
852
|
+
table.add_row("Reference cell type", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_cell_type"])))
|
853
|
+
table.add_row("Formula", "{}".format(sample_adata.uns["scCODA_params"]["formula"]))
|
853
854
|
if extended:
|
854
|
-
table.add_row("Reference index", "
|
855
|
+
table.add_row("Reference index", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_index"])))
|
855
856
|
if select_type == "spikeslab":
|
856
857
|
table.add_row(
|
857
858
|
"Spike-and-slab threshold",
|
@@ -934,7 +935,7 @@ class CompositionalModel2(ABC):
|
|
934
935
|
|
935
936
|
Args:
|
936
937
|
data: AnnData object or MuData object.
|
937
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
938
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
938
939
|
|
939
940
|
Returns:
|
940
941
|
pd.DataFrame: Intercept data frame.
|
@@ -953,7 +954,7 @@ class CompositionalModel2(ABC):
|
|
953
954
|
try:
|
954
955
|
sample_adata = data[modality_key]
|
955
956
|
except IndexError:
|
956
|
-
|
957
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
957
958
|
raise
|
958
959
|
if isinstance(data, AnnData):
|
959
960
|
sample_adata = data
|
@@ -965,7 +966,7 @@ class CompositionalModel2(ABC):
|
|
965
966
|
|
966
967
|
Args:
|
967
968
|
data: AnnData object or MuData object.
|
968
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
969
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
969
970
|
|
970
971
|
Returns:
|
971
972
|
pd.DataFrame: Effect data frame.
|
@@ -984,7 +985,7 @@ class CompositionalModel2(ABC):
|
|
984
985
|
try:
|
985
986
|
sample_adata = data[modality_key]
|
986
987
|
except IndexError:
|
987
|
-
|
988
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
988
989
|
raise
|
989
990
|
if isinstance(data, AnnData):
|
990
991
|
sample_adata = data
|
@@ -1007,7 +1008,7 @@ class CompositionalModel2(ABC):
|
|
1007
1008
|
|
1008
1009
|
Args:
|
1009
1010
|
data: AnnData object or MuData object.
|
1010
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1011
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1011
1012
|
|
1012
1013
|
Returns:
|
1013
1014
|
pd.DataFrame: Node effect data frame.
|
@@ -1032,7 +1033,7 @@ class CompositionalModel2(ABC):
|
|
1032
1033
|
try:
|
1033
1034
|
sample_adata = data[modality_key]
|
1034
1035
|
except IndexError:
|
1035
|
-
|
1036
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
1036
1037
|
raise
|
1037
1038
|
if isinstance(data, AnnData):
|
1038
1039
|
sample_adata = data
|
@@ -1046,7 +1047,7 @@ class CompositionalModel2(ABC):
|
|
1046
1047
|
Args:
|
1047
1048
|
data: AnnData object or MuData object.
|
1048
1049
|
est_fdr: Desired FDR value.
|
1049
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1050
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1050
1051
|
args: passed to self.summary_prepare
|
1051
1052
|
kwargs: passed to self.summary_prepare
|
1052
1053
|
|
@@ -1057,7 +1058,7 @@ class CompositionalModel2(ABC):
|
|
1057
1058
|
try:
|
1058
1059
|
sample_adata = data[modality_key]
|
1059
1060
|
except IndexError:
|
1060
|
-
|
1061
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
1061
1062
|
raise
|
1062
1063
|
if isinstance(data, AnnData):
|
1063
1064
|
sample_adata = data
|
@@ -1080,8 +1081,8 @@ class CompositionalModel2(ABC):
|
|
1080
1081
|
|
1081
1082
|
Args:
|
1082
1083
|
data: AnnData object or MuData object.
|
1083
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1084
|
-
est_fdr: Estimated false discovery rate. Must be between 0 and 1.
|
1084
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1085
|
+
est_fdr: Estimated false discovery rate. Must be between 0 and 1.
|
1085
1086
|
|
1086
1087
|
Returns:
|
1087
1088
|
pd.Series: Credible effect decision series of boolean values indicating whether effects are credible under inc_prob_threshold.
|
@@ -1090,7 +1091,7 @@ class CompositionalModel2(ABC):
|
|
1090
1091
|
try:
|
1091
1092
|
sample_adata = data[modality_key]
|
1092
1093
|
except IndexError:
|
1093
|
-
|
1094
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
1094
1095
|
raise
|
1095
1096
|
if isinstance(data, AnnData):
|
1096
1097
|
sample_adata = data
|
@@ -1143,10 +1144,10 @@ class CompositionalModel2(ABC):
|
|
1143
1144
|
type_names: The names of all cell types
|
1144
1145
|
title: Plot title, usually the covariate's name
|
1145
1146
|
level_names: Names of the covariate's levels
|
1146
|
-
figsize: Figure size
|
1147
|
-
dpi:
|
1148
|
-
palette: The color map for the barplot.
|
1149
|
-
show_legend: If True, adds a legend.
|
1147
|
+
figsize: Figure size (matplotlib).
|
1148
|
+
dpi: Resolution in DPI (matplotlib).
|
1149
|
+
palette: The color map for the barplot.
|
1150
|
+
show_legend: If True, adds a legend.
|
1150
1151
|
|
1151
1152
|
Returns:
|
1152
1153
|
A :class:`~matplotlib.axes.Axes` object
|
@@ -1205,12 +1206,12 @@ class CompositionalModel2(ABC):
|
|
1205
1206
|
Args:
|
1206
1207
|
data: AnnData object or MuData object.
|
1207
1208
|
feature_name: The name of the covariate to plot. If feature_name=="samples", one bar for every sample will be plotted
|
1208
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1209
|
-
figsize: Figure size.
|
1210
|
-
dpi: Dpi setting.
|
1211
|
-
palette: The matplotlib color map for the barplot.
|
1212
|
-
show_legend: If True, adds a legend.
|
1213
|
-
level_order: Custom ordering of bars on the x-axis.
|
1209
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1210
|
+
figsize: Figure size.
|
1211
|
+
dpi: Dpi setting.
|
1212
|
+
palette: The matplotlib color map for the barplot.
|
1213
|
+
show_legend: If True, adds a legend.
|
1214
|
+
level_order: Custom ordering of bars on the x-axis.
|
1214
1215
|
|
1215
1216
|
Returns:
|
1216
1217
|
A :class:`~matplotlib.axes.Axes` object
|
@@ -1311,20 +1312,17 @@ class CompositionalModel2(ABC):
|
|
1311
1312
|
|
1312
1313
|
Args:
|
1313
1314
|
data: AnnData object or MuData object.
|
1314
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1315
|
-
covariates: The name of the covariates in data.obs to plot.
|
1316
|
-
parameter: The parameter in effect summary to plot.
|
1315
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1316
|
+
covariates: The name of the covariates in data.obs to plot.
|
1317
|
+
parameter: The parameter in effect summary to plot.
|
1317
1318
|
plot_facets: If False, plot cell types on the x-axis. If True, plot as facets.
|
1318
|
-
Defaults to True.
|
1319
1319
|
plot_zero_covariate: If True, plot covariate that have all zero effects. If False, do not plot.
|
1320
|
-
Defaults to True.
|
1321
1320
|
plot_zero_cell_type: If True, plot cell type that have zero effect. If False, do not plot.
|
1322
|
-
|
1323
|
-
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1327
|
-
args_barplot: Arguments passed to sns.barplot. Defaults to None.
|
1321
|
+
figsize: Figure size.
|
1322
|
+
dpi: Dpi setting.
|
1323
|
+
palette: The seaborn color map for the barplot.
|
1324
|
+
level_order: Custom ordering of bars on the x-axis.
|
1325
|
+
args_barplot: Arguments passed to sns.barplot.
|
1328
1326
|
|
1329
1327
|
Returns:
|
1330
1328
|
Depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
|
@@ -1519,20 +1517,19 @@ class CompositionalModel2(ABC):
|
|
1519
1517
|
Args:
|
1520
1518
|
data: AnnData object or MuData object
|
1521
1519
|
feature_name: The name of the feature in data.obs to plot
|
1522
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1520
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1523
1521
|
y_scale: Transformation of cell counts. Options: "relative" - Relative abundance, "log" - log(count),
|
1524
1522
|
"log10" - log10(count), "count" - absolute abundance (cell counts).
|
1525
|
-
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
level_order: Custom ordering of bars on the x-axis. Defaults to None.
|
1523
|
+
plot_facets: If False, plot cell types on the x-axis. If True, plot as facets.
|
1524
|
+
add_dots: If True, overlay a scatterplot with one dot for each data point.
|
1525
|
+
cell_types: Subset of cell types that should be plotted.
|
1526
|
+
args_boxplot: Arguments passed to sns.boxplot.
|
1527
|
+
args_swarmplot: Arguments passed to sns.swarmplot.
|
1528
|
+
figsize: Figure size.
|
1529
|
+
dpi: Dpi setting.
|
1530
|
+
palette: The seaborn color map for the barplot.
|
1531
|
+
show_legend: If True, adds a legend.
|
1532
|
+
level_order: Custom ordering of bars on the x-axis.
|
1536
1533
|
|
1537
1534
|
Returns:
|
1538
1535
|
Depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
|
@@ -1758,16 +1755,14 @@ class CompositionalModel2(ABC):
|
|
1758
1755
|
|
1759
1756
|
Args:
|
1760
1757
|
data: AnnData or MuData object.
|
1761
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1762
|
-
|
1763
|
-
|
1764
|
-
default_color: Bar color for all non-minimal cell types. Defaults to "Grey".
|
1758
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1759
|
+
abundant_threshold: Presence threshold for abundant cell types.
|
1760
|
+
default_color: Bar color for all non-minimal cell types.
|
1765
1761
|
abundant_color: Bar color for cell types with abundant percentage larger than abundant_threshold.
|
1766
|
-
|
1767
|
-
|
1768
|
-
|
1769
|
-
|
1770
|
-
ax: A matplotlib axes object. Only works if plotting a single component. Defaults to None.
|
1762
|
+
label_cell_types: Label dots with cell type names.
|
1763
|
+
figsize: Figure size.
|
1764
|
+
dpi: Dpi setting.
|
1765
|
+
ax: A matplotlib axes object. Only works if plotting a single component.
|
1771
1766
|
|
1772
1767
|
Returns:
|
1773
1768
|
A :class:`~matplotlib.axes.Axes` object
|
@@ -1882,22 +1877,16 @@ class CompositionalModel2(ABC):
|
|
1882
1877
|
Args:
|
1883
1878
|
data: AnnData object or MuData object.
|
1884
1879
|
modality_key: If data is a MuData object, specify which modality to use.
|
1885
|
-
Defaults to "coda".
|
1886
1880
|
tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
|
1887
|
-
Defaults to "tree".
|
1888
1881
|
tight_text: When False, boundaries of the text are approximated according to general font metrics,
|
1889
1882
|
producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
|
1890
|
-
Default to False.
|
1891
1883
|
show_scale: Include the scale legend in the tree image or not.
|
1892
|
-
Defaults to False.
|
1893
1884
|
show: If True, plot the tree inline. If false, return tree and tree_style objects.
|
1894
|
-
Defaults to True.
|
1895
1885
|
file_name: Path to the output image file. Valid extensions are .SVG, .PDF, .PNG.
|
1896
1886
|
Output image can be saved whether show is True or not.
|
1897
|
-
|
1898
|
-
|
1899
|
-
|
1900
|
-
dpi: Dots per inches. Defaults to 100.
|
1887
|
+
units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches.
|
1888
|
+
figsize: Figure size.
|
1889
|
+
dpi: Dots per inches.
|
1901
1890
|
|
1902
1891
|
Returns:
|
1903
1892
|
Depending on `show`, returns :class:`ete3.TreeNode` and :class:`ete3.TreeStyle` (`show = False`) or plots the tree inline (`show = True`)
|
@@ -1972,23 +1961,18 @@ class CompositionalModel2(ABC):
|
|
1972
1961
|
data: AnnData object or MuData object.
|
1973
1962
|
covariate: The covariate, whose effects should be plotted.
|
1974
1963
|
modality_key: If data is a MuData object, specify which modality to use.
|
1975
|
-
Defaults to "coda".
|
1976
1964
|
tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
|
1977
|
-
Defaults to "tree".
|
1978
1965
|
show_legend: Whether to show the legend for the nodes' significant effects.
|
1979
1966
|
Defaults to False if show_leaf_effects is True.
|
1980
1967
|
show_leaf_effects: If True, plot bar plots which indicate leaf-level significant effects.
|
1981
|
-
Defaults to False.
|
1982
1968
|
tight_text: When False, boundaries of the text are approximated according to general font metrics,
|
1983
1969
|
producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
|
1984
|
-
|
1985
|
-
|
1986
|
-
show: If True, plot the tree inline. If false, return tree and tree_style objects. Defaults to True.
|
1970
|
+
show_scale: Include the scale legend in the tree image or not.
|
1971
|
+
show: If True, plot the tree inline. If false, return tree and tree_style objects.
|
1987
1972
|
file_name: Path to the output image file. valid extensions are .SVG, .PDF, .PNG. Output image can be saved whether show is True or not.
|
1988
|
-
|
1989
|
-
|
1990
|
-
|
1991
|
-
dpi: Dots per inches. Defaults to 100.
|
1973
|
+
units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches.
|
1974
|
+
figsize: Figure size.
|
1975
|
+
dpi: Dots per inches.
|
1992
1976
|
|
1993
1977
|
Returns:
|
1994
1978
|
Depending on `show`, returns :class:`ete3.TreeNode` and :class:`ete3.TreeStyle` (`show = False`)
|
@@ -2026,7 +2010,7 @@ class CompositionalModel2(ABC):
|
|
2026
2010
|
if show_legend is None:
|
2027
2011
|
show_legend = not show_leaf_effects
|
2028
2012
|
elif show_legend:
|
2029
|
-
|
2013
|
+
logger.info("Tree leaves and leaf effect bars won't be aligned when legend is shown!")
|
2030
2014
|
|
2031
2015
|
if isinstance(tree, str):
|
2032
2016
|
tree = data.uns[tree]
|
@@ -2171,12 +2155,10 @@ class CompositionalModel2(ABC):
|
|
2171
2155
|
effect_name: The name of the effect results in .varm of aggregated sample-level AnnData to plot
|
2172
2156
|
cluster_key: The cluster information in .obs of cell-level AnnData (default is data['rna']).
|
2173
2157
|
To assign cell types' effects to original cells.
|
2174
|
-
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
2158
|
+
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
2175
2159
|
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
2176
|
-
|
2177
|
-
show: Whether to display the figure or return axis. Defaults to None.
|
2160
|
+
show: Whether to display the figure or return axis.
|
2178
2161
|
ax: A matplotlib axes object. Only works if plotting a single component.
|
2179
|
-
Defaults to None.
|
2180
2162
|
**kwargs: All other keyword arguments are passed to `scanpy.plot.umap()`
|
2181
2163
|
|
2182
2164
|
Returns:
|
@@ -2254,7 +2236,7 @@ class CompositionalModel2(ABC):
|
|
2254
2236
|
|
2255
2237
|
|
2256
2238
|
def get_a(
|
2257
|
-
tree: tt.
|
2239
|
+
tree: tt.core.ToyTree,
|
2258
2240
|
) -> tuple[np.ndarray, int]:
|
2259
2241
|
"""Calculate ancestor matrix from a toytree tree
|
2260
2242
|
|
@@ -2293,7 +2275,7 @@ def get_a(
|
|
2293
2275
|
return A, n_nodes - 1
|
2294
2276
|
|
2295
2277
|
|
2296
|
-
def collapse_singularities(tree: tt.
|
2278
|
+
def collapse_singularities(tree: tt.core.ToyTree) -> tt.core.ToyTree:
|
2297
2279
|
"""Collapses (deletes) nodes in a toytree tree that are singularities (have only one child).
|
2298
2280
|
|
2299
2281
|
Args:
|
@@ -2509,15 +2491,14 @@ def import_tree(
|
|
2509
2491
|
|
2510
2492
|
Args:
|
2511
2493
|
data: A tascCODA-compatible data object.
|
2512
|
-
modality_1: If `data` is MuData, specify the modality name to the original cell level anndata object.
|
2513
|
-
modality_2: If `data` is MuData, specify the modality name to the aggregated level anndata object.
|
2514
|
-
dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
|
2515
|
-
levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
2516
|
-
levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
2494
|
+
modality_1: If `data` is MuData, specify the modality name to the original cell level anndata object.
|
2495
|
+
modality_2: If `data` is MuData, specify the modality name to the aggregated level anndata object.
|
2496
|
+
dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
|
2497
|
+
levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
2498
|
+
levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
2517
2499
|
add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
|
2518
|
-
Defaults to True.
|
2519
2500
|
key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`.
|
2520
|
-
If `data` is MuData, save tree in data[modality_2].
|
2501
|
+
If `data` is MuData, save tree in data[modality_2].
|
2521
2502
|
|
2522
2503
|
Returns:
|
2523
2504
|
Updates data with the following:
|
@@ -2538,10 +2519,10 @@ def import_tree(
|
|
2538
2519
|
data_1 = data[modality_1]
|
2539
2520
|
data_2 = data[modality_2]
|
2540
2521
|
except KeyError as name:
|
2541
|
-
|
2522
|
+
logger.error(f"No {name} slot in MuData")
|
2542
2523
|
raise
|
2543
2524
|
except IndexError:
|
2544
|
-
|
2525
|
+
logger.error("Please specify modality_1 and modality_2 to indicate modalities in MuData")
|
2545
2526
|
raise
|
2546
2527
|
else:
|
2547
2528
|
data_1 = data
|
@@ -2613,43 +2594,38 @@ def from_scanpy(
|
|
2613
2594
|
Returns:
|
2614
2595
|
AnnData: A data set with cells aggregated to the (sample x cell type) level
|
2615
2596
|
"""
|
2616
|
-
if isinstance(sample_identifier, str)
|
2617
|
-
|
2597
|
+
sample_identifier = [sample_identifier] if isinstance(sample_identifier, str) else sample_identifier
|
2598
|
+
covariate_obs = list(set(covariate_obs or []) | set(sample_identifier))
|
2618
2599
|
|
2619
|
-
if
|
2600
|
+
if isinstance(sample_identifier, list):
|
2620
2601
|
adata.obs["scCODA_sample_id"] = adata.obs[sample_identifier].agg("-".join, axis=1)
|
2621
2602
|
sample_identifier = "scCODA_sample_id"
|
2622
|
-
else:
|
2623
|
-
sample_identifier = sample_identifier[0]
|
2624
2603
|
|
2625
|
-
|
2626
|
-
ct_count_data =
|
2627
|
-
ct_count_data = ct_count_data.fillna(0)
|
2628
|
-
|
2629
|
-
# get covariates from different sources
|
2604
|
+
groups = adata.obs.value_counts([sample_identifier, cell_type_identifier])
|
2605
|
+
ct_count_data = groups.unstack(level=cell_type_identifier).fillna(0)
|
2630
2606
|
covariate_df_ = pd.DataFrame(index=ct_count_data.index)
|
2631
2607
|
|
2632
2608
|
if covariate_uns is not None:
|
2633
2609
|
covariate_df_uns = pd.DataFrame(adata.uns[covariate_uns], index=ct_count_data.index)
|
2634
|
-
covariate_df_ =
|
2610
|
+
covariate_df_ = pd.concat([covariate_df_, covariate_df_uns], axis=1)
|
2635
2611
|
|
2636
2612
|
if covariate_obs:
|
2637
|
-
|
2638
|
-
|
2639
|
-
|
2640
|
-
|
2641
|
-
|
2642
|
-
|
2643
|
-
|
2644
|
-
|
2645
|
-
covariate_df_ = covariate_df_.join(covariate_df_obs, how="left")
|
2613
|
+
unique_check = adata.obs.groupby(sample_identifier).nunique()
|
2614
|
+
for c in covariate_obs.copy():
|
2615
|
+
if unique_check[c].max() != 1:
|
2616
|
+
logger.warning(f"Covariate {c} has non-unique values for batch! Skipping...")
|
2617
|
+
covariate_obs.remove(c)
|
2618
|
+
if covariate_obs:
|
2619
|
+
covariate_df_obs = adata.obs.groupby(sample_identifier).first()[covariate_obs]
|
2620
|
+
covariate_df_ = pd.concat([covariate_df_, covariate_df_obs], axis=1)
|
2646
2621
|
|
2647
2622
|
if covariate_df is not None:
|
2648
|
-
if
|
2649
|
-
raise ValueError("
|
2650
|
-
covariate_df_ = covariate_df_.
|
2623
|
+
if set(covariate_df.index) != set(ct_count_data.index):
|
2624
|
+
raise ValueError("Mismatch between sample names in anndata and covariate_df!")
|
2625
|
+
covariate_df_ = pd.concat([covariate_df_, covariate_df.reindex(ct_count_data.index)], axis=1)
|
2651
2626
|
|
2652
|
-
var_dat = ct_count_data.sum(
|
2627
|
+
var_dat = ct_count_data.sum().rename("n_cells").to_frame()
|
2653
2628
|
var_dat.index = var_dat.index.astype(str)
|
2629
|
+
covariate_df_.index = covariate_df_.index.astype(str)
|
2654
2630
|
|
2655
2631
|
return AnnData(X=ct_count_data.values, var=var_dat, obs=covariate_df_)
|
pertpy/tools/_coda/_sccoda.py
CHANGED
@@ -9,6 +9,7 @@ import numpyro as npy
|
|
9
9
|
import numpyro.distributions as npd
|
10
10
|
from anndata import AnnData
|
11
11
|
from jax import config, random
|
12
|
+
from lamin_utils import logger
|
12
13
|
from mudata import MuData
|
13
14
|
from numpyro.infer import Predictive
|
14
15
|
from rich import print
|
@@ -73,13 +74,13 @@ class Sccoda(CompositionalModel2):
|
|
73
74
|
adata: AnnData object.
|
74
75
|
type : Specify the input adata type, which could be either a cell-level AnnData or an aggregated sample-level AnnData.
|
75
76
|
generate_sample_level: Whether to generate an AnnData object on the sample level or create an empty AnnData object.
|
76
|
-
cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
|
77
|
-
sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
|
78
|
-
covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
|
79
|
-
covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
|
80
|
-
covariate_df: If type is "cell_level", specify DataFrame with covariates.
|
81
|
-
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
82
|
-
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
77
|
+
cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
|
78
|
+
sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
|
79
|
+
covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
|
80
|
+
covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
|
81
|
+
covariate_df: If type is "cell_level", specify DataFrame with covariates.
|
82
|
+
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
83
|
+
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
83
84
|
|
84
85
|
Returns:
|
85
86
|
MuData: MuData object with cell-level AnnData (`mudata[modality_key_1]`) and aggregated sample-level AnnData (`mudata[modality_key_2]`).
|
@@ -127,10 +128,10 @@ class Sccoda(CompositionalModel2):
|
|
127
128
|
Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
|
128
129
|
To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
|
129
130
|
reference_cell_type: Column name that sets the reference cell type.
|
130
|
-
Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
131
|
+
Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
131
132
|
automatic_reference_absence_threshold: If using reference_cell_type = "automatic", determine the maximum fraction of zero entries for a cell type
|
132
|
-
to be considered as a possible reference cell type.
|
133
|
-
modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
|
133
|
+
to be considered as a possible reference cell type.
|
134
|
+
modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
|
134
135
|
|
135
136
|
Returns:
|
136
137
|
Return an AnnData (if input data is an AnnData object) or return a MuData (if input data is a MuData object)
|
@@ -309,10 +310,10 @@ class Sccoda(CompositionalModel2):
|
|
309
310
|
|
310
311
|
Args:
|
311
312
|
data: AnnData object or MuData object.
|
312
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
313
|
-
rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
|
314
|
-
num_prior_samples: Number of prior samples calculated.
|
315
|
-
use_posterior_predictive: If True, the posterior predictive will be calculated.
|
313
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
314
|
+
rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
|
315
|
+
num_prior_samples: Number of prior samples calculated.
|
316
|
+
use_posterior_predictive: If True, the posterior predictive will be calculated.
|
316
317
|
|
317
318
|
Returns:
|
318
319
|
az.InferenceData: arviz_data with all MCMC information
|
@@ -335,7 +336,7 @@ class Sccoda(CompositionalModel2):
|
|
335
336
|
try:
|
336
337
|
sample_adata = data[modality_key]
|
337
338
|
except IndexError:
|
338
|
-
|
339
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
339
340
|
raise
|
340
341
|
if isinstance(data, AnnData):
|
341
342
|
sample_adata = data
|