pertpy 0.7.0__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pertpy/__init__.py +2 -1
- pertpy/data/__init__.py +61 -0
- pertpy/data/_dataloader.py +27 -23
- pertpy/data/_datasets.py +58 -0
- pertpy/metadata/__init__.py +2 -0
- pertpy/metadata/_cell_line.py +39 -70
- pertpy/metadata/_compound.py +3 -4
- pertpy/metadata/_drug.py +2 -6
- pertpy/metadata/_look_up.py +38 -51
- pertpy/metadata/_metadata.py +7 -10
- pertpy/metadata/_moa.py +2 -6
- pertpy/plot/__init__.py +0 -5
- pertpy/preprocessing/__init__.py +2 -0
- pertpy/preprocessing/_guide_rna.py +6 -7
- pertpy/tools/__init__.py +67 -6
- pertpy/tools/_augur.py +14 -15
- pertpy/tools/_cinemaot.py +2 -2
- pertpy/tools/_coda/_base_coda.py +118 -142
- pertpy/tools/_coda/_sccoda.py +16 -15
- pertpy/tools/_coda/_tasccoda.py +21 -22
- pertpy/tools/_dialogue.py +18 -23
- pertpy/tools/_differential_gene_expression/__init__.py +20 -0
- pertpy/tools/_differential_gene_expression/_base.py +657 -0
- pertpy/tools/_differential_gene_expression/_checks.py +41 -0
- pertpy/tools/_differential_gene_expression/_dge_comparison.py +86 -0
- pertpy/tools/_differential_gene_expression/_edger.py +125 -0
- pertpy/tools/_differential_gene_expression/_formulaic.py +189 -0
- pertpy/tools/_differential_gene_expression/_pydeseq2.py +95 -0
- pertpy/tools/_differential_gene_expression/_simple_tests.py +162 -0
- pertpy/tools/_differential_gene_expression/_statsmodels.py +72 -0
- pertpy/tools/_distances/_distance_tests.py +21 -16
- pertpy/tools/_distances/_distances.py +406 -70
- pertpy/tools/_enrichment.py +10 -15
- pertpy/tools/_kernel_pca.py +1 -1
- pertpy/tools/_milo.py +77 -54
- pertpy/tools/_mixscape.py +15 -11
- pertpy/tools/_perturbation_space/_clustering.py +5 -2
- pertpy/tools/_perturbation_space/_comparison.py +112 -0
- pertpy/tools/_perturbation_space/_discriminator_classifiers.py +21 -23
- pertpy/tools/_perturbation_space/_perturbation_space.py +23 -21
- pertpy/tools/_perturbation_space/_simple.py +3 -3
- pertpy/tools/_scgen/__init__.py +1 -1
- pertpy/tools/_scgen/_base_components.py +2 -3
- pertpy/tools/_scgen/_scgen.py +33 -28
- pertpy/tools/_scgen/_utils.py +2 -2
- {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/METADATA +32 -14
- pertpy-0.9.1.dist-info/RECORD +57 -0
- {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/WHEEL +1 -1
- pertpy/plot/_augur.py +0 -171
- pertpy/plot/_coda.py +0 -601
- pertpy/plot/_guide_rna.py +0 -64
- pertpy/plot/_milopy.py +0 -209
- pertpy/plot/_mixscape.py +0 -355
- pertpy/tools/_differential_gene_expression.py +0 -325
- pertpy-0.7.0.dist-info/RECORD +0 -53
- {pertpy-0.7.0.dist-info → pertpy-0.9.1.dist-info}/licenses/LICENSE +0 -0
pertpy/tools/_coda/_base_coda.py
CHANGED
@@ -15,6 +15,7 @@ import seaborn as sns
|
|
15
15
|
from adjustText import adjust_text
|
16
16
|
from anndata import AnnData
|
17
17
|
from jax import config, random
|
18
|
+
from lamin_utils import logger
|
18
19
|
from matplotlib import cm, rcParams
|
19
20
|
from matplotlib import image as mpimg
|
20
21
|
from matplotlib.colors import ListedColormap
|
@@ -110,9 +111,9 @@ class CompositionalModel2(ABC):
|
|
110
111
|
Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
|
111
112
|
To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
|
112
113
|
reference_cell_type: Column name that sets the reference cell type.
|
113
|
-
Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
114
|
+
Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
114
115
|
automatic_reference_absence_threshold: If using reference_cell_type = "automatic", determine the maximum fraction of zero entries for a cell type
|
115
|
-
to be considered as a possible reference cell type.
|
116
|
+
to be considered as a possible reference cell type.
|
116
117
|
|
117
118
|
Returns:
|
118
119
|
AnnData object that is ready for CODA models.
|
@@ -148,7 +149,7 @@ class CompositionalModel2(ABC):
|
|
148
149
|
ref_index = np.where(cell_type_disp == min_var)[0][0]
|
149
150
|
|
150
151
|
ref_cell_type = cell_types[ref_index]
|
151
|
-
|
152
|
+
logger.info(f"Automatic reference selection! Reference cell type set to {ref_cell_type}")
|
152
153
|
|
153
154
|
# Column name as reference cell type
|
154
155
|
elif reference_cell_type in cell_types:
|
@@ -160,7 +161,7 @@ class CompositionalModel2(ABC):
|
|
160
161
|
|
161
162
|
# Add pseudocount if zeroes are present.
|
162
163
|
if np.count_nonzero(sample_adata.X) != np.size(sample_adata.X):
|
163
|
-
|
164
|
+
logger.info("Zero counts encountered in data! Added a pseudocount of 0.5.")
|
164
165
|
sample_adata.X[sample_adata.X == 0] = 0.5
|
165
166
|
|
166
167
|
sample_adata.obsm["sample_counts"] = np.sum(sample_adata.X, axis=1)
|
@@ -201,7 +202,7 @@ class CompositionalModel2(ABC):
|
|
201
202
|
sample_adata: anndata object with cell counts as sample_adata.X and covariates saved in sample_adata.obs.
|
202
203
|
kernel: A `numpyro.infer.mcmc.MCMCKernel` object
|
203
204
|
rng_key: The rng state used. If None, a random state will be selected
|
204
|
-
copy: Return a copy instead of writing to adata.
|
205
|
+
copy: Return a copy instead of writing to adata.
|
205
206
|
args: Passed to `numpyro.infer.mcmc.MCMC`
|
206
207
|
kwargs: Passed to `numpyro.infer.mcmc.MCMC`
|
207
208
|
|
@@ -237,13 +238,13 @@ class CompositionalModel2(ABC):
|
|
237
238
|
|
238
239
|
acc_rate = np.array(self.mcmc.last_state.mean_accept_prob)
|
239
240
|
if acc_rate < 0.6:
|
240
|
-
|
241
|
-
f"
|
241
|
+
logger.warning(
|
242
|
+
f"Acceptance rate unusually low ({acc_rate} < 0.5)! Results might be incorrect! "
|
242
243
|
f"Please check feasibility of results and re-run the sampling step with a different rng_key if necessary."
|
243
244
|
)
|
244
245
|
if acc_rate > 0.95:
|
245
|
-
|
246
|
-
f"
|
246
|
+
logger.warning(
|
247
|
+
f"Acceptance rate unusually high ({acc_rate} > 0.95)! Results might be incorrect! "
|
247
248
|
f"Please check feasibility of results and re-run the sampling step with a different rng_key if necessary."
|
248
249
|
)
|
249
250
|
|
@@ -286,11 +287,11 @@ class CompositionalModel2(ABC):
|
|
286
287
|
|
287
288
|
Args:
|
288
289
|
data: AnnData object or MuData object.
|
289
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
290
|
-
num_samples: Number of sampled values after burn-in.
|
291
|
-
num_warmup: Number of burn-in (warmup) samples.
|
292
|
-
rng_key: The rng state used.
|
293
|
-
copy: Return a copy instead of writing to adata.
|
290
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
291
|
+
num_samples: Number of sampled values after burn-in.
|
292
|
+
num_warmup: Number of burn-in (warmup) samples.
|
293
|
+
rng_key: The rng state used.
|
294
|
+
copy: Return a copy instead of writing to adata.
|
294
295
|
|
295
296
|
Returns:
|
296
297
|
Calls `self.__run_mcmc`
|
@@ -299,7 +300,7 @@ class CompositionalModel2(ABC):
|
|
299
300
|
try:
|
300
301
|
sample_adata = data[modality_key]
|
301
302
|
except IndexError:
|
302
|
-
|
303
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
303
304
|
raise
|
304
305
|
if isinstance(data, AnnData):
|
305
306
|
sample_adata = data
|
@@ -339,11 +340,11 @@ class CompositionalModel2(ABC):
|
|
339
340
|
|
340
341
|
Args:
|
341
342
|
data: AnnData object or MuData object.
|
342
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
343
|
-
num_samples: Number of sampled values after burn-in.
|
344
|
-
num_warmup: Number of burn-in (warmup) samples.
|
345
|
-
rng_key: The rng state used. If None, a random state will be selected.
|
346
|
-
copy: Return a copy instead of writing to adata.
|
343
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
344
|
+
num_samples: Number of sampled values after burn-in.
|
345
|
+
num_warmup: Number of burn-in (warmup) samples.
|
346
|
+
rng_key: The rng state used. If None, a random state will be selected.
|
347
|
+
copy: Return a copy instead of writing to adata.
|
347
348
|
|
348
349
|
Examples:
|
349
350
|
>>> import pertpy as pt
|
@@ -358,7 +359,7 @@ class CompositionalModel2(ABC):
|
|
358
359
|
try:
|
359
360
|
sample_adata = data[modality_key]
|
360
361
|
except IndexError:
|
361
|
-
|
362
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
362
363
|
raise
|
363
364
|
if isinstance(data, AnnData):
|
364
365
|
sample_adata = data
|
@@ -397,7 +398,7 @@ class CompositionalModel2(ABC):
|
|
397
398
|
|
398
399
|
Args:
|
399
400
|
sample_adata: Anndata object with cell counts as sample_adata.X and covariates saved in sample_adata.obs.
|
400
|
-
est_fdr: Desired FDR value.
|
401
|
+
est_fdr: Desired FDR value.
|
401
402
|
args: Passed to ``az.summary``
|
402
403
|
kwargs: Passed to ``az.summary``
|
403
404
|
|
@@ -637,8 +638,8 @@ class CompositionalModel2(ABC):
|
|
637
638
|
effect_df: Effect summary, see ``summary_prepare``
|
638
639
|
model_type: String indicating the model type ("classic" or "tree_agg")
|
639
640
|
select_type: String indicating the type of spike_and_slab selection ("spikeslab" or "sslasso")
|
640
|
-
target_fdr: Desired FDR value.
|
641
|
-
node_df: If using tree aggregation, the node-level effect DataFrame must be passed.
|
641
|
+
target_fdr: Desired FDR value.
|
642
|
+
node_df: If using tree aggregation, the node-level effect DataFrame must be passed.
|
642
643
|
|
643
644
|
Returns:
|
644
645
|
pd.DataFrame: effect DataFrame with inclusion probability, final parameters, expected sample.
|
@@ -790,8 +791,8 @@ class CompositionalModel2(ABC):
|
|
790
791
|
|
791
792
|
Args:
|
792
793
|
data: AnnData object or MuData object.
|
793
|
-
extended: If True, return the extended summary with additional statistics.
|
794
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
794
|
+
extended: If True, return the extended summary with additional statistics.
|
795
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
795
796
|
args: Passed to az.summary
|
796
797
|
kwargs: Passed to az.summary
|
797
798
|
|
@@ -809,7 +810,7 @@ class CompositionalModel2(ABC):
|
|
809
810
|
try:
|
810
811
|
sample_adata = data[modality_key]
|
811
812
|
except IndexError:
|
812
|
-
|
813
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
813
814
|
raise
|
814
815
|
if isinstance(data, AnnData):
|
815
816
|
sample_adata = data
|
@@ -848,10 +849,10 @@ class CompositionalModel2(ABC):
|
|
848
849
|
table.add_column("Name", justify="left", style="cyan")
|
849
850
|
table.add_column("Value", justify="left")
|
850
851
|
table.add_row("Data", "Data: %d samples, %d cell types" % data_dims)
|
851
|
-
table.add_row("Reference cell type", "
|
852
|
-
table.add_row("Formula", "
|
852
|
+
table.add_row("Reference cell type", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_cell_type"])))
|
853
|
+
table.add_row("Formula", "{}".format(sample_adata.uns["scCODA_params"]["formula"]))
|
853
854
|
if extended:
|
854
|
-
table.add_row("Reference index", "
|
855
|
+
table.add_row("Reference index", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_index"])))
|
855
856
|
if select_type == "spikeslab":
|
856
857
|
table.add_row(
|
857
858
|
"Spike-and-slab threshold",
|
@@ -934,7 +935,7 @@ class CompositionalModel2(ABC):
|
|
934
935
|
|
935
936
|
Args:
|
936
937
|
data: AnnData object or MuData object.
|
937
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
938
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
938
939
|
|
939
940
|
Returns:
|
940
941
|
pd.DataFrame: Intercept data frame.
|
@@ -953,7 +954,7 @@ class CompositionalModel2(ABC):
|
|
953
954
|
try:
|
954
955
|
sample_adata = data[modality_key]
|
955
956
|
except IndexError:
|
956
|
-
|
957
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
957
958
|
raise
|
958
959
|
if isinstance(data, AnnData):
|
959
960
|
sample_adata = data
|
@@ -965,7 +966,7 @@ class CompositionalModel2(ABC):
|
|
965
966
|
|
966
967
|
Args:
|
967
968
|
data: AnnData object or MuData object.
|
968
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
969
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
969
970
|
|
970
971
|
Returns:
|
971
972
|
pd.DataFrame: Effect data frame.
|
@@ -984,7 +985,7 @@ class CompositionalModel2(ABC):
|
|
984
985
|
try:
|
985
986
|
sample_adata = data[modality_key]
|
986
987
|
except IndexError:
|
987
|
-
|
988
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
988
989
|
raise
|
989
990
|
if isinstance(data, AnnData):
|
990
991
|
sample_adata = data
|
@@ -1007,7 +1008,7 @@ class CompositionalModel2(ABC):
|
|
1007
1008
|
|
1008
1009
|
Args:
|
1009
1010
|
data: AnnData object or MuData object.
|
1010
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1011
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1011
1012
|
|
1012
1013
|
Returns:
|
1013
1014
|
pd.DataFrame: Node effect data frame.
|
@@ -1032,7 +1033,7 @@ class CompositionalModel2(ABC):
|
|
1032
1033
|
try:
|
1033
1034
|
sample_adata = data[modality_key]
|
1034
1035
|
except IndexError:
|
1035
|
-
|
1036
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
1036
1037
|
raise
|
1037
1038
|
if isinstance(data, AnnData):
|
1038
1039
|
sample_adata = data
|
@@ -1046,7 +1047,7 @@ class CompositionalModel2(ABC):
|
|
1046
1047
|
Args:
|
1047
1048
|
data: AnnData object or MuData object.
|
1048
1049
|
est_fdr: Desired FDR value.
|
1049
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1050
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1050
1051
|
args: passed to self.summary_prepare
|
1051
1052
|
kwargs: passed to self.summary_prepare
|
1052
1053
|
|
@@ -1057,7 +1058,7 @@ class CompositionalModel2(ABC):
|
|
1057
1058
|
try:
|
1058
1059
|
sample_adata = data[modality_key]
|
1059
1060
|
except IndexError:
|
1060
|
-
|
1061
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
1061
1062
|
raise
|
1062
1063
|
if isinstance(data, AnnData):
|
1063
1064
|
sample_adata = data
|
@@ -1080,8 +1081,8 @@ class CompositionalModel2(ABC):
|
|
1080
1081
|
|
1081
1082
|
Args:
|
1082
1083
|
data: AnnData object or MuData object.
|
1083
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1084
|
-
est_fdr: Estimated false discovery rate. Must be between 0 and 1.
|
1084
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1085
|
+
est_fdr: Estimated false discovery rate. Must be between 0 and 1.
|
1085
1086
|
|
1086
1087
|
Returns:
|
1087
1088
|
pd.Series: Credible effect decision series of boolean values indicating whether effects are credible under inc_prob_threshold.
|
@@ -1090,7 +1091,7 @@ class CompositionalModel2(ABC):
|
|
1090
1091
|
try:
|
1091
1092
|
sample_adata = data[modality_key]
|
1092
1093
|
except IndexError:
|
1093
|
-
|
1094
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
1094
1095
|
raise
|
1095
1096
|
if isinstance(data, AnnData):
|
1096
1097
|
sample_adata = data
|
@@ -1143,10 +1144,10 @@ class CompositionalModel2(ABC):
|
|
1143
1144
|
type_names: The names of all cell types
|
1144
1145
|
title: Plot title, usually the covariate's name
|
1145
1146
|
level_names: Names of the covariate's levels
|
1146
|
-
figsize: Figure size
|
1147
|
-
dpi:
|
1148
|
-
palette: The color map for the barplot.
|
1149
|
-
show_legend: If True, adds a legend.
|
1147
|
+
figsize: Figure size (matplotlib).
|
1148
|
+
dpi: Resolution in DPI (matplotlib).
|
1149
|
+
palette: The color map for the barplot.
|
1150
|
+
show_legend: If True, adds a legend.
|
1150
1151
|
|
1151
1152
|
Returns:
|
1152
1153
|
A :class:`~matplotlib.axes.Axes` object
|
@@ -1205,12 +1206,12 @@ class CompositionalModel2(ABC):
|
|
1205
1206
|
Args:
|
1206
1207
|
data: AnnData object or MuData object.
|
1207
1208
|
feature_name: The name of the covariate to plot. If feature_name=="samples", one bar for every sample will be plotted
|
1208
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1209
|
-
figsize: Figure size.
|
1210
|
-
dpi: Dpi setting.
|
1211
|
-
palette: The matplotlib color map for the barplot.
|
1212
|
-
show_legend: If True, adds a legend.
|
1213
|
-
level_order: Custom ordering of bars on the x-axis.
|
1209
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1210
|
+
figsize: Figure size.
|
1211
|
+
dpi: Dpi setting.
|
1212
|
+
palette: The matplotlib color map for the barplot.
|
1213
|
+
show_legend: If True, adds a legend.
|
1214
|
+
level_order: Custom ordering of bars on the x-axis.
|
1214
1215
|
|
1215
1216
|
Returns:
|
1216
1217
|
A :class:`~matplotlib.axes.Axes` object
|
@@ -1311,20 +1312,17 @@ class CompositionalModel2(ABC):
|
|
1311
1312
|
|
1312
1313
|
Args:
|
1313
1314
|
data: AnnData object or MuData object.
|
1314
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1315
|
-
covariates: The name of the covariates in data.obs to plot.
|
1316
|
-
parameter: The parameter in effect summary to plot.
|
1315
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1316
|
+
covariates: The name of the covariates in data.obs to plot.
|
1317
|
+
parameter: The parameter in effect summary to plot.
|
1317
1318
|
plot_facets: If False, plot cell types on the x-axis. If True, plot as facets.
|
1318
|
-
Defaults to True.
|
1319
1319
|
plot_zero_covariate: If True, plot covariate that have all zero effects. If False, do not plot.
|
1320
|
-
Defaults to True.
|
1321
1320
|
plot_zero_cell_type: If True, plot cell type that have zero effect. If False, do not plot.
|
1322
|
-
|
1323
|
-
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1327
|
-
args_barplot: Arguments passed to sns.barplot. Defaults to None.
|
1321
|
+
figsize: Figure size.
|
1322
|
+
dpi: Dpi setting.
|
1323
|
+
palette: The seaborn color map for the barplot.
|
1324
|
+
level_order: Custom ordering of bars on the x-axis.
|
1325
|
+
args_barplot: Arguments passed to sns.barplot.
|
1328
1326
|
|
1329
1327
|
Returns:
|
1330
1328
|
Depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
|
@@ -1519,20 +1517,19 @@ class CompositionalModel2(ABC):
|
|
1519
1517
|
Args:
|
1520
1518
|
data: AnnData object or MuData object
|
1521
1519
|
feature_name: The name of the feature in data.obs to plot
|
1522
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1520
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1523
1521
|
y_scale: Transformation of cell counts. Options: "relative" - Relative abundance, "log" - log(count),
|
1524
1522
|
"log10" - log10(count), "count" - absolute abundance (cell counts).
|
1525
|
-
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
level_order: Custom ordering of bars on the x-axis. Defaults to None.
|
1523
|
+
plot_facets: If False, plot cell types on the x-axis. If True, plot as facets.
|
1524
|
+
add_dots: If True, overlay a scatterplot with one dot for each data point.
|
1525
|
+
cell_types: Subset of cell types that should be plotted.
|
1526
|
+
args_boxplot: Arguments passed to sns.boxplot.
|
1527
|
+
args_swarmplot: Arguments passed to sns.swarmplot.
|
1528
|
+
figsize: Figure size.
|
1529
|
+
dpi: Dpi setting.
|
1530
|
+
palette: The seaborn color map for the barplot.
|
1531
|
+
show_legend: If True, adds a legend.
|
1532
|
+
level_order: Custom ordering of bars on the x-axis.
|
1536
1533
|
|
1537
1534
|
Returns:
|
1538
1535
|
Depending on `plot_facets`, returns a :class:`~matplotlib.axes.Axes` (`plot_facets = False`)
|
@@ -1758,16 +1755,14 @@ class CompositionalModel2(ABC):
|
|
1758
1755
|
|
1759
1756
|
Args:
|
1760
1757
|
data: AnnData or MuData object.
|
1761
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
1762
|
-
|
1763
|
-
|
1764
|
-
default_color: Bar color for all non-minimal cell types. Defaults to "Grey".
|
1758
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
1759
|
+
abundant_threshold: Presence threshold for abundant cell types.
|
1760
|
+
default_color: Bar color for all non-minimal cell types.
|
1765
1761
|
abundant_color: Bar color for cell types with abundant percentage larger than abundant_threshold.
|
1766
|
-
|
1767
|
-
|
1768
|
-
|
1769
|
-
|
1770
|
-
ax: A matplotlib axes object. Only works if plotting a single component. Defaults to None.
|
1762
|
+
label_cell_types: Label dots with cell type names.
|
1763
|
+
figsize: Figure size.
|
1764
|
+
dpi: Dpi setting.
|
1765
|
+
ax: A matplotlib axes object. Only works if plotting a single component.
|
1771
1766
|
|
1772
1767
|
Returns:
|
1773
1768
|
A :class:`~matplotlib.axes.Axes` object
|
@@ -1882,22 +1877,16 @@ class CompositionalModel2(ABC):
|
|
1882
1877
|
Args:
|
1883
1878
|
data: AnnData object or MuData object.
|
1884
1879
|
modality_key: If data is a MuData object, specify which modality to use.
|
1885
|
-
Defaults to "coda".
|
1886
1880
|
tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
|
1887
|
-
Defaults to "tree".
|
1888
1881
|
tight_text: When False, boundaries of the text are approximated according to general font metrics,
|
1889
1882
|
producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
|
1890
|
-
Default to False.
|
1891
1883
|
show_scale: Include the scale legend in the tree image or not.
|
1892
|
-
Defaults to False.
|
1893
1884
|
show: If True, plot the tree inline. If false, return tree and tree_style objects.
|
1894
|
-
Defaults to True.
|
1895
1885
|
file_name: Path to the output image file. Valid extensions are .SVG, .PDF, .PNG.
|
1896
1886
|
Output image can be saved whether show is True or not.
|
1897
|
-
|
1898
|
-
|
1899
|
-
|
1900
|
-
dpi: Dots per inches. Defaults to 100.
|
1887
|
+
units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches.
|
1888
|
+
figsize: Figure size.
|
1889
|
+
dpi: Dots per inches.
|
1901
1890
|
|
1902
1891
|
Returns:
|
1903
1892
|
Depending on `show`, returns :class:`ete3.TreeNode` and :class:`ete3.TreeStyle` (`show = False`) or plot the tree inline (`show = True`)
|
@@ -1972,23 +1961,18 @@ class CompositionalModel2(ABC):
|
|
1972
1961
|
data: AnnData object or MuData object.
|
1973
1962
|
covariate: The covariate, whose effects should be plotted.
|
1974
1963
|
modality_key: If data is a MuData object, specify which modality to use.
|
1975
|
-
Defaults to "coda".
|
1976
1964
|
tree: A ete3 tree object or a str to indicate the tree stored in `.uns`.
|
1977
|
-
Defaults to "tree".
|
1978
1965
|
show_legend: Whether to show the legend of the nodes' significant effects.
|
1979
1966
|
Defaults to False if show_leaf_effects is True.
|
1980
1967
|
show_leaf_effects: If True, plot bar plots which indicate leaf-level significant effects.
|
1981
|
-
Defaults to False.
|
1982
1968
|
tight_text: When False, boundaries of the text are approximated according to general font metrics,
|
1983
1969
|
producing slightly worse aligned text faces but improving the performance of tree visualization in scenes with a lot of text faces.
|
1984
|
-
|
1985
|
-
|
1986
|
-
show: If True, plot the tree inline. If false, return tree and tree_style objects. Defaults to True.
|
1970
|
+
show_scale: Include the scale legend in the tree image or not.
|
1971
|
+
show: If True, plot the tree inline. If false, return tree and tree_style objects.
|
1987
1972
|
file_name: Path to the output image file. valid extensions are .SVG, .PDF, .PNG. Output image can be saved whether show is True or not.
|
1988
|
-
|
1989
|
-
|
1990
|
-
|
1991
|
-
dpi: Dots per inches. Defaults to 100.
|
1973
|
+
units: Unit of image sizes. “px”: pixels, “mm”: millimeters, “in”: inches.
|
1974
|
+
figsize: Figure size.
|
1975
|
+
dpi: Dots per inches.
|
1992
1976
|
|
1993
1977
|
Returns:
|
1994
1978
|
Depending on `show`, returns :class:`ete3.TreeNode` and :class:`ete3.TreeStyle` (`show = False`)
|
@@ -2026,7 +2010,7 @@ class CompositionalModel2(ABC):
|
|
2026
2010
|
if show_legend is None:
|
2027
2011
|
show_legend = not show_leaf_effects
|
2028
2012
|
elif show_legend:
|
2029
|
-
|
2013
|
+
logger.info("Tree leaves and leaf effect bars won't be aligned when legend is shown!")
|
2030
2014
|
|
2031
2015
|
if isinstance(tree, str):
|
2032
2016
|
tree = data.uns[tree]
|
@@ -2171,12 +2155,10 @@ class CompositionalModel2(ABC):
|
|
2171
2155
|
effect_name: The name of the effect results in .varm of aggregated sample-level AnnData to plot
|
2172
2156
|
cluster_key: The cluster information in .obs of cell-level AnnData (default is data['rna']).
|
2173
2157
|
To assign cell types' effects to original cells.
|
2174
|
-
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
2158
|
+
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
2175
2159
|
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
2176
|
-
|
2177
|
-
show: Whether to display the figure or return axis. Defaults to None.
|
2160
|
+
show: Whether to display the figure or return axis.
|
2178
2161
|
ax: A matplotlib axes object. Only works if plotting a single component.
|
2179
|
-
Defaults to None.
|
2180
2162
|
**kwargs: All other keyword arguments are passed to `scanpy.plot.umap()`
|
2181
2163
|
|
2182
2164
|
Returns:
|
@@ -2254,7 +2236,7 @@ class CompositionalModel2(ABC):
|
|
2254
2236
|
|
2255
2237
|
|
2256
2238
|
def get_a(
|
2257
|
-
tree: tt.
|
2239
|
+
tree: tt.core.ToyTree,
|
2258
2240
|
) -> tuple[np.ndarray, int]:
|
2259
2241
|
"""Calculate ancestor matrix from a toytree tree
|
2260
2242
|
|
@@ -2293,7 +2275,7 @@ def get_a(
|
|
2293
2275
|
return A, n_nodes - 1
|
2294
2276
|
|
2295
2277
|
|
2296
|
-
def collapse_singularities(tree: tt.
|
2278
|
+
def collapse_singularities(tree: tt.core.ToyTree) -> tt.core.ToyTree:
|
2297
2279
|
"""Collapses (deletes) nodes in a toytree tree that are singularities (have only one child).
|
2298
2280
|
|
2299
2281
|
Args:
|
@@ -2509,15 +2491,14 @@ def import_tree(
|
|
2509
2491
|
|
2510
2492
|
Args:
|
2511
2493
|
data: A tascCODA-compatible data object.
|
2512
|
-
modality_1: If `data` is MuData, specify the modality name to the original cell level anndata object.
|
2513
|
-
modality_2: If `data` is MuData, specify the modality name to the aggregated level anndata object.
|
2514
|
-
dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
|
2515
|
-
levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
2516
|
-
levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
2494
|
+
modality_1: If `data` is MuData, specify the modality name to the original cell level anndata object.
|
2495
|
+
modality_2: If `data` is MuData, specify the modality name to the aggregated level anndata object.
|
2496
|
+
dendrogram_key: Key to the scanpy.tl.dendrogram result in `.uns` of original cell level anndata object.
|
2497
|
+
levels_orig: List that indicates which columns in `.obs` of the original data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
2498
|
+
levels_agg: List that indicates which columns in `.var` of the aggregated data correspond to tree levels. The list must begin with the root level, and end with the leaf level.
|
2517
2499
|
add_level_name: If True, internal nodes in the tree will be named as "{level_name}_{node_name}" instead of just {level_name}.
|
2518
|
-
Defaults to True.
|
2519
2500
|
key_added: If not specified, the tree is stored in .uns[‘tree’]. If `data` is AnnData, save tree in `data`.
|
2520
|
-
If `data` is MuData, save tree in data[modality_2].
|
2501
|
+
If `data` is MuData, save tree in data[modality_2].
|
2521
2502
|
|
2522
2503
|
Returns:
|
2523
2504
|
Updates data with the following:
|
@@ -2538,10 +2519,10 @@ def import_tree(
|
|
2538
2519
|
data_1 = data[modality_1]
|
2539
2520
|
data_2 = data[modality_2]
|
2540
2521
|
except KeyError as name:
|
2541
|
-
|
2522
|
+
logger.error(f"No {name} slot in MuData")
|
2542
2523
|
raise
|
2543
2524
|
except IndexError:
|
2544
|
-
|
2525
|
+
logger.error("Please specify modality_1 and modality_2 to indicate modalities in MuData")
|
2545
2526
|
raise
|
2546
2527
|
else:
|
2547
2528
|
data_1 = data
|
@@ -2613,43 +2594,38 @@ def from_scanpy(
|
|
2613
2594
|
Returns:
|
2614
2595
|
AnnData: A data set with cells aggregated to the (sample x cell type) level
|
2615
2596
|
"""
|
2616
|
-
if isinstance(sample_identifier, str)
|
2617
|
-
|
2597
|
+
sample_identifier = [sample_identifier] if isinstance(sample_identifier, str) else sample_identifier
|
2598
|
+
covariate_obs = list(set(covariate_obs or []) | set(sample_identifier))
|
2618
2599
|
|
2619
|
-
if
|
2600
|
+
if isinstance(sample_identifier, list):
|
2620
2601
|
adata.obs["scCODA_sample_id"] = adata.obs[sample_identifier].agg("-".join, axis=1)
|
2621
2602
|
sample_identifier = "scCODA_sample_id"
|
2622
|
-
else:
|
2623
|
-
sample_identifier = sample_identifier[0]
|
2624
2603
|
|
2625
|
-
|
2626
|
-
ct_count_data =
|
2627
|
-
ct_count_data = ct_count_data.fillna(0)
|
2628
|
-
|
2629
|
-
# get covariates from different sources
|
2604
|
+
groups = adata.obs.value_counts([sample_identifier, cell_type_identifier])
|
2605
|
+
ct_count_data = groups.unstack(level=cell_type_identifier).fillna(0)
|
2630
2606
|
covariate_df_ = pd.DataFrame(index=ct_count_data.index)
|
2631
2607
|
|
2632
2608
|
if covariate_uns is not None:
|
2633
2609
|
covariate_df_uns = pd.DataFrame(adata.uns[covariate_uns], index=ct_count_data.index)
|
2634
|
-
covariate_df_ =
|
2610
|
+
covariate_df_ = pd.concat([covariate_df_, covariate_df_uns], axis=1)
|
2635
2611
|
|
2636
2612
|
if covariate_obs:
|
2637
|
-
|
2638
|
-
|
2639
|
-
|
2640
|
-
|
2641
|
-
|
2642
|
-
|
2643
|
-
|
2644
|
-
|
2645
|
-
covariate_df_ = covariate_df_.join(covariate_df_obs, how="left")
|
2613
|
+
unique_check = adata.obs.groupby(sample_identifier).nunique()
|
2614
|
+
for c in covariate_obs.copy():
|
2615
|
+
if unique_check[c].max() != 1:
|
2616
|
+
logger.warning(f"Covariate {c} has non-unique values for batch! Skipping...")
|
2617
|
+
covariate_obs.remove(c)
|
2618
|
+
if covariate_obs:
|
2619
|
+
covariate_df_obs = adata.obs.groupby(sample_identifier).first()[covariate_obs]
|
2620
|
+
covariate_df_ = pd.concat([covariate_df_, covariate_df_obs], axis=1)
|
2646
2621
|
|
2647
2622
|
if covariate_df is not None:
|
2648
|
-
if
|
2649
|
-
raise ValueError("
|
2650
|
-
covariate_df_ = covariate_df_.
|
2623
|
+
if set(covariate_df.index) != set(ct_count_data.index):
|
2624
|
+
raise ValueError("Mismatch between sample names in anndata and covariate_df!")
|
2625
|
+
covariate_df_ = pd.concat([covariate_df_, covariate_df.reindex(ct_count_data.index)], axis=1)
|
2651
2626
|
|
2652
|
-
var_dat = ct_count_data.sum(
|
2627
|
+
var_dat = ct_count_data.sum().rename("n_cells").to_frame()
|
2653
2628
|
var_dat.index = var_dat.index.astype(str)
|
2629
|
+
covariate_df_.index = covariate_df_.index.astype(str)
|
2654
2630
|
|
2655
2631
|
return AnnData(X=ct_count_data.values, var=var_dat, obs=covariate_df_)
|
pertpy/tools/_coda/_sccoda.py
CHANGED
@@ -9,6 +9,7 @@ import numpyro as npy
|
|
9
9
|
import numpyro.distributions as npd
|
10
10
|
from anndata import AnnData
|
11
11
|
from jax import config, random
|
12
|
+
from lamin_utils import logger
|
12
13
|
from mudata import MuData
|
13
14
|
from numpyro.infer import Predictive
|
14
15
|
from rich import print
|
@@ -73,13 +74,13 @@ class Sccoda(CompositionalModel2):
|
|
73
74
|
adata: AnnData object.
|
74
75
|
type : Specify the input adata type, which could be either a cell-level AnnData or an aggregated sample-level AnnData.
|
75
76
|
generate_sample_level: Whether to generate an AnnData object on the sample level or create an empty AnnData object.
|
76
|
-
cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
|
77
|
-
sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
|
78
|
-
covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
|
79
|
-
covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
|
80
|
-
covariate_df: If type is "cell_level", specify a DataFrame with covariates.
|
81
|
-
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
82
|
-
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
77
|
+
cell_type_identifier: If type is "cell_level", specify column name in adata.obs that specifies the cell types.
|
78
|
+
sample_identifier: If type is "cell_level", specify column name in adata.obs that specifies the sample.
|
79
|
+
covariate_uns: If type is "cell_level", specify key for adata.uns, where covariate values are stored.
|
80
|
+
covariate_obs: If type is "cell_level", specify list of keys for adata.obs, where covariate values are stored.
|
81
|
+
covariate_df: If type is "cell_level", specify a DataFrame with covariates.
|
82
|
+
modality_key_1: Key to the cell-level AnnData in the MuData object.
|
83
|
+
modality_key_2: Key to the aggregated sample-level AnnData object in the MuData object.
|
83
84
|
|
84
85
|
Returns:
|
85
86
|
MuData: MuData object with cell-level AnnData (`mudata[modality_key_1]`) and aggregated sample-level AnnData (`mudata[modality_key_2]`).
|
@@ -127,10 +128,10 @@ class Sccoda(CompositionalModel2):
|
|
127
128
|
Categorical covariates are handled automatically, with the covariate value of the first sample being used as the reference category.
|
128
129
|
To set a different level as the base category for a categorical covariate, use "C(<CovariateName>, Treatment('<ReferenceLevelName>'))"
|
129
130
|
reference_cell_type: Column name that sets the reference cell type.
|
130
|
-
Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
131
|
+
Reference the name of a column. If "automatic", the cell type with the lowest dispersion in relative abundance that is present in at least 90% of samples will be chosen.
|
131
132
|
automatic_reference_absence_threshold: If using reference_cell_type = "automatic", determine the maximum fraction of zero entries for a cell type
|
132
|
-
to be considered as a possible reference cell type.
|
133
|
-
modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
|
133
|
+
to be considered as a possible reference cell type.
|
134
|
+
modality_key: If data is a MuData object, specify key to the aggregated sample-level AnnData object in the MuData object.
|
134
135
|
|
135
136
|
Returns:
|
136
137
|
Return an AnnData (if input data is an AnnData object) or return a MuData (if input data is a MuData object)
|
@@ -309,10 +310,10 @@ class Sccoda(CompositionalModel2):
|
|
309
310
|
|
310
311
|
Args:
|
311
312
|
data: AnnData object or MuData object.
|
312
|
-
modality_key: If data is a MuData object, specify which modality to use.
|
313
|
-
rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
|
314
|
-
num_prior_samples: Number of prior samples calculated.
|
315
|
-
use_posterior_predictive: If True, the posterior predictive will be calculated.
|
313
|
+
modality_key: If data is a MuData object, specify which modality to use.
|
314
|
+
rng_key: The rng state used for the prior simulation. If None, a random state will be selected.
|
315
|
+
num_prior_samples: Number of prior samples calculated.
|
316
|
+
use_posterior_predictive: If True, the posterior predictive will be calculated.
|
316
317
|
|
317
318
|
Returns:
|
318
319
|
az.InferenceData: arviz_data with all MCMC information
|
@@ -335,7 +336,7 @@ class Sccoda(CompositionalModel2):
|
|
335
336
|
try:
|
336
337
|
sample_adata = data[modality_key]
|
337
338
|
except IndexError:
|
338
|
-
|
339
|
+
logger.error("When data is a MuData object, modality_key must be specified!")
|
339
340
|
raise
|
340
341
|
if isinstance(data, AnnData):
|
341
342
|
sample_adata = data
|