biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,605 @@
1
+ from __future__ import annotations
2
+ import os
3
+ import warnings
4
+ from pathlib import Path
5
+
6
+ from diot import Diot # type: ignore[import]
7
+ import scanpy as sc
8
+ import scvelo as scv
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ from biopipen.utils.misc import logger
12
+ from biopipen.scripts.scrna.seurat_anndata_conversion import (
13
+ convert_seurat_to_anndata,
14
+ convert_anndata_to_seurat,
15
+ )
16
+
17
+
18
+
19
def _save_current_figure(name_parts, dpi):
    """Save the active matplotlib figure to disk and close it.

    Args:
        name_parts: Pieces of the output file name. Empty/None pieces are
            dropped and the remaining ones are joined with ".".
        dpi: Resolution forwarded to `plt.savefig`.
    """
    plt.savefig(".".join(filter(None, name_parts)), dpi=dpi)
    # The plots are produced with show=False, so nothing else releases the
    # figure; close it so that figures do not pile up across modes/clusters.
    plt.close()


def SCVELO(
    adata,
    group_by,
    dirpath,
    logger,
    palette=None,
    linear_reduction=None,
    nonlinear_reduction=None,
    basis=None,
    mode=("deterministic", "stochastic", "dynamical"),
    fitting_by="stochastic",
    min_shared_counts=30,
    n_pcs=30,
    n_neighbors=30,
    stream_smooth=None,
    stream_density=2,
    arrow_size=5,
    arrow_length=5,
    arrow_density=0.5,
    denoise=False,
    denoise_topn=3,
    kinetics=False,
    kinetics_topn=100,
    calculate_velocity_genes=False,
    top_n=6,
    ncores=1,
    dpi=100,
    fileprefix="",
):
    """Run the scVelo RNA-velocity workflow and write diagnostic plots.

    The working directory is changed to `dirpath` and all PNG files are
    written there. `adata` is modified in place and also returned.

    Args:
        adata: Object exposing the AnnData interface used below; it must
            carry "spliced" and "unspliced" layers.
        group_by: Column of `adata.obs` used for grouping and coloring; it is
            cast to a categorical.
        dirpath: Directory to `chdir` into before plotting (`~` is expanded).
        logger: Logger providing `info` and `warning`.
        palette: Forwarded to the scVelo plotting functions.
        linear_reduction: Key in `adata.obsm` used as `use_rep` for the
            moments; when None or missing, PCA is run and "X_pca" is used.
        nonlinear_reduction: Used as the plotting basis when `basis` is None.
        basis: Embedding used for plotting. When both this and
            `nonlinear_reduction` are None, the first two components of the
            linear reduction are stored as "X_basis" and used.
        mode: One velocity mode or an iterable of modes. The argument itself
            is never mutated.
        fitting_by: Mode whose "<mode>_genes" column selects the genes used
            for the dynamical model; it is always added to the modes to run.
        min_shared_counts: Forwarded to `scv.pp.filter_and_normalize`.
        n_pcs: Number of principal components for PCA and moments.
        n_neighbors: Number of neighbors for moments and the velocity graph.
        stream_smooth: `smooth` for the stream plot.
        stream_density: `density` for the stream plot.
        arrow_size: Arrow size for the arrow plot (halved for the grid plot).
        arrow_length: Arrow length for the arrow plot (halved for the grid).
        arrow_density: Arrow density for the arrow plot (doubled for the grid).
        denoise: When True, adds a "dynamical_denoise" run restricted to the
            `denoise_topn` genes with the highest fit likelihood (forces the
            dynamical mode).
        denoise_topn: Number of top-likelihood genes for the denoise run.
        kinetics: When True, runs the differential kinetic test and adds a
            "dynamical_kinetics" run (forces the dynamical mode).
        kinetics_topn: Number of top-likelihood genes for the kinetic test.
        calculate_velocity_genes: When True, ranks velocity/dynamical genes
            per group and plots the top `top_n` genes of each group.
        top_n: Number of genes plotted per group.
        ncores: `n_jobs` for scVelo.
        dpi: Resolution of the saved figures.
        fileprefix: Optional prefix of every output file name.

    Returns:
        The same `adata` object, annotated with the velocity results.

    Raises:
        ValueError: If the "spliced" or "unspliced" layer is missing.
    """
    warnings.simplefilter("ignore", category=UserWarning)
    warnings.simplefilter("ignore", category=FutureWarning)
    warnings.simplefilter("ignore", category=DeprecationWarning)

    os.chdir(os.path.expanduser(dirpath))
    if linear_reduction is None:
        sc.pp.pca(adata, n_comps=n_pcs)
        linear_reduction = "X_pca"
    elif linear_reduction not in adata.obsm.keys():
        logger.warning(
            f"Linear reduction '{linear_reduction}' not found in adata.obsm. "
            "Running PCA to generate it."
        )
        sc.pp.pca(adata, n_comps=n_pcs)
        linear_reduction = "X_pca"

    if basis is None:
        if nonlinear_reduction is not None:
            basis = nonlinear_reduction
        else:
            basis = "basis"
            adata.obsm["X_basis"] = adata.obsm[linear_reduction][:, 0:2]
    scv.pl.utils.check_basis(adata, basis)

    if "spliced" not in adata.layers.keys():
        raise ValueError("'spliced' data must be provided.")

    if "unspliced" not in adata.layers.keys():
        raise ValueError("'unspliced' data must be provided.")

    # Work on a private copy so neither the default nor the caller's list is
    # mutated by the appends below.
    modes = [mode] if isinstance(mode, str) else list(mode)
    modes.append(fitting_by)
    if kinetics is True or denoise is True:
        modes.append("dynamical")

    # De-duplicate while keeping the requested order (a set would make the
    # run order change from one interpreter run to the next), then move
    # "dynamical" to the end: it needs the "<fitting_by>_genes" column that
    # the fitting mode produces first. The sort is stable.
    modes = list(dict.fromkeys(modes))
    modes.sort(key="dynamical".__eq__)

    adata.obs[group_by] = adata.obs[group_by].astype(dtype="category")
    scv.pl.proportions(adata, groupby=group_by, save=False, show=False)
    _save_current_figure([fileprefix, "proportions.png"], dpi)

    logger.info("- Filtering and normalizing data ...")
    scv.pp.filter_and_normalize(adata, min_shared_counts=min_shared_counts)

    logger.info("- Running moments ...")
    scv.pp.moments(
        adata, n_pcs=n_pcs, n_neighbors=n_neighbors, use_rep=linear_reduction
    )

    # NOTE: this is the index of the column, i.e. every gene name present in
    # adata.var, not only the genes flagged as highly variable.
    highly_variable_genes = adata.var["highly_variable_genes"].index.tolist()
    adata.uns["layer_features_RNA"] = highly_variable_genes
    adata.uns["layer_features_spliced"] = highly_variable_genes
    adata.uns["layer_features_unspliced"] = highly_variable_genes

    for m in modes:
        # Each run is (vkey, diff_kinetics, gene_subset, autoscale).
        runs = [(m, False, None, True)]

        logger.info(f"- mode: {m}")
        adata.uns["layer_features_" + m] = highly_variable_genes
        adata.uns["layer_features_variance_" + m] = highly_variable_genes
        if m == "dynamical":
            # Fit the dynamical model on a slim copy holding only the genes
            # selected by the fitting mode, the moments and the graph.
            adata2 = adata[:, adata.var[fitting_by + "_genes"]].copy()
            Ms = adata2.layers["Ms"]
            Mu = adata2.layers["Mu"]
            adata2.layers.clear()
            adata2.layers["Ms"] = Ms
            adata2.layers["Mu"] = Mu
            connectivities = adata2.obsp["connectivities"]
            adata2.obsp.clear()
            adata2.obsp["connectivities"] = connectivities
            adata.uns["layer_features_Ms"] = highly_variable_genes
            adata.uns["layer_features_Mu"] = highly_variable_genes

            scv.tl.recover_dynamics(
                adata2,
                var_names=fitting_by + "_genes",
                use_raw=False,
                n_jobs=ncores,
            )

            # Bring the fit results back into the full object; genes that
            # were not fitted get NaN.
            var_add = [
                col
                for col in adata2.var.columns
                if col not in adata.var.columns
            ]
            adata.var = adata.var.merge(
                adata2.var[var_add], how="left", left_index=True, right_index=True
            )
            adata.uns["recover_dynamics"] = adata2.uns["recover_dynamics"]

            fit_mask = adata.var[fitting_by + "_genes"].values

            loss = np.full(
                (adata.shape[1], adata2.varm["loss"].shape[1]), np.nan
            )
            loss[fit_mask, :] = adata2.varm["loss"]
            adata.varm["loss"] = loss

            # Each layer gets its own array; sharing a single buffer would
            # make the three layers overwrite one another.
            for layer in ("fit_t", "fit_tau", "fit_tau_"):
                expanded = np.full(adata.layers["spliced"].shape, np.nan)
                expanded[:, fit_mask] = adata2.layers[layer]
                adata.layers[layer] = expanded
                adata.uns["layer_features_" + layer] = highly_variable_genes

            if kinetics is True:
                runs.append(("dynamical_kinetics", True, None, True))
                top_genes = (
                    adata.var["fit_likelihood"]
                    .sort_values(ascending=False)
                    .index[:kinetics_topn]
                )
                scv.tl.differential_kinetic_test(
                    adata, var_names=top_genes, groupby=group_by
                )

            if denoise is True:
                runs.append(
                    (
                        "dynamical_denoise",
                        False,
                        adata.var["fit_likelihood"]
                        .sort_values(ascending=False)
                        .index[:denoise_topn],
                        False,
                    )
                )
                # NOTE(review): the "dynamical" layer is normally only
                # created by the velocity call in the loop below, so it may
                # not exist yet; presumably that call also recomputes the
                # "dynamical_denoise" layer. Guarded to avoid a KeyError -
                # confirm the intended denoise procedure.
                if m in adata.layers.keys():
                    adata.layers["dynamical_denoise"] = adata.layers[
                        m
                    ] + np.random.normal(
                        adata.layers[m], scale=adata.layers["Ms"].std(0)
                    )
                adata.uns["layer_features_dynamical_denoise"] = (
                    highly_variable_genes
                )

        for vkey, dk, gene_subset, autoscale in runs:
            # Velocity graph
            scv.tl.velocity(adata, mode=m, vkey=vkey, diff_kinetics=dk)
            scv.tl.velocity_graph(
                adata,
                vkey=vkey,
                gene_subset=gene_subset,
                n_neighbors=n_neighbors,
                n_jobs=ncores,
            )
            # Mirror the results of the current mode under the generic
            # "velocity" names.
            if m == "dynamical":
                adata.var["velocity_genes"] = adata.var[m + "_genes"]
                adata.layers["velocity"] = adata.layers[m]
                adata.layers["variance_u"] = adata.layers[m + "_u"]
                adata.uns["layer_features_velocity"] = highly_variable_genes
                adata.uns["layer_features_variance_u"] = highly_variable_genes
                adata.uns["layer_features_dynamical_u"] = highly_variable_genes
            else:
                adata.var["velocity_gamma"] = adata.var[m + "_gamma"]
                adata.var["velocity_r2"] = adata.var[m + "_r2"]
                adata.var["velocity_genes"] = adata.var[m + "_genes"]
                adata.layers["velocity"] = adata.layers[m]
                adata.uns["layer_features_velocity"] = highly_variable_genes

            # Velocity embedding
            scv.tl.velocity_embedding(
                adata, basis=basis, vkey=vkey, autoscale=autoscale
            )
            scv.pl.velocity_embedding_stream(
                adata,
                vkey=vkey,
                basis=basis,
                title=vkey,
                color=group_by,
                palette=palette,
                smooth=stream_smooth,
                density=stream_density,
                legend_loc="none",
                save=False,
                show=False,
            )
            _save_current_figure([fileprefix, vkey + "_stream.png"], dpi)

            scv.pl.velocity_embedding(
                adata,
                vkey=vkey,
                basis=basis,
                title=vkey,
                color=group_by,
                palette=palette,
                arrow_length=arrow_length,
                arrow_size=arrow_size,
                density=arrow_density,
                linewidth=0.3,
                save=False,
                show=False,
            )
            _save_current_figure([fileprefix, vkey + "_arrow.png"], dpi)

            scv.pl.velocity_embedding_grid(
                adata,
                vkey=vkey,
                basis=basis,
                title=vkey,
                color=group_by,
                palette=palette,
                arrow_length=arrow_length / 2,
                arrow_size=arrow_size / 2,
                density=arrow_density * 2,
                save=False,
                show=False,
            )
            _save_current_figure(
                [fileprefix, vkey + "_embedding_grid.png"], dpi
            )

            # Velocity confidence
            scv.tl.velocity_confidence(adata, vkey=vkey)
            scv.pl.scatter(
                adata,
                basis=basis,
                title=vkey + " length",
                color=vkey + "_length",
                cmap="coolwarm",
                save=False,
                show=False,
            )
            _save_current_figure([fileprefix, vkey + "_length.png"], dpi)

            scv.pl.scatter(
                adata,
                basis=basis,
                title=vkey + " confidence",
                color=vkey + "_confidence",
                cmap="magma",
                save=False,
                show=False,
            )
            _save_current_figure([fileprefix, vkey + "_confidence.png"], dpi)

            # Terminal states: drop leftovers of a previous run, then keep
            # the fresh results under vkey-specific column names.
            for term in [
                "root_cells",
                "end_points",
                vkey + "_root_cells",
                vkey + "_end_points",
            ]:
                if term in adata.obs.columns:
                    adata.obs.drop(term, axis=1, inplace=True)

            scv.tl.terminal_states(
                adata,
                vkey=vkey,
            )
            for term in ["root_cells", "end_points"]:
                adata.obs[vkey + "_" + term] = adata.obs[term]
                adata.obs.drop(term, axis=1, inplace=True)

            # Pseudotime
            scv.tl.velocity_pseudotime(
                adata,
                vkey=vkey,
                root_key=vkey + "_root_cells",
                end_key=vkey + "_end_points",
            )
            scv.pl.scatter(
                adata,
                basis=basis,
                title=vkey + " pseudotime",
                color=vkey + "_pseudotime",
                cmap="cividis",
                save=False,
                show=False,
            )
            _save_current_figure([fileprefix, vkey + "_pseudotime.png"], dpi)

            # Latent time
            if m == "dynamical":
                scv.tl.latent_time(
                    adata,
                    vkey=vkey,
                    root_key=vkey + "_root_cells",
                    end_key=vkey + "_end_points",
                )
                scv.pl.scatter(
                    adata,
                    basis=basis,
                    title=vkey + " latent time",
                    color="latent_time",
                    color_map="cividis",
                    save=False,
                    show=False,
                )
                _save_current_figure(
                    [fileprefix, vkey + "_latent_time.png"], dpi
                )

            # PAGA
            adata.uns["neighbors"]["distances"] = adata.obsp["distances"]
            adata.uns["neighbors"]["connectivities"] = adata.obsp["connectivities"]
            scv.tl.paga(
                adata,
                groups=group_by,
                vkey=vkey,
                root_key=vkey + "_root_cells",
                end_key=vkey + "_end_points",
            )
            scv.pl.paga(
                adata,
                title=vkey + " PAGA (" + group_by + ")",
                node_colors=palette,
                basis=basis,
                alpha=0.5,
                min_edge_width=2,
                node_size_scale=1.5,  # type: ignore
                legend_loc="none",
                save=False,
                show=False,
            )
            _save_current_figure([fileprefix, vkey + "_paga.png"], dpi)

            # Velocity genes
            if calculate_velocity_genes is True:
                if m != "dynamical":
                    scv.tl.rank_velocity_genes(adata, vkey=vkey, groupby=group_by)
                    adata.var[vkey + "_score"] = adata.var["spearmans_score"]
                    rank_key = "rank_velocity_genes"
                else:
                    scv.tl.rank_dynamical_genes(adata, groupby=group_by)
                    rank_key = "rank_dynamical_genes"

                # Keep the rankings under vkey-specific keys and drop the
                # generic entry.
                df1 = scv.get_df(adata.uns[rank_key]["names"])
                adata.uns["rank_" + vkey + "_genenames"] = df1
                df2 = scv.get_df(adata.uns[rank_key]["scores"])
                adata.uns["rank_" + vkey + "_genescores"] = df2
                del adata.uns[rank_key]

                for cluster in df1.columns:
                    cluster_top_genes = df1[cluster].values[:top_n]

                    scv.pl.scatter(
                        adata,
                        color=group_by,
                        palette=palette,
                        basis=cluster_top_genes,
                        vkey=vkey,
                        size=10,
                        linewidth=2,
                        alpha=1,
                        ylabel="cluster: " + cluster + "\nunspliced",
                        add_linfit=True,
                        add_rug=True,
                        add_outline=True,
                        ncols=3,
                        frameon=True,
                        save=False,
                        show=False,
                    )
                    _save_current_figure(
                        [fileprefix, cluster, vkey + "_genes1.png"], dpi
                    )

                    scv.pl.velocity(
                        adata,
                        color=group_by,
                        var_names=cluster_top_genes,
                        vkey=vkey,
                        size=10,
                        linewidth=2,
                        alpha=1,
                        ylabel="cluster: " + cluster + "\nunspliced",
                        add_outline=True,
                        basis=basis,
                        color_map=["Blues", "YlOrRd"],
                        ncols=2,
                        save=False,
                        show=False,
                    )
                    _save_current_figure(
                        [fileprefix, cluster, vkey + "_genes2.png"], dpi
                    )

    # Best effort: rename the "_index" column of the raw var table to
    # "features". Objects without a raw slot are simply left untouched.
    try:
        adata.__dict__["_raw"].__dict__["_var"] = (
            adata.__dict__["_raw"]
            .__dict__["_var"]
            .rename(columns={"_index": "features"})
        )
    except (AttributeError, KeyError):
        pass

    return adata
508
+
509
+
510
+ sobjfile: str = {{in.sobjfile | quote}} # pyright: ignore # noqa: E999
511
+ outfile: str = {{out.outfile | quote}} # pyright: ignore # noqa: E999
512
+ outdir: str = os.path.dirname(outfile)
513
+
514
+ ncores: int = {{envs.ncores | repr}} # pyright: ignore # noqa: E999
515
+ group_by: str | None = {{envs.group_by | repr}} # pyright: ignore # noqa: E999
516
+ mode: str | list[str] = {{envs.mode | repr}} # pyright: ignore # noqa: E999
517
+ fitting_by: str = {{envs.fitting_by | repr}} # pyright: ignore # noqa: E999
518
+ min_shared_counts: int = {{envs.min_shared_counts | repr}} # pyright: ignore # noqa: E999
519
+ n_pcs: int = {{envs.n_pcs | repr}} # pyright: ignore # noqa: E999
520
+ n_neighbors: int = {{envs.n_neighbors | repr}} # pyright: ignore # noqa: E999
521
+ denoise: bool = {{envs.denoise | repr}} # pyright: ignore # noqa: E999
522
+ denoise_topn: int = {{envs.denoise_topn | repr}} # pyright: ignore # noqa: E999
523
+ kinetics: bool = {{envs.kinetics | repr}} # pyright: ignore # noqa: E999
524
+ kinetics_topn: int = {{envs.kinetics_topn | repr}} # pyright: ignore # noqa: E999
525
+ calculate_velocity_genes: bool = {{envs.calculate_velocity_genes | repr}} # pyright: ignore # noqa: E999
526
+ top_n: int = {{envs.top_n | repr}} # pyright: ignore # noqa: E999
527
+ rscript: str = {{envs.rscript | repr}} # pyright: ignore # noqa: E999
528
+
529
+ if group_by is None:
530
+ raise ValueError("The 'envs.group_by' parameter must be specified.")
531
+
532
+ if sobjfile.endswith(".h5ad"):
533
+ h5ad_file = Path(sobjfile)
534
+ else:
535
+ h5ad_file = Path(outfile).with_suffix(".input.h5ad")
536
+ logger.info("Converting Seurat object to AnnData (h5ad) format...")
537
+ convert_seurat_to_anndata(
538
+ input_file=sobjfile,
539
+ output_file=h5ad_file,
540
+ rscript=rscript,
541
+ )
542
+
543
+ logger.info(f"Reading AnnData (h5ad) file ...")
544
+ adata = sc.read_h5ad(h5ad_file)
545
+
546
+ if group_by not in adata.obs.columns:
547
+ raise ValueError(
548
+ f"The group_by column envs.group_by = '{group_by}' is not found in the AnnData object."
549
+ )
550
+
551
+ logger.info(f"Running scVelo analysis ...")
552
+
553
+ if isinstance(mode, str):
554
+ mode = [mode]
555
+
556
+ if not all([m in ["deterministic","stochastic","dynamical"] for m in mode]):
557
+ raise ValueError(
558
+ "The 'envs.mode' parameter must be one or more of 'deterministic', 'stochastic', or 'dynamical'."
559
+ )
560
+
561
+ if not fitting_by in ["deterministic","stochastic"]:
562
+ raise ValueError(
563
+ "The 'envs.fitting_by' parameter must be either 'deterministic' or 'stochastic'."
564
+ )
565
+
566
+ adata = SCVELO(
567
+ adata=adata,
568
+ group_by=group_by,
569
+ dirpath=outdir,
570
+ linear_reduction="X_pca",
571
+ mode=mode,
572
+ fitting_by=fitting_by,
573
+ min_shared_counts=min_shared_counts,
574
+ n_pcs=n_pcs,
575
+ n_neighbors=n_neighbors,
576
+ stream_smooth=None,
577
+ stream_density=2,
578
+ arrow_size=5,
579
+ arrow_length=5,
580
+ arrow_density=0.5,
581
+ denoise=denoise,
582
+ denoise_topn=denoise_topn,
583
+ kinetics=kinetics,
584
+ kinetics_topn=kinetics_topn,
585
+ calculate_velocity_genes=calculate_velocity_genes,
586
+ top_n=top_n,
587
+ ncores=ncores,
588
+ logger=logger,
589
+ )
590
+
591
+ if outfile.endswith(".h5ad"):
592
+ h5ad_file = Path(outfile)
593
+ else:
594
+ h5ad_file = Path(outfile).with_suffix(".output.h5ad")
595
+
596
+ logger.info(f"Writing object to AnnData (h5ad) file ...")
597
+ adata.write_h5ad(h5ad_file)
598
+
599
+ if not outfile.endswith(".h5ad"):
600
+ logger.info(f"Converting AnnData (h5ad) file to Seurat format ...")
601
+ convert_anndata_to_seurat(
602
+ input_file=h5ad_file,
603
+ output_file=outfile,
604
+ rscript=rscript,
605
+ )
@@ -1,8 +1,7 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
1
+ library(biopipen.utils)
3
2
 
4
3
  sobjfile <- {{in.sobjfile | r}}
5
4
  outfile <- {{out.outfile | r}}
6
5
  assay <- {{envs.assay | r}}
7
6
 
8
- seurat_to_anndata(sobjfile, outfile, assay, log_info)
7
+ ConvertSeuratToAnnData(sobjfile, outfile = outfile, assay = assay)
@@ -55,7 +55,7 @@ if (
55
55
 
56
56
  if (save_code) {
57
57
  save_plotcode(p, plot_prefix,
58
- setup = c("library(scplotter)", "load('data.RData')", "invisible(list2env('case'))"),
58
+ setup = c("library(scplotter)", "load('data.RData')", "invisible(list2env(case, envir = .GlobalEnv))"),
59
59
  "case",
60
60
  auto_data_setup = FALSE)
61
61
  }