biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +290 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
biopipen/ns/scrna.py
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
"""Tools to analyze single-cell RNA"""
|
|
2
2
|
|
|
3
|
+
from pipen.utils import mark
|
|
3
4
|
from ..core.proc import Proc
|
|
4
5
|
from ..core.config import config
|
|
5
|
-
from ..utils.common_docstrs import (
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
)
|
|
6
|
+
# from ..utils.common_docstrs import (
|
|
7
|
+
# indent_docstr,
|
|
8
|
+
# format_placeholder,
|
|
9
|
+
# MUTATE_HELPERS_CLONESIZE,
|
|
10
|
+
# ENVS_SECTION_EACH,
|
|
11
|
+
# )
|
|
11
12
|
|
|
12
|
-
MUTATE_HELPERS_CLONESIZE_INDENTED = indent_docstr(MUTATE_HELPERS_CLONESIZE, " " * 3)
|
|
13
|
-
ENVS_SECTION_EACH_INDENTED = indent_docstr(ENVS_SECTION_EACH, " " * 3)
|
|
13
|
+
# MUTATE_HELPERS_CLONESIZE_INDENTED = indent_docstr(MUTATE_HELPERS_CLONESIZE, " " * 3)
|
|
14
|
+
# ENVS_SECTION_EACH_INDENTED = indent_docstr(ENVS_SECTION_EACH, " " * 3)
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class SeuratLoading(Proc):
|
|
@@ -96,14 +97,22 @@ class SeuratPreparing(Proc):
|
|
|
96
97
|
to the h5 file that can be read by `Read10X_h5()` from `Seurat`.
|
|
97
98
|
|
|
98
99
|
Output:
|
|
99
|
-
|
|
100
|
+
outfile: The qs2 file with the Seurat object with all samples integrated.
|
|
100
101
|
Note that the cell ids are prefixed with sample names.
|
|
101
|
-
QC plots will be saved in `<job.outdir>/plots`.
|
|
102
102
|
|
|
103
103
|
Envs:
|
|
104
104
|
ncores (type=int): Number of cores to use.
|
|
105
105
|
Used in `future::plan(strategy = "multicore", workers = <ncores>)`
|
|
106
106
|
to parallelize some Seurat procedures.
|
|
107
|
+
mutaters (type=json): The mutaters to mutate the metadata to the cells.
|
|
108
|
+
These new columns will be added to the metadata of the Seurat object and
|
|
109
|
+
will be saved in the output file.
|
|
110
|
+
min_cells (type=int): The minimum number of cells that a gene must be
|
|
111
|
+
expressed in to be kept. This is used in `Seurat::CreateSeuratObject()`.
|
|
112
|
+
Further QC (`envs.cell_qc`, `envs.gene_qc`) will be performed after this.
|
|
113
|
+
min_features (type=int): The minimum number of features that a cell must
|
|
114
|
+
express to be kept. This is used in `Seurat::CreateSeuratObject()`.
|
|
115
|
+
Further QC (`envs.cell_qc`, `envs.gene_qc`) will be performed after this.
|
|
107
116
|
cell_qc: Filter expression to filter cells, using
|
|
108
117
|
`tidyseurat::filter()`.
|
|
109
118
|
Available QC keys include `nFeature_RNA`, `nCount_RNA`,
|
|
@@ -122,9 +131,6 @@ class SeuratPreparing(Proc):
|
|
|
122
131
|
genes.
|
|
123
132
|
///
|
|
124
133
|
|
|
125
|
-
cell_qc_per_sample (flag): Whether to perform cell QC per sample or not.
|
|
126
|
-
If `True`, the cell QC will be performed per sample, and the QC will be
|
|
127
|
-
applied to each sample before merging.
|
|
128
134
|
gene_qc (ns): Filter genes.
|
|
129
135
|
`gene_qc` is applied after `cell_qc`.
|
|
130
136
|
- min_cells: The minimum number of cells that a gene must be
|
|
@@ -259,34 +265,37 @@ class SeuratPreparing(Proc):
|
|
|
259
265
|
""" # noqa: E501
|
|
260
266
|
|
|
261
267
|
input = "metafile:file"
|
|
262
|
-
output = "
|
|
268
|
+
output = "outfile:file:{{in.metafile | stem}}.seurat.qs"
|
|
263
269
|
lang = config.lang.rscript
|
|
270
|
+
envs_depth = 4
|
|
264
271
|
envs = {
|
|
265
272
|
"ncores": config.misc.ncores,
|
|
273
|
+
"mutaters": {},
|
|
274
|
+
"min_cells": 0,
|
|
275
|
+
"min_features": 0,
|
|
266
276
|
"cell_qc": None, # "nFeature_RNA > 200 & percent.mt < 5",
|
|
267
|
-
"cell_qc_per_sample": False,
|
|
268
277
|
"gene_qc": {"min_cells": 0, "excludes": []},
|
|
269
278
|
"qc_plots": {
|
|
270
|
-
"Violin Plots
|
|
279
|
+
"Violin Plots": {
|
|
271
280
|
"kind": "cell",
|
|
272
281
|
"plot_type": "violin",
|
|
273
282
|
"devpars": {"res": 100, "height": 600, "width": 1200},
|
|
274
283
|
},
|
|
275
|
-
"Scatter Plots
|
|
284
|
+
"Scatter Plots": {
|
|
276
285
|
"kind": "cell",
|
|
277
286
|
"plot_type": "scatter",
|
|
278
287
|
"devpars": {"res": 100, "height": 800, "width": 1200},
|
|
279
288
|
},
|
|
280
|
-
"Ridge Plots
|
|
289
|
+
"Ridge Plots": {
|
|
281
290
|
"kind": "cell",
|
|
282
291
|
"plot_type": "ridge",
|
|
283
292
|
"devpars": {"res": 100, "height": 800, "width": 1200},
|
|
284
293
|
},
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
294
|
+
"Distribution of number of cells a gene is expressed in": {
|
|
295
|
+
"kind": "gene",
|
|
296
|
+
"plot_type": "histogram",
|
|
297
|
+
"devpars": {"res": 100, "height": 1200, "width": 1200},
|
|
298
|
+
},
|
|
290
299
|
},
|
|
291
300
|
"use_sct": False,
|
|
292
301
|
"no_integration": False,
|
|
@@ -297,6 +306,7 @@ class SeuratPreparing(Proc):
|
|
|
297
306
|
"SCTransform": {
|
|
298
307
|
"return-only-var-genes": True,
|
|
299
308
|
"min_cells": 5,
|
|
309
|
+
"verbose": True,
|
|
300
310
|
},
|
|
301
311
|
"IntegrateLayers": {"method": "harmony"},
|
|
302
312
|
"doublet_detector": "none",
|
|
@@ -306,7 +316,7 @@ class SeuratPreparing(Proc):
|
|
|
306
316
|
}
|
|
307
317
|
script = "file://../scripts/scrna/SeuratPreparing.R"
|
|
308
318
|
plugin_opts = {
|
|
309
|
-
"report": "file://../reports/
|
|
319
|
+
"report": "file://../reports/common.svelte",
|
|
310
320
|
}
|
|
311
321
|
|
|
312
322
|
|
|
@@ -318,24 +328,13 @@ class SeuratClustering(Proc):
|
|
|
318
328
|
srtobj: The seurat object loaded by SeuratPreparing
|
|
319
329
|
|
|
320
330
|
Output:
|
|
321
|
-
|
|
322
|
-
If `SCTransform` was used, the default Assay will be reset to `RNA`.
|
|
331
|
+
outfile: The seurat object with cluster information at `seurat_clusters`.
|
|
323
332
|
|
|
324
333
|
Envs:
|
|
325
334
|
ncores (type=int;order=-100): Number of cores to use.
|
|
326
335
|
Used in `future::plan(strategy = "multicore", workers = <ncores>)`
|
|
327
336
|
to parallelize some Seurat procedures.
|
|
328
337
|
See also: <https://satijalab.org/seurat/articles/future_vignette.html>
|
|
329
|
-
ScaleData (ns): Arguments for [`ScaleData()`](https://satijalab.org/seurat/reference/scaledata).
|
|
330
|
-
If you want to re-scale the data by regressing to some variables, `Seurat::ScaleData`
|
|
331
|
-
will be called. If nothing is specified, `Seurat::ScaleData` will not be called.
|
|
332
|
-
- vars-to-regress: The variables to regress on.
|
|
333
|
-
- <more>: See <https://satijalab.org/seurat/reference/scaledata>
|
|
334
|
-
SCTransform (ns): Arguments for [`SCTransform()`](https://satijalab.org/seurat/reference/sctransform).
|
|
335
|
-
If you want to re-scale the data by regressing to some variables, `Seurat::SCTransform`
|
|
336
|
-
will be called. If nothing is specified, `Seurat::SCTransform` will not be called.
|
|
337
|
-
- vars-to-regress: The variables to regress on.
|
|
338
|
-
- <more>: See <https://satijalab.org/seurat/reference/sctransform>
|
|
339
338
|
RunUMAP (ns): Arguments for [`RunUMAP()`](https://satijalab.org/seurat/reference/runumap).
|
|
340
339
|
`object` is specified internally, and `-` in the key will be replaced with `.`.
|
|
341
340
|
`dims=N` will be expanded to `dims=1:N`; The maximal value of `N` will be the minimum of `N` and the number of columns - 1 for each sample.
|
|
@@ -343,6 +342,7 @@ class SeuratClustering(Proc):
|
|
|
343
342
|
- reduction: The reduction to use for UMAP.
|
|
344
343
|
If not provided, `sobj@misc$integrated_new_reduction` will be used.
|
|
345
344
|
- <more>: See <https://satijalab.org/seurat/reference/runumap>
|
|
345
|
+
RunPCA (ns): Arguments for [`RunPCA()`](https://satijalab.org/seurat/reference/runpca).
|
|
346
346
|
FindNeighbors (ns): Arguments for [`FindNeighbors()`](https://satijalab.org/seurat/reference/findneighbors).
|
|
347
347
|
`object` is specified internally, and `-` in the key will be replaced with `.`.
|
|
348
348
|
- reduction: The reduction to use.
|
|
@@ -357,14 +357,9 @@ class SeuratClustering(Proc):
|
|
|
357
357
|
The results will be saved in `seurat_clusters_<resolution>`.
|
|
358
358
|
The final resolution will be used to define the clusters at `seurat_clusters`.
|
|
359
359
|
- <more>: See <https://satijalab.org/seurat/reference/findclusters>
|
|
360
|
-
cache (type=auto):
|
|
360
|
+
cache (type=auto): Where to cache the information at different steps.
|
|
361
361
|
If `True`, the seurat object will be cached in the job output directory, which will not be cleaned up when the job is rerunning.
|
|
362
|
-
|
|
363
|
-
the input and envs of the process.
|
|
364
|
-
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
365
|
-
<https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
|
|
366
|
-
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
367
|
-
`<signature>.RDS` in the cache directory.
|
|
362
|
+
Set to `False` to not cache the results.
|
|
368
363
|
|
|
369
364
|
Requires:
|
|
370
365
|
r-seurat:
|
|
@@ -376,13 +371,12 @@ class SeuratClustering(Proc):
|
|
|
376
371
|
""" # noqa: E501
|
|
377
372
|
|
|
378
373
|
input = "srtobj:file"
|
|
379
|
-
output = "
|
|
374
|
+
output = "outfile:file:{{in.srtobj | stem}}.qs"
|
|
380
375
|
lang = config.lang.rscript
|
|
381
376
|
envs = {
|
|
382
377
|
"ncores": config.misc.ncores,
|
|
383
|
-
"
|
|
384
|
-
"
|
|
385
|
-
"RunUMAP": {"dims": 30},
|
|
378
|
+
"RunPCA": {},
|
|
379
|
+
"RunUMAP": {},
|
|
386
380
|
"FindNeighbors": {},
|
|
387
381
|
"FindClusters": {"resolution": 0.8},
|
|
388
382
|
"cache": config.path.tmpdir,
|
|
@@ -399,15 +393,15 @@ class SeuratSubClustering(Proc):
|
|
|
399
393
|
|
|
400
394
|
For the subset of cells, the reductions will be re-performed on the subset of cells,
|
|
401
395
|
and then the clustering will be performed on the subset of cells. The reduction
|
|
402
|
-
will be saved in `
|
|
403
|
-
clustering will be saved in the metadata of the original object using the casename
|
|
396
|
+
will be saved in `object@reduction$<casename>.<reduction>` of the original object and the
|
|
397
|
+
clustering will be saved in the metadata of the original object using the casename
|
|
404
398
|
as the column name.
|
|
405
399
|
|
|
406
400
|
Input:
|
|
407
|
-
srtobj: The seurat object
|
|
401
|
+
srtobj: The seurat object in RDS or qs/qs2 format.
|
|
408
402
|
|
|
409
403
|
Output:
|
|
410
|
-
|
|
404
|
+
outfile: The seurat object with the subclustering information in qs/qs2 format.
|
|
411
405
|
|
|
412
406
|
Envs:
|
|
413
407
|
ncores (type=int;order=-100): Number of cores to use.
|
|
@@ -417,7 +411,9 @@ class SeuratSubClustering(Proc):
|
|
|
417
411
|
The mutaters will be applied in the order specified.
|
|
418
412
|
subset: An expression to subset the cells, will be passed to
|
|
419
413
|
[`tidyseurat::filter()`](https://stemangiola.github.io/tidyseurat/reference/filter.html).
|
|
420
|
-
|
|
414
|
+
RunPCA (ns): Arguments for [`RunPCA()`](https://satijalab.org/seurat/reference/runpca).
|
|
415
|
+
`object` is specified internally as the subset object, and `-` in the key will be replaced with `.`.
|
|
416
|
+
- <more>: See <https://satijalab.org/seurat/reference/runpca>
|
|
421
417
|
RunUMAP (ns): Arguments for [`RunUMAP()`](https://satijalab.org/seurat/reference/runumap).
|
|
422
418
|
`object` is specified internally as the subset object, and `-` in the key will be replaced with `.`.
|
|
423
419
|
`dims=N` will be expanded to `dims=1:N`; The maximal value of `N` will be the minimum of `N` and the number of columns - 1 for each sample.
|
|
@@ -428,7 +424,7 @@ class SeuratSubClustering(Proc):
|
|
|
428
424
|
FindNeighbors (ns): Arguments for [`FindNeighbors()`](https://satijalab.org/seurat/reference/findneighbors).
|
|
429
425
|
`object` is specified internally, and `-` in the key will be replaced with `.`.
|
|
430
426
|
- reduction: The reduction to use.
|
|
431
|
-
If not provided, `
|
|
427
|
+
If not provided, `object@misc$integrated_new_reduction` will be used.
|
|
432
428
|
- <more>: See <https://satijalab.org/seurat/reference/findneighbors>
|
|
433
429
|
FindClusters (ns): Arguments for [`FindClusters()`](https://satijalab.org/seurat/reference/findclusters).
|
|
434
430
|
`object` is specified internally, and `-` in the key will be replaced with `.`.
|
|
@@ -438,32 +434,33 @@ class SeuratSubClustering(Proc):
|
|
|
438
434
|
The results will be saved in `<casename>_<resolution>`.
|
|
439
435
|
The final resolution will be used to define the clusters at `<casename>`.
|
|
440
436
|
- <more>: See <https://satijalab.org/seurat/reference/findclusters>
|
|
441
|
-
cache (type=auto): Whether to cache the
|
|
437
|
+
cache (type=auto): Whether to cache the results.
|
|
442
438
|
If `True`, the seurat object will be cached in the job output directory, which will not be cleaned up when the job is rerunning.
|
|
443
|
-
|
|
444
|
-
the input and envs of the process.
|
|
445
|
-
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
446
|
-
<https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
|
|
447
|
-
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
448
|
-
`<signature>.RDS` in the cache directory.
|
|
439
|
+
Set to `False` to not cache the results.
|
|
449
440
|
cases (type=json): The cases to perform subclustering.
|
|
450
441
|
Keys are the names of the cases and values are the dicts inherited from `envs` except `mutaters` and `cache`.
|
|
451
442
|
If empty, a case with name `subcluster` will be created with default parameters.
|
|
443
|
+
The case name will be passed to `biopipen.utils::SeuratSubCluster()` as `name`.
|
|
444
|
+
It will be used as the prefix for the reduction name, keys and cluster names.
|
|
445
|
+
For reduction keys, it will be `toupper(<name>)` + "PC_" and `toupper(<name>)` + "UMAP_".
|
|
446
|
+
For cluster names, it will be `<name>` + "." + resolution.
|
|
447
|
+
And the final cluster name will be `<name>`.
|
|
448
|
+
Note that the `name` should be alphanumeric and anything other than alphanumeric will be removed.
|
|
452
449
|
""" # noqa: E501
|
|
453
|
-
|
|
454
450
|
input = "srtobj:file"
|
|
455
|
-
output = "
|
|
451
|
+
output = "outfile:file:{{in.srtobj | stem}}.qs"
|
|
456
452
|
lang = config.lang.rscript
|
|
457
453
|
envs_depth = 1
|
|
458
454
|
envs = {
|
|
459
455
|
"ncores": config.misc.ncores,
|
|
460
456
|
"mutaters": {},
|
|
461
457
|
"subset": None,
|
|
462
|
-
"
|
|
458
|
+
"RunPCA": {},
|
|
459
|
+
"RunUMAP": {},
|
|
463
460
|
"FindNeighbors": {},
|
|
464
461
|
"FindClusters": {"resolution": 0.8},
|
|
465
462
|
"cache": config.path.tmpdir,
|
|
466
|
-
"cases": {
|
|
463
|
+
"cases": {},
|
|
467
464
|
}
|
|
468
465
|
script = "file://../scripts/scrna/SeuratSubClustering.R"
|
|
469
466
|
|
|
@@ -533,12 +530,17 @@ class SeuratClusterStats(Proc):
|
|
|
533
530
|
Envs:
|
|
534
531
|
mutaters (type=json): The mutaters to mutate the metadata to subset the cells.
|
|
535
532
|
The mutaters will be applied in the order specified.
|
|
533
|
+
cache (type=auto): Whether to cache the plots.
|
|
534
|
+
Currently only plots for features are supported, since creating those
|
|
535
|
+
plots can be time consuming.
|
|
536
|
+
If `True`, the plots will be cached in the job output directory, which will
|
|
537
|
+
not be cleaned up when the job is rerunning.
|
|
536
538
|
clustrees_defaults (ns): The parameters for the clustree plots.
|
|
537
539
|
- devpars (ns): The device parameters for the clustree plot.
|
|
538
540
|
- res (type=int): The resolution of the plots.
|
|
539
541
|
- height (type=int): The height of the plots.
|
|
540
542
|
- width (type=int): The width of the plots.
|
|
541
|
-
- more_formats (list): The formats to save the plots other than `png`.
|
|
543
|
+
- more_formats (type=list): The formats to save the plots other than `png`.
|
|
542
544
|
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
543
545
|
- prefix (type=auto): string indicating columns containing clustering information.
|
|
544
546
|
The trailing dot is not necessary and will be added automatically.
|
|
@@ -561,7 +563,7 @@ class SeuratClusterStats(Proc):
|
|
|
561
563
|
- res (type=int): The resolution of the plots.
|
|
562
564
|
- height (type=int): The height of the plots.
|
|
563
565
|
- width (type=int): The width of the plots.
|
|
564
|
-
- more_formats (list): The formats to save the plots other than `png`.
|
|
566
|
+
- more_formats (type=list): The formats to save the plots other than `png`.
|
|
565
567
|
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
566
568
|
- save_data (flag): Whether to save the data used to generate the plot.
|
|
567
569
|
- <more>: Other arguments passed to `scplotter::CellStatPlot`.
|
|
@@ -576,10 +578,7 @@ class SeuratClusterStats(Proc):
|
|
|
576
578
|
>>> }
|
|
577
579
|
ngenes_defaults (ns): The default parameters for `ngenes`.
|
|
578
580
|
The default parameters to plot the number of genes expressed in each cell.
|
|
579
|
-
-
|
|
580
|
-
- group-by: The column name in metadata to group the cells.
|
|
581
|
-
Dodge position will be used to separate the groups.
|
|
582
|
-
- split-by: The column name in metadata to split the cells into different plots.
|
|
581
|
+
- more_formats (type=list): The formats to save the plots other than `png`.
|
|
583
582
|
- subset: An expression to subset the cells, will be passed to `tidyseurat::filter()`.
|
|
584
583
|
- devpars (ns): The device parameters for the plots.
|
|
585
584
|
- res (type=int): The resolution of the plots.
|
|
@@ -603,7 +602,7 @@ class SeuratClusterStats(Proc):
|
|
|
603
602
|
- height (type=int): The height of the plots.
|
|
604
603
|
- width (type=int): The width of the plots.
|
|
605
604
|
- descr: The description of the plot, showing in the report.
|
|
606
|
-
- more_formats (list): The formats to save the plots other than `png`.
|
|
605
|
+
- more_formats (type=list): The formats to save the plots other than `png`.
|
|
607
606
|
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
608
607
|
- save_data (flag): Whether to save the data used to generate the plot.
|
|
609
608
|
- <more>: Other arguments passed to `scplotter::FeatureStatPlot`.
|
|
@@ -643,6 +642,7 @@ class SeuratClusterStats(Proc):
|
|
|
643
642
|
lang = config.lang.rscript
|
|
644
643
|
envs = {
|
|
645
644
|
"mutaters": {},
|
|
645
|
+
"cache": config.path.tmpdir,
|
|
646
646
|
"clustrees_defaults": {
|
|
647
647
|
"devpars": {"res": 100},
|
|
648
648
|
"more_formats": [],
|
|
@@ -667,10 +667,8 @@ class SeuratClusterStats(Proc):
|
|
|
667
667
|
},
|
|
668
668
|
},
|
|
669
669
|
"ngenes_defaults": {
|
|
670
|
-
"ident": "seurat_clusters",
|
|
671
|
-
"group-by": None,
|
|
672
|
-
"split-by": None,
|
|
673
670
|
"subset": None,
|
|
671
|
+
"more_formats": [],
|
|
674
672
|
"devpars": {"res": 100, "height": 800, "width": 1000},
|
|
675
673
|
},
|
|
676
674
|
"ngenes": {
|
|
@@ -702,7 +700,10 @@ class SeuratClusterStats(Proc):
|
|
|
702
700
|
},
|
|
703
701
|
}
|
|
704
702
|
script = "file://../scripts/scrna/SeuratClusterStats.R"
|
|
705
|
-
plugin_opts = {
|
|
703
|
+
plugin_opts = {
|
|
704
|
+
"report": "file://../reports/common.svelte",
|
|
705
|
+
"report_paging": 8,
|
|
706
|
+
}
|
|
706
707
|
|
|
707
708
|
|
|
708
709
|
class ModuleScoreCalculator(Proc):
|
|
@@ -781,7 +782,7 @@ class ModuleScoreCalculator(Proc):
|
|
|
781
782
|
""" # noqa: E501
|
|
782
783
|
|
|
783
784
|
input = "srtobj:file"
|
|
784
|
-
output = "rdsfile:file:{{in.srtobj | stem}}.
|
|
785
|
+
output = "rdsfile:file:{{in.srtobj | stem}}.qs"
|
|
785
786
|
lang = config.lang.rscript
|
|
786
787
|
envs = {
|
|
787
788
|
"defaults": {
|
|
@@ -805,9 +806,11 @@ class ModuleScoreCalculator(Proc):
|
|
|
805
806
|
script = "file://../scripts/scrna/ModuleScoreCalculator.R"
|
|
806
807
|
|
|
807
808
|
|
|
808
|
-
@
|
|
809
|
-
|
|
810
|
-
|
|
809
|
+
@mark(
|
|
810
|
+
deprecated=(
|
|
811
|
+
"[{proc.name}] is deprecated, "
|
|
812
|
+
"use [SeuratClusterStats] or [ClonalStats] instead."
|
|
813
|
+
)
|
|
811
814
|
)
|
|
812
815
|
class CellsDistribution(Proc):
|
|
813
816
|
"""Distribution of cells (i.e. in a TCR clone) from different groups
|
|
@@ -846,7 +849,6 @@ class CellsDistribution(Proc):
|
|
|
846
849
|
mutaters (type=json): The mutaters to mutate the metadata
|
|
847
850
|
Keys are the names of the mutaters and values are the R expressions
|
|
848
851
|
passed to `dplyr::mutate()` to mutate the metadata.
|
|
849
|
-
%(mutate_helpers_clonesize)s
|
|
850
852
|
|
|
851
853
|
cluster_orderby: The order of the clusters to show on the plot.
|
|
852
854
|
An expression passed to `dplyr::summarise()` on the grouped data frame (by `seurat_clusters`).
|
|
@@ -889,7 +891,6 @@ class CellsDistribution(Proc):
|
|
|
889
891
|
value as the case/section name.
|
|
890
892
|
section: The section to show in the report. This allows different cases to be put in the same section in report.
|
|
891
893
|
Only works when `each` is not specified.
|
|
892
|
-
%(envs_section_each)s
|
|
893
894
|
overlap (list): Plot the overlap of cell groups (values of `cells_by`) in different cases
|
|
894
895
|
under the same section.
|
|
895
896
|
The section must have at least 2 cases, each case should have a single `cells_by` column.
|
|
@@ -936,7 +937,6 @@ class CellsDistribution(Proc):
|
|
|
936
937
|
}
|
|
937
938
|
|
|
938
939
|
|
|
939
|
-
@format_placeholder(mutate_helpers_clonesize=MUTATE_HELPERS_CLONESIZE_INDENTED)
|
|
940
940
|
class SeuratMetadataMutater(Proc):
|
|
941
941
|
"""Mutate the metadata of the seurat object
|
|
942
942
|
|
|
@@ -947,12 +947,11 @@ class SeuratMetadataMutater(Proc):
|
|
|
947
947
|
cells.
|
|
948
948
|
|
|
949
949
|
Output:
|
|
950
|
-
|
|
950
|
+
outfile: The seurat object with the additional metadata
|
|
951
951
|
|
|
952
952
|
Envs:
|
|
953
953
|
mutaters (type=json): The mutaters to mutate the metadata.
|
|
954
954
|
The key-value pairs will be passed to `dplyr::mutate()` to mutate the metadata.
|
|
955
|
-
%(mutate_helpers_clonesize)s
|
|
956
955
|
|
|
957
956
|
Requires:
|
|
958
957
|
r-seurat:
|
|
@@ -964,12 +963,13 @@ class SeuratMetadataMutater(Proc):
|
|
|
964
963
|
""" # noqa: E501
|
|
965
964
|
|
|
966
965
|
input = "srtobj:file, metafile:file"
|
|
967
|
-
output = "
|
|
966
|
+
output = "outfile:file:{{in.srtobj | stem}}.qs"
|
|
968
967
|
lang = config.lang.rscript
|
|
969
968
|
envs = {"mutaters": {}}
|
|
970
969
|
script = "file://../scripts/scrna/SeuratMetadataMutater.R"
|
|
971
970
|
|
|
972
971
|
|
|
972
|
+
@mark(deprecated="[{proc.name}] is deprecated, use [SeuratClusterStats] instead.")
|
|
973
973
|
class DimPlots(Proc):
|
|
974
974
|
"""Seurat - Dimensional reduction plots
|
|
975
975
|
|
|
@@ -999,9 +999,6 @@ class DimPlots(Proc):
|
|
|
999
999
|
}
|
|
1000
1000
|
|
|
1001
1001
|
|
|
1002
|
-
@format_placeholder(
|
|
1003
|
-
mutate_helpers_clonesize=MUTATE_HELPERS_CLONESIZE_INDENTED,
|
|
1004
|
-
)
|
|
1005
1002
|
class MarkersFinder(Proc):
|
|
1006
1003
|
"""Find markers between different groups of cells
|
|
1007
1004
|
|
|
@@ -1026,21 +1023,24 @@ class MarkersFinder(Proc):
|
|
|
1026
1023
|
* Used in `future::plan(strategy = "multicore", workers = <ncores>)` to parallelize some Seurat procedures.
|
|
1027
1024
|
* See also: <https://satijalab.org/seurat/articles/future_vignette.html>
|
|
1028
1025
|
mutaters (type=json): The mutaters to mutate the metadata
|
|
1029
|
-
%(mutate_helpers_clonesize)s
|
|
1030
|
-
ident-1: The first group of cells to compare
|
|
1031
|
-
ident-2: The second group of cells to compare
|
|
1032
|
-
If not provided, the rest of the cells are used for `ident-2`.
|
|
1033
1026
|
group-by: The column name in metadata to group the cells.
|
|
1034
1027
|
If only `group-by` is specified, and `ident-1` and `ident-2` are
|
|
1035
1028
|
not specified, markers will be found for all groups in this column
|
|
1036
1029
|
in the manner of "group vs rest" comparison.
|
|
1037
1030
|
`NA` group will be ignored.
|
|
1031
|
+
If `None`, `Seurat::Idents(srtobj)` will be used, which is usually
|
|
1032
|
+
`"seurat_clusters"` after unsupervised clustering.
|
|
1033
|
+
ident-1: The first group of cells to compare
|
|
1034
|
+
When this is empty, the comparisons will be expanded to each group vs. the rest of the cells in `group-by`.
|
|
1035
|
+
ident-2: The second group of cells to compare
|
|
1036
|
+
If not provided, the rest of the cells are used for `ident-2`.
|
|
1038
1037
|
each: The column name in metadata to separate the cells into different
|
|
1039
1038
|
cases.
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1039
|
+
When this is specified, the case will be expanded for each value of
|
|
1040
|
+
the column in metadata. For example, when you have `envs.cases."Cluster Markers".each = "Sample"`,
|
|
1041
|
+
then the case will be expanded as `envs.cases."Cluster Markers - Sample1"`, `envs.cases."Cluster Markers - Sample2"`, etc.
|
|
1042
|
+
You can specify `allmarker_plots` and `overlaps` to plot the markers for all cases in the same plot and plot the overlaps of the markers
|
|
1043
|
+
between different cases by values in this column.
|
|
1044
1044
|
dbs (list): The dbs to do enrichment analysis for significant
|
|
1045
1045
|
markers. See below for all libraries.
|
|
1046
1046
|
<https://maayanlab.cloud/Enrichr/#libraries>
|
|
@@ -1050,24 +1050,28 @@ class MarkersFinder(Proc):
|
|
|
1050
1050
|
`p_val_adj`. For example, `"p_val_adj < 0.05 & abs(avg_log2FC) > 1"`
|
|
1051
1051
|
to select markers with adjusted p-value < 0.05 and absolute log2
|
|
1052
1052
|
fold change > 1.
|
|
1053
|
+
enrich_style (choice): The style of the enrichment analysis.
|
|
1054
|
+
The enrichment analysis will be done by `EnrichIt()` from [`enrichit`](https://pwwang.github.io/enrichit/).
|
|
1055
|
+
Two styles are available:
|
|
1056
|
+
- enrichr: `enrichr` style enrichment analysis (fisher's exact test will be used).
|
|
1057
|
+
- clusterprofiler: `clusterProfiler` style enrichment analysis (hypergeometric test will be used).
|
|
1058
|
+
- clusterProfiler: alias for `clusterprofiler`
|
|
1053
1059
|
assay: The assay to use.
|
|
1054
1060
|
error (flag): Error out if no/not enough markers are found or no pathways are enriched.
|
|
1055
1061
|
If `False`, empty results will be returned.
|
|
1056
|
-
site: The site to use for the `enrichR` enrichment analysis.
|
|
1057
1062
|
subset: An expression to subset the cells for each case.
|
|
1058
|
-
cache (type=auto): Where to cache
|
|
1063
|
+
cache (type=auto): Where to cache the results.
|
|
1059
1064
|
If `True`, cache to `outdir` of the job. If `False`, don't cache.
|
|
1060
1065
|
Otherwise, specify the directory to cache to.
|
|
1061
1066
|
rest (ns): Rest arguments for `Seurat::FindMarkers()`.
|
|
1062
1067
|
Use `-` to replace `.` in the argument name. For example,
|
|
1063
1068
|
use `min-pct` instead of `min.pct`.
|
|
1064
|
-
This only works when `use_presto` is `False`.
|
|
1065
1069
|
- <more>: See <https://satijalab.org/seurat/reference/findmarkers>
|
|
1066
1070
|
allmarker_plots_defaults (ns): Default options for the plots for all markers when `ident-1` is not specified.
|
|
1067
1071
|
- plot_type: The type of the plot.
|
|
1068
1072
|
See <https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html>.
|
|
1069
1073
|
Available types are `violin`, `box`, `bar`, `ridge`, `dim`, `heatmap` and `dot`.
|
|
1070
|
-
- more_formats (list): The extra formats to save the plot in.
|
|
1074
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1071
1075
|
- save_code (flag): Whether to save the code to generate the plot.
|
|
1072
1076
|
- devpars (ns): The device parameters for the plots.
|
|
1073
1077
|
- res (type=int): The resolution of the plots.
|
|
@@ -1083,7 +1087,7 @@ class MarkersFinder(Proc):
|
|
|
1083
1087
|
See <https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html>.
|
|
1084
1088
|
Available types are `violin`, `box`, `bar`, `ridge`, `dim`, `heatmap` and `dot`.
|
|
1085
1089
|
There are two additional types available - `volcano_pct` and `volcano_log2fc`.
|
|
1086
|
-
- more_formats (list): The extra formats to save the plot in.
|
|
1090
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1087
1091
|
- save_code (flag): Whether to save the code to generate the plot.
|
|
1088
1092
|
- devpars (ns): The device parameters for the plots.
|
|
1089
1093
|
- res (type=int): The resolution of the plots.
|
|
@@ -1096,11 +1100,12 @@ class MarkersFinder(Proc):
|
|
|
1096
1100
|
[`scplotter::VolcanoPlot()`](https://pwwang.github.io/plotthis/reference/VolcanoPlot.html).
|
|
1097
1101
|
marker_plots (type=json): Cases of the plots to generate for the markers.
|
|
1098
1102
|
Plot cases. The keys are the names of the cases and the values are the dicts inherited from `marker_plots_defaults`.
|
|
1103
|
+
The cases under `envs.cases` can inherit these options.
|
|
1099
1104
|
enrich_plots_defaults (ns): Default options for the plots to generate for the enrichment analysis.
|
|
1100
1105
|
- plot_type: The type of the plot.
|
|
1101
1106
|
See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1102
1107
|
Available types are `bar`, `dot`, `lollipop`, `network`, `enrichmap` and `wordcloud`.
|
|
1103
|
-
- more_formats (list): The extra formats to save the plot in.
|
|
1108
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1104
1109
|
- save_code (flag): Whether to save the code to generate the plot.
|
|
1105
1110
|
- devpars (ns): The device parameters for the plots.
|
|
1106
1111
|
- res (type=int): The resolution of the plots.
|
|
@@ -1109,44 +1114,33 @@ class MarkersFinder(Proc):
|
|
|
1109
1114
|
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1110
1115
|
enrich_plots (type=json): Cases of the plots to generate for the enrichment analysis.
|
|
1111
1116
|
The keys are the names of the cases and the values are the dicts inherited from `enrich_plots_defaults`.
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
If no cases are specified, the default case will be added with the default values under `envs` with the name `DEFAULT`.
|
|
1116
|
-
If you want to put some cases under the same section in the report, you can specify the section name in the case name
|
|
1117
|
-
as a prefix separated by `::`. For example, `section1::case1` and `section1::case2` will be put `case1` and `case2`
|
|
1118
|
-
under the section `section1`.
|
|
1119
|
-
overlaps_defaults (ns): Default options for investigating the overlapping of significant markers between different cases.
|
|
1120
|
-
- cases (list): The cases to do the overlapping analysis, including the prefix section name.
|
|
1121
|
-
The case must have `ident-1` specified. When `each` is specified, the case will be expanded.
|
|
1122
|
-
For example, `case1` with `each = "group"`, where `group` has `g1` and `g2`, will be expanded to
|
|
1123
|
-
`case1::g1` and `case1::g2`, or `case1::group - g1` and `case1::group - g2` if `prefix_each` is `True`.
|
|
1124
|
-
There must be at least 2 cases to do the overlapping analysis.
|
|
1117
|
+
The cases under `envs.cases` can inherit these options.
|
|
1118
|
+
overlaps_defaults (ns): Default options for investigating the overlapping of significant markers between different cases or comparisons.
|
|
1119
|
+
This means either `ident-1` should be empty, so that the case can be expanded to multiple comparisons, or `each` should be specified, so that it can be expanded to multiple cases.
|
|
1125
1120
|
- sigmarkers: The expression to filter the significant markers for each case.
|
|
1126
1121
|
If not provided, `envs.sigmarkers` will be used.
|
|
1127
|
-
-
|
|
1128
|
-
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
- save_code (flag): Whether to save the code to generate the plot.
|
|
1142
|
-
- devpars (ns): The device parameters for the plots.
|
|
1143
|
-
- res (type=int): The resolution of the plots.
|
|
1144
|
-
- height (type=int): The height of the plots.
|
|
1145
|
-
- width (type=int): The width of the plots.
|
|
1146
|
-
- <more>: More arguments pased to `plotthis::UpsetPlot()`.
|
|
1147
|
-
https://pwwang.github.io/plotthis/reference/upsetplot1.html
|
|
1148
|
-
overlaps (type=json): Cases for investigating the overlapping of significant markers between different cases.
|
|
1122
|
+
- plot_type (choice): The type of the plot to generate for the overlaps.
|
|
1123
|
+
- venn: Use `plotthis::VennDiagram()`.
|
|
1124
|
+
- upset: Use `plotthis::UpsetPlot()`.
|
|
1125
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1126
|
+
- save_code (flag): Whether to save the code to generate the plot.
|
|
1127
|
+
- devpars (ns): The device parameters for the plots.
|
|
1128
|
+
- res (type=int): The resolution of the plots.
|
|
1129
|
+
- height (type=int): The height of the plots.
|
|
1130
|
+
- width (type=int): The width of the plots.
|
|
1131
|
+
- <more>: More arguments passed to `plotthis::VennDiagram()`
|
|
1132
|
+
(<https://pwwang.github.io/plotthis/reference/venndiagram1.html>)
|
|
1133
|
+
or `plotthis::UpsetPlot()`
|
|
1134
|
+
(<https://pwwang.github.io/plotthis/reference/upsetplot1.html>)
|
|
1135
|
+
overlaps (type=json): Cases for investigating the overlapping of significant markers between different cases or comparisons.
|
|
1149
1136
|
The keys are the names of the cases and the values are the dicts inherited from `overlaps_defaults`.
|
|
1137
|
+
There are two situations that we can perform overlaps:
|
|
1138
|
+
1. If `ident-1` is not specified, the overlaps can be performed between different comparisons.
|
|
1139
|
+
2. If `each` is specified, the overlaps can be performed between different cases, where in each case, `ident-1` must be specified.
|
|
1140
|
+
cases (type=json): If you have multiple cases for marker discovery, you can specify them
|
|
1141
|
+
here. The keys are the names of the cases and the values are the above options. If some options are
|
|
1142
|
+
not specified, the default values specified above (under `envs`) will be used.
|
|
1143
|
+
If no cases are specified, the default case will be added with the default values under `envs` with the name `Marker Discovery`.
|
|
1150
1144
|
""" # noqa: E501
|
|
1151
1145
|
|
|
1152
1146
|
input = "srtobj:file"
|
|
@@ -1155,20 +1149,18 @@ class MarkersFinder(Proc):
|
|
|
1155
1149
|
envs = {
|
|
1156
1150
|
"ncores": config.misc.ncores,
|
|
1157
1151
|
"mutaters": {},
|
|
1152
|
+
"group-by": None,
|
|
1158
1153
|
"ident-1": None,
|
|
1159
1154
|
"ident-2": None,
|
|
1160
|
-
"group-by": "seurat_clusters",
|
|
1161
1155
|
"each": None,
|
|
1162
|
-
"prefix_each": True,
|
|
1163
|
-
"prefix_group": True,
|
|
1164
|
-
"assay": None,
|
|
1165
|
-
"subset": None,
|
|
1166
|
-
"error": True,
|
|
1167
|
-
"site": "Enrichr",
|
|
1168
|
-
"rest": {},
|
|
1169
1156
|
"dbs": ["KEGG_2021_Human", "MSigDB_Hallmark_2020"],
|
|
1170
1157
|
"sigmarkers": "p_val_adj < 0.05",
|
|
1158
|
+
"enrich_style": "enrichr",
|
|
1159
|
+
"assay": None,
|
|
1160
|
+
"error": True,
|
|
1161
|
+
"subset": None,
|
|
1171
1162
|
"cache": config.path.tmpdir,
|
|
1163
|
+
"rest": {},
|
|
1172
1164
|
"allmarker_plots_defaults": {
|
|
1173
1165
|
"plot_type": None,
|
|
1174
1166
|
"more_formats": [],
|
|
@@ -1199,24 +1191,15 @@ class MarkersFinder(Proc):
|
|
|
1199
1191
|
"enrich_plots": {
|
|
1200
1192
|
"Bar Plot": {"plot_type": "bar", "ncol": 1, "top_term": 10},
|
|
1201
1193
|
},
|
|
1202
|
-
"cases": {},
|
|
1203
1194
|
"overlaps_defaults": {
|
|
1204
|
-
"cases": [],
|
|
1205
1195
|
"sigmarkers": None,
|
|
1206
|
-
"
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
"devpars": {"res": 100},
|
|
1211
|
-
},
|
|
1212
|
-
"upset": {
|
|
1213
|
-
"enabled": True,
|
|
1214
|
-
"more_formats": [],
|
|
1215
|
-
"save_code": False,
|
|
1216
|
-
"devpars": {"res": 100},
|
|
1217
|
-
},
|
|
1196
|
+
"plot_type": "venn",
|
|
1197
|
+
"more_formats": [],
|
|
1198
|
+
"save_code": False,
|
|
1199
|
+
"devpars": {"res": 100},
|
|
1218
1200
|
},
|
|
1219
1201
|
"overlaps": {},
|
|
1202
|
+
"cases": {},
|
|
1220
1203
|
}
|
|
1221
1204
|
order = 5
|
|
1222
1205
|
script = "file://../scripts/scrna/MarkersFinder.R"
|
|
@@ -1230,7 +1213,7 @@ class TopExpressingGenes(Proc):
|
|
|
1230
1213
|
"""Find the top expressing genes in each cluster
|
|
1231
1214
|
|
|
1232
1215
|
Input:
|
|
1233
|
-
srtobj: The seurat object in RDS format
|
|
1216
|
+
srtobj: The seurat object in RDS or qs/qs2 format
|
|
1234
1217
|
|
|
1235
1218
|
Output:
|
|
1236
1219
|
outdir: The output directory for the tables and plots
|
|
@@ -1245,27 +1228,38 @@ class TopExpressingGenes(Proc):
|
|
|
1245
1228
|
group-by: The column name in metadata to group the cells.
|
|
1246
1229
|
each: The column name in metadata to separate the cells into different
|
|
1247
1230
|
cases.
|
|
1248
|
-
When specified, `ident` must be specified
|
|
1249
|
-
prefix_each (flag): Whether to prefix the `each` column name to the
|
|
1250
|
-
value as the case/section name.
|
|
1251
|
-
section: The section name for the report.
|
|
1252
|
-
Worked only when `each` is not specified and `ident` is specified.
|
|
1253
|
-
Otherwise, the section name will be constructed from `each` and
|
|
1254
|
-
`group-by`.
|
|
1255
|
-
If `DEFAULT`, and it's the only section, it not included in the
|
|
1256
|
-
case/section names.
|
|
1257
1231
|
dbs (list): The dbs to do enrichment analysis for significant
|
|
1258
1232
|
markers. See below for all libraries.
|
|
1259
1233
|
<https://maayanlab.cloud/Enrichr/#libraries>
|
|
1260
1234
|
n (type=int): The number of top expressing genes to find.
|
|
1235
|
+
enrich_style (choice): The style of the enrichment analysis.
|
|
1236
|
+
The enrichment analysis will be done by `EnrichIt()` from [`enrichit`](https://pwwang.github.io/enrichit/).
|
|
1237
|
+
Two styles are available:
|
|
1238
|
+
- enrichr: `enrichr` style enrichment analysis (fisher's exact test will be used).
|
|
1239
|
+
- clusterprofiler: `clusterProfiler` style enrichment analysis (hypergeometric test will be used).
|
|
1240
|
+
- clusterProfiler: alias for `clusterprofiler`
|
|
1241
|
+
enrich_plots_defaults (ns): Default options for the plots to generate for the enrichment analysis.
|
|
1242
|
+
- plot_type: The type of the plot.
|
|
1243
|
+
See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1244
|
+
Available types are `bar`, `dot`, `lollipop`, `network`, `enrichmap` and `wordcloud`.
|
|
1245
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1246
|
+
- save_code (flag): Whether to save the code to generate the plot.
|
|
1247
|
+
- devpars (ns): The device parameters for the plots.
|
|
1248
|
+
- res (type=int): The resolution of the plots.
|
|
1249
|
+
- height (type=int): The height of the plots.
|
|
1250
|
+
- width (type=int): The width of the plots.
|
|
1251
|
+
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1252
|
+
enrich_plots (type=json): Cases of the plots to generate for the enrichment analysis.
|
|
1253
|
+
The keys are the names of the cases and the values are the dicts inherited from `enrich_plots_defaults`.
|
|
1254
|
+
The cases under `envs.cases` can inherit these options.
|
|
1261
1255
|
subset: An expression to subset the cells for each case.
|
|
1262
1256
|
cases (type=json): If you have multiple cases, you can specify them
|
|
1263
1257
|
here. The keys are the names of the cases and the values are the
|
|
1264
1258
|
above options except `mutaters`. If some options are
|
|
1265
1259
|
not specified, the default values specified above will be used.
|
|
1266
1260
|
If no cases are specified, the default case will be added with
|
|
1267
|
-
the default values under `envs` with the name `
|
|
1268
|
-
"""
|
|
1261
|
+
the default values under `envs` with the name `Top Expressing Genes`.
|
|
1262
|
+
""" # noqa: E501
|
|
1269
1263
|
|
|
1270
1264
|
input = "srtobj:file"
|
|
1271
1265
|
output = "outdir:dir:{{in.srtobj | stem}}.top_expressing_genes"
|
|
@@ -1274,13 +1268,20 @@ class TopExpressingGenes(Proc):
|
|
|
1274
1268
|
envs = {
|
|
1275
1269
|
"mutaters": {},
|
|
1276
1270
|
"ident": None,
|
|
1277
|
-
"group-by":
|
|
1271
|
+
"group-by": None,
|
|
1278
1272
|
"each": None,
|
|
1279
|
-
"prefix_each": True,
|
|
1280
|
-
"section": "DEFAULT",
|
|
1281
1273
|
"dbs": ["KEGG_2021_Human", "MSigDB_Hallmark_2020"],
|
|
1282
1274
|
"n": 250,
|
|
1283
1275
|
"subset": None,
|
|
1276
|
+
"enrich_style": "enrichr",
|
|
1277
|
+
"enrich_plots_defaults": {
|
|
1278
|
+
"more_formats": [],
|
|
1279
|
+
"save_code": False,
|
|
1280
|
+
"devpars": {"res": 100},
|
|
1281
|
+
},
|
|
1282
|
+
"enrich_plots": {
|
|
1283
|
+
"Bar Plot": {"plot_type": "bar", "ncol": 1, "top_term": 10},
|
|
1284
|
+
},
|
|
1284
1285
|
"cases": {},
|
|
1285
1286
|
}
|
|
1286
1287
|
plugin_opts = {
|
|
@@ -1301,7 +1302,7 @@ class ExprImputation(Proc):
|
|
|
1301
1302
|
- [Dijk, David van, et al. "MAGIC: A diffusion-based imputation method reveals gene-gene interactions in single-cell RNA-sequencing data." BioRxiv (2017): 111591.](https://www.cell.com/cell/abstract/S0092-8674(18)30724-4)
|
|
1302
1303
|
|
|
1303
1304
|
Input:
|
|
1304
|
-
infile: The input file in RDS format of Seurat object
|
|
1305
|
+
infile: The input file in RDS/qs format of Seurat object
|
|
1305
1306
|
|
|
1306
1307
|
Output:
|
|
1307
1308
|
outfile: The output file in RDS format of Seurat object
|
|
@@ -1321,6 +1322,9 @@ class ExprImputation(Proc):
|
|
|
1321
1322
|
- refgene: The reference gene file
|
|
1322
1323
|
rmagic_args (ns): The arguments for rmagic
|
|
1323
1324
|
- python: The python path where magic-impute is installed.
|
|
1325
|
+
- threshold (type=float): The threshold for magic imputation.
|
|
1326
|
+
Only the genes with dropout rates greater than this threshold (No. of
|
|
1327
|
+
cells with non-zero expression / total number of cells) will be imputed.
|
|
1324
1328
|
alra_args (type=json): The arguments for `RunALRA()`
|
|
1325
1329
|
|
|
1326
1330
|
Requires:
|
|
@@ -1353,11 +1357,11 @@ class ExprImputation(Proc):
|
|
|
1353
1357
|
""" # noqa: E501
|
|
1354
1358
|
|
|
1355
1359
|
input = "infile:file"
|
|
1356
|
-
output = "outfile:file:{{in.infile | stem}}.imputed.
|
|
1360
|
+
output = "outfile:file:{{in.infile | stem}}.imputed.qs"
|
|
1357
1361
|
lang = config.lang.rscript
|
|
1358
1362
|
envs = {
|
|
1359
1363
|
"tool": "alra",
|
|
1360
|
-
"rmagic_args": {"python": config.exe.magic_python},
|
|
1364
|
+
"rmagic_args": {"python": config.exe.magic_python, "threshold": 0.5},
|
|
1361
1365
|
"scimpute_args": {
|
|
1362
1366
|
"drop_thre": 0.5,
|
|
1363
1367
|
"kcluster": None,
|
|
@@ -1556,10 +1560,6 @@ class SeuratTo10X(Proc):
|
|
|
1556
1560
|
script = "file://../scripts/scrna/SeuratTo10X.R"
|
|
1557
1561
|
|
|
1558
1562
|
|
|
1559
|
-
@format_placeholder(
|
|
1560
|
-
mutate_helpers_clonesize=MUTATE_HELPERS_CLONESIZE_INDENTED,
|
|
1561
|
-
envs_section_each=ENVS_SECTION_EACH_INDENTED,
|
|
1562
|
-
)
|
|
1563
1563
|
class ScFGSEA(Proc):
|
|
1564
1564
|
"""Gene set enrichment analysis for cells in different groups using `fgsea`
|
|
1565
1565
|
|
|
@@ -1588,17 +1588,12 @@ class ScFGSEA(Proc):
|
|
|
1588
1588
|
Passed to `nproc` of `fgseaMultilevel()`.
|
|
1589
1589
|
mutaters (type=json): The mutaters to mutate the metadata.
|
|
1590
1590
|
The key-value pairs will be passed to `dplyr::mutate()` to mutate the metadata.
|
|
1591
|
-
%(mutate_helpers_clonesize)s
|
|
1592
1591
|
|
|
1593
1592
|
group-by: The column name in metadata to group the cells.
|
|
1594
1593
|
ident-1: The first group of cells to compare
|
|
1595
1594
|
ident-2: The second group of cells to compare, if not provided, the rest of the cells that are not `NA`s in `group-by` column are used for `ident-2`.
|
|
1596
1595
|
each: The column name in metadata to separate the cells into different subsets to do the analysis.
|
|
1597
|
-
prefix_each (flag): Whether to prefix the `each` column name to the values as the case/section name.
|
|
1598
1596
|
subset: An expression to subset the cells.
|
|
1599
|
-
section: The section name for the report. Worked only when `each` is not specified. Otherwise, the section name will be constructed from `each` and its value.
|
|
1600
|
-
This allows different cases to be put into the same section in the report.
|
|
1601
|
-
%(envs_section_each)s
|
|
1602
1597
|
gmtfile: The pathways in GMT format, with the gene names/ids in the same format as the seurat object.
|
|
1603
1598
|
One could also use a URL to a GMT file. For example, from <https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/Pathways/>.
|
|
1604
1599
|
method (choice): The method to do the preranking.
|
|
@@ -1629,7 +1624,7 @@ class ScFGSEA(Proc):
|
|
|
1629
1624
|
cases (type=json;order=99): If you have multiple cases, you can specify them here.
|
|
1630
1625
|
The keys are the names of the cases and the values are the above options except `mutaters`.
|
|
1631
1626
|
If some options are not specified, the default values specified above will be used.
|
|
1632
|
-
If no cases are specified, the default case will be added with the name `
|
|
1627
|
+
If no cases are specified, the default case will be added with the name `GSEA`.
|
|
1633
1628
|
|
|
1634
1629
|
Requires:
|
|
1635
1630
|
bioconductor-fgsea:
|
|
@@ -1648,9 +1643,7 @@ class ScFGSEA(Proc):
|
|
|
1648
1643
|
"ident-1": None,
|
|
1649
1644
|
"ident-2": None,
|
|
1650
1645
|
"each": None,
|
|
1651
|
-
"prefix_each": True,
|
|
1652
1646
|
"subset": None,
|
|
1653
|
-
"section": "DEFAULT",
|
|
1654
1647
|
"gmtfile": "",
|
|
1655
1648
|
"method": "s2n",
|
|
1656
1649
|
"top": 20,
|
|
@@ -1701,10 +1694,10 @@ class CellTypeAnnotation(Proc):
|
|
|
1701
1694
|
```
|
|
1702
1695
|
|
|
1703
1696
|
Input:
|
|
1704
|
-
sobjfile: The
|
|
1697
|
+
sobjfile: The single-cell object in RDS/qs/qs2/h5ad format.
|
|
1705
1698
|
|
|
1706
1699
|
Output:
|
|
1707
|
-
outfile: The rds file of seurat object with cell type annotated.
|
|
1700
|
+
outfile: The rds/qs/qs2/h5ad file of seurat object with cell type annotated.
|
|
1708
1701
|
A text file containing the mapping from the old `seurat_clusters` to the new cell types
|
|
1709
1702
|
will be generated and saved to `cluster2celltype.tsv` under the job output directory.
|
|
1710
1703
|
|
|
@@ -1761,6 +1754,8 @@ class CellTypeAnnotation(Proc):
|
|
|
1761
1754
|
at the cost of increased runtime.
|
|
1762
1755
|
- over_clustering (type=auto): The column name in metadata to use as clusters for majority voting.
|
|
1763
1756
|
Set to `False` to disable over-clustering.
|
|
1757
|
+
When `in.sobjfile` is rds/qs/qs2 (supposing we have a Seurat object), the default ident is used by default.
|
|
1758
|
+
Otherwise, it is False by default.
|
|
1764
1759
|
- assay: When converting a Seurat object to AnnData, the assay to use.
|
|
1765
1760
|
If input is h5seurat, this defaults to RNA.
|
|
1766
1761
|
If input is Seurat object in RDS, this defaults to the default assay.
|
|
@@ -1773,7 +1768,8 @@ class CellTypeAnnotation(Proc):
|
|
|
1773
1768
|
An RDS file will be generated for other tools.
|
|
1774
1769
|
- input: Use the same file type as the input.
|
|
1775
1770
|
- rds: Use RDS file.
|
|
1776
|
-
-
|
|
1771
|
+
- qs: Use qs2 file.
|
|
1772
|
+
- qs2: Use qs2 file.
|
|
1777
1773
|
- h5ad: Use AnnData file.
|
|
1778
1774
|
|
|
1779
1775
|
Requires:
|
|
@@ -1816,7 +1812,7 @@ class CellTypeAnnotation(Proc):
|
|
|
1816
1812
|
"model": None,
|
|
1817
1813
|
"python": config.lang.python,
|
|
1818
1814
|
"majority_voting": True,
|
|
1819
|
-
"over_clustering":
|
|
1815
|
+
"over_clustering": None,
|
|
1820
1816
|
"assay": None,
|
|
1821
1817
|
},
|
|
1822
1818
|
"merge": False,
|
|
@@ -1860,8 +1856,9 @@ class SeuratMap2Ref(Proc):
|
|
|
1860
1856
|
The file type is determined by the extension. `.rds` or `.RDS` for
|
|
1861
1857
|
RDS file, `.h5seurat` or `.h5` for h5seurat file.
|
|
1862
1858
|
refnorm (choice): Normalization method the reference used. The same method will be used for the query.
|
|
1863
|
-
-
|
|
1859
|
+
- LogNormalize: Using [`NormalizeData`](https://satijalab.org/seurat/reference/normalizedata).
|
|
1864
1860
|
- SCTransform: Using [`SCTransform`](https://satijalab.org/seurat/reference/sctransform).
|
|
1861
|
+
- SCT: Alias of SCTransform.
|
|
1865
1862
|
- auto: Automatically detect the normalization method.
|
|
1866
1863
|
If the default assay of reference is `SCT`, then `SCTransform` will be used.
|
|
1867
1864
|
split_by: The column name in metadata to split the query into multiple objects.
|
|
@@ -1900,9 +1897,19 @@ class SeuratMap2Ref(Proc):
|
|
|
1900
1897
|
- refdata (type=json): Extra data to transfer from the reference to the query.
|
|
1901
1898
|
- <more>: See <https://satijalab.org/seurat/reference/mapquery>.
|
|
1902
1899
|
Note that the hyphen (`-`) will be transformed into `.` for the keys.
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1900
|
+
cache (type=auto): Whether to cache the information at different steps.
|
|
1901
|
+
If `True`, the seurat object will be cached in the job output directory, which will not be cleaned up when the job is rerun.
|
|
1902
|
+
The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
|
|
1903
|
+
the input and envs of the process.
|
|
1904
|
+
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
1905
|
+
<https://github.com/satijalab/seurat/issues/6748> for more details, including reproducibility issues.
|
|
1906
|
+
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
1907
|
+
`<signature>.<kind>.RDS` in the cache directory.
|
|
1908
|
+
plots (type=json): The plots to generate.
|
|
1909
|
+
The keys are the names of the plots and the values are the arguments for the plot.
|
|
1910
|
+
The arguments will be passed to `biopipen.utils::VizSeuratMap2Ref()` to generate the plots.
|
|
1911
|
+
The plots will be saved to the output directory.
|
|
1912
|
+
See <https://pwwang.github.io/biopipen.utils.R/reference/VizSeuratMap2Ref.html>.
|
|
1906
1913
|
|
|
1907
1914
|
Requires:
|
|
1908
1915
|
r-seurat:
|
|
@@ -1910,8 +1917,9 @@ class SeuratMap2Ref(Proc):
|
|
|
1910
1917
|
""" # noqa: E501
|
|
1911
1918
|
|
|
1912
1919
|
input = "sobjfile:file"
|
|
1913
|
-
output = "outfile:file:{{in.sobjfile | stem}}.
|
|
1920
|
+
output = "outfile:file:{{in.sobjfile | stem}}.qs"
|
|
1914
1921
|
lang = config.lang.rscript
|
|
1922
|
+
envs_depth = 3
|
|
1915
1923
|
envs = {
|
|
1916
1924
|
"ncores": config.misc.ncores,
|
|
1917
1925
|
"use": None,
|
|
@@ -1930,21 +1938,29 @@ class SeuratMap2Ref(Proc):
|
|
|
1930
1938
|
"normalization-method": "LogNormalize",
|
|
1931
1939
|
},
|
|
1932
1940
|
"FindTransferAnchors": {
|
|
1933
|
-
"reference-reduction": "spca",
|
|
1941
|
+
# "reference-reduction": "spca",
|
|
1934
1942
|
},
|
|
1935
1943
|
"MapQuery": {
|
|
1936
|
-
"reference-reduction": "spca",
|
|
1937
|
-
"reduction-model": "wnn.umap",
|
|
1944
|
+
# "reference-reduction": "spca",
|
|
1945
|
+
# "reduction-model": "wnn.umap",
|
|
1938
1946
|
"refdata": {
|
|
1939
1947
|
# "celltype-l1": "celltype.l1",
|
|
1940
1948
|
# "celltype-l2": "celltype.l2",
|
|
1941
1949
|
# "predicted_ADT": "ADT",
|
|
1942
1950
|
},
|
|
1943
1951
|
},
|
|
1944
|
-
"
|
|
1952
|
+
"cache": config.path.tmpdir,
|
|
1953
|
+
"plots": {
|
|
1954
|
+
"Mapped Identity": {
|
|
1955
|
+
"features": "{ident}:{use}",
|
|
1956
|
+
},
|
|
1957
|
+
"Mapping Score": {
|
|
1958
|
+
"features": "{ident}.score",
|
|
1959
|
+
},
|
|
1960
|
+
},
|
|
1945
1961
|
}
|
|
1946
1962
|
script = "file://../scripts/scrna/SeuratMap2Ref.R"
|
|
1947
|
-
plugin_opts = {"report": "file://../reports/
|
|
1963
|
+
plugin_opts = {"report": "file://../reports/common.svelte"}
|
|
1948
1964
|
|
|
1949
1965
|
|
|
1950
1966
|
class RadarPlots(Proc):
|
|
@@ -2008,7 +2024,7 @@ class RadarPlots(Proc):
|
|
|
2008
2024
|
///
|
|
2009
2025
|
|
|
2010
2026
|
Input:
|
|
2011
|
-
srtobj: The seurat object in RDS format
|
|
2027
|
+
srtobj: The seurat object in RDS or qs/qs2 format
|
|
2012
2028
|
|
|
2013
2029
|
Output:
|
|
2014
2030
|
outdir: The output directory for the plots
|
|
@@ -2124,10 +2140,7 @@ class RadarPlots(Proc):
|
|
|
2124
2140
|
}
|
|
2125
2141
|
|
|
2126
2142
|
|
|
2127
|
-
@
|
|
2128
|
-
mutate_helpers_clonesize=MUTATE_HELPERS_CLONESIZE_INDENTED,
|
|
2129
|
-
envs_section_each=ENVS_SECTION_EACH_INDENTED,
|
|
2130
|
-
)
|
|
2143
|
+
@mark(deprecated=True)
|
|
2131
2144
|
class MetaMarkers(Proc):
|
|
2132
2145
|
"""Find markers between three or more groups of cells, using one-way ANOVA
|
|
2133
2146
|
or Kruskal-Wallis test.
|
|
@@ -2153,7 +2166,6 @@ class MetaMarkers(Proc):
|
|
|
2153
2166
|
ncores (type=int): Number of cores to use to parallelize for genes
|
|
2154
2167
|
mutaters (type=json): The mutaters to mutate the metadata
|
|
2155
2168
|
The key-value pairs will be passed to `dplyr::mutate()` to mutate the metadata.
|
|
2156
|
-
%(mutate_helpers_clonesize)s
|
|
2157
2169
|
|
|
2158
2170
|
group-by: The column name in metadata to group the cells.
|
|
2159
2171
|
If only `group-by` is specified, and `idents` are
|
|
@@ -2187,7 +2199,6 @@ class MetaMarkers(Proc):
|
|
|
2187
2199
|
Works only when `each` is not specified.
|
|
2188
2200
|
Otherwise, the section name will be constructed from `each` and `group-by`.
|
|
2189
2201
|
If `DEFAULT`, and it's the only section, it is not included in the case/section names.
|
|
2190
|
-
%(envs_section_each)s
|
|
2191
2202
|
method (choice): The method for the test.
|
|
2192
2203
|
- anova: One-way ANOVA
|
|
2193
2204
|
- kruskal: Kruskal-Wallis test
|
|
@@ -2228,7 +2239,7 @@ class Seurat2AnnData(Proc):
|
|
|
2228
2239
|
"""Convert seurat object to AnnData
|
|
2229
2240
|
|
|
2230
2241
|
Input:
|
|
2231
|
-
sobjfile: The seurat object file, in RDS or
|
|
2242
|
+
sobjfile: The seurat object file, in RDS or qs/qs2 format
|
|
2232
2243
|
|
|
2233
2244
|
Output:
|
|
2234
2245
|
outfile: The AnnData file
|
|
@@ -2249,29 +2260,26 @@ class AnnData2Seurat(Proc):
|
|
|
2249
2260
|
"""Convert AnnData to seurat object
|
|
2250
2261
|
|
|
2251
2262
|
Input:
|
|
2252
|
-
adfile: The AnnData file
|
|
2263
|
+
adfile: The AnnData .h5ad file
|
|
2253
2264
|
|
|
2254
2265
|
Output:
|
|
2255
|
-
outfile: The seurat object file in RDS format
|
|
2266
|
+
outfile: The seurat object file in RDS or qs/qs2 format
|
|
2256
2267
|
|
|
2257
2268
|
Envs:
|
|
2258
2269
|
assay: The assay to use to convert to seurat object.
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
- h5seurat: h5seurat file
|
|
2262
|
-
dotplot_check (type=auto): Whether to do a check with `Seurat::DotPlot`
|
|
2270
|
+
dotplot_check (type=auto): Whether to do a check with a dot plot.
|
|
2271
|
+
(`scplotter::FeatureStatPlot(plot_type = "dot", ..)` will be used)
|
|
2263
2272
|
to see if the conversion is successful.
|
|
2264
2273
|
Set to `False` to disable the check.
|
|
2265
2274
|
If `True`, top 10 variable genes will be used for the check.
|
|
2266
2275
|
You can give a list of genes or a string of genes with comma (`,`) separated
|
|
2267
2276
|
to use for the check.
|
|
2268
|
-
Only works for `outtype = 'rds'`.
|
|
2269
2277
|
"""
|
|
2270
2278
|
|
|
2271
2279
|
input = "adfile:file"
|
|
2272
|
-
output = "outfile:file:{{in.adfile | stem}}.
|
|
2280
|
+
output = "outfile:file:{{in.adfile | stem}}.qs"
|
|
2273
2281
|
lang = config.lang.rscript
|
|
2274
|
-
envs = {"
|
|
2282
|
+
envs = {"assay": "RNA", "dotplot_check": True}
|
|
2275
2283
|
script = "file://../scripts/scrna/AnnData2Seurat.R"
|
|
2276
2284
|
|
|
2277
2285
|
|
|
@@ -2287,7 +2295,7 @@ class ScSimulation(Proc):
|
|
|
2287
2295
|
So this could also work as a unique identifier for the simulation (ie. Sample ID).
|
|
2288
2296
|
|
|
2289
2297
|
Output:
|
|
2290
|
-
outfile: The output Seurat object/SingleCellExperiment in
|
|
2298
|
+
outfile: The output Seurat object/SingleCellExperiment in qs/qs2 format
|
|
2291
2299
|
|
|
2292
2300
|
Envs:
|
|
2293
2301
|
ngenes (type=int): The number of genes to simulate
|
|
@@ -2450,58 +2458,56 @@ class CellCellCommunication(Proc):
|
|
|
2450
2458
|
class CellCellCommunicationPlots(Proc):
|
|
2451
2459
|
"""Visualization for cell-cell communication inference.
|
|
2452
2460
|
|
|
2453
|
-
R package [`CCPlotR`](https://github.com/Sarah145/CCPlotR) is used to visualize
|
|
2454
|
-
the results.
|
|
2455
|
-
|
|
2456
2461
|
Input:
|
|
2457
2462
|
cccfile: The output file from `CellCellCommunication`
|
|
2458
|
-
or a tab-separated file with the following columns: `source`, `target`,
|
|
2459
|
-
`ligand`, `receptor`, and `score`.
|
|
2460
|
-
If so, `in.expfile` can be provided where `exp_df` is needed.
|
|
2461
|
-
expfile: The expression file with the expression of ligands and receptors.
|
|
2462
|
-
Columns include: `cell_type`, `gene` and `mean_exp`.
|
|
2463
2463
|
|
|
2464
2464
|
Output:
|
|
2465
2465
|
outdir: The output directory for the plots.
|
|
2466
2466
|
|
|
2467
2467
|
Envs:
|
|
2468
|
-
score_col: The column name in the input file that contains the score, if
|
|
2469
|
-
the input file is from `CellCellCommunication`.
|
|
2470
|
-
Two alias columns are added in the result file of `CellCellCommunication`,
|
|
2471
|
-
`mag_score` and `spec_score`, which are the magnitude and specificity
|
|
2472
|
-
scores.
|
|
2473
2468
|
subset: An expression to pass to `dplyr::filter()` to subset the ccc data.
|
|
2469
|
+
magnitude: The column name in the data to use as the magnitude of the
|
|
2470
|
+
communication. By default, the second last column will be used.
|
|
2471
|
+
See `li.mt.show_methods()` for the available methods in LIANA, or
|
|
2472
|
+
<https://liana-py.readthedocs.io/en/latest/notebooks/basic_usage.html#Tileplot>
|
|
2473
|
+
specificity: The column name in the data to use as the specificity of the communication.
|
|
2474
|
+
By default, the last column will be used. If the method doesn't have a specificity, set it to None.
|
|
2475
|
+
devpars (ns): The parameters for the plot.
|
|
2476
|
+
- res (type=int): The resolution of the plot
|
|
2477
|
+
- height (type=int): The height of the plot
|
|
2478
|
+
- width (type=int): The width of the plot
|
|
2479
|
+
more_formats (type=list): The additional formats to save the plots.
|
|
2480
|
+
descr: The description of the plot.
|
|
2474
2481
|
cases (type=json): The cases for the plots.
|
|
2475
2482
|
The keys are the names of the cases and the values are the arguments for
|
|
2476
|
-
the plots. The arguments include
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2480
|
-
`width`, and `height`.
|
|
2481
|
-
* section: The section name for the report to group the plots.
|
|
2482
|
-
* <other>: Other arguments for `cc_<kind>` function in `CCPlotR`.
|
|
2483
|
-
See the documentation for more details.
|
|
2484
|
-
Or you can use `?CCPlotR::cc_<kind>` in R.
|
|
2485
|
-
"""
|
|
2483
|
+
the plots. The arguments include the ones inherited from `envs`.
|
|
2484
|
+
<more>: Other arguments passed to
|
|
2485
|
+
[scplotter::CCCPlot](https://pwwang.github.io/scplotter/reference/CCCPlot.html)
|
|
2486
|
+
""" # noqa: E501
|
|
2486
2487
|
|
|
2487
|
-
input = "cccfile:file
|
|
2488
|
-
output = "outdir:dir:{{in.cccfile | stem}}
|
|
2488
|
+
input = "cccfile:file"
|
|
2489
|
+
output = "outdir:dir:{{in.cccfile | stem}}_plots"
|
|
2489
2490
|
lang = config.lang.rscript
|
|
2490
2491
|
envs = {
|
|
2491
|
-
"score_col": "mag_score",
|
|
2492
2492
|
"subset": None,
|
|
2493
|
+
"magnitude": None,
|
|
2494
|
+
"specificity": None,
|
|
2495
|
+
"devpars": {"res": 100},
|
|
2496
|
+
"more_formats": [],
|
|
2497
|
+
"descr": "Cell-cell communication plot",
|
|
2493
2498
|
"cases": {},
|
|
2494
2499
|
}
|
|
2495
2500
|
script = "file://../scripts/scrna/CellCellCommunicationPlots.R"
|
|
2496
2501
|
plugin_opts = {
|
|
2497
|
-
"report": "file://../reports/
|
|
2502
|
+
"report": "file://../reports/common.svelte",
|
|
2498
2503
|
}
|
|
2499
2504
|
|
|
2500
2505
|
|
|
2501
2506
|
class ScVelo(Proc):
|
|
2502
2507
|
"""Velocity analysis for single-cell RNA-seq data
|
|
2503
2508
|
|
|
2504
|
-
This process is implemented based on the Python package `scvelo
|
|
2509
|
+
This process is implemented based on the Python package `scvelo` (v0.3.3).
|
|
2510
|
+
Note that it doesn't work with `numpy>=2`.
|
|
2505
2511
|
|
|
2506
2512
|
Input:
|
|
2507
2513
|
sobjfile: The seurat object file in RDS or h5seurat format or AnnData file.
|
|
@@ -2509,18 +2515,20 @@ class ScVelo(Proc):
|
|
|
2509
2515
|
Output:
|
|
2510
2516
|
outfile: The output object with the velocity embeddings and information.
|
|
2511
2517
|
In either RDS, h5seurat or h5ad format, depending on the `envs.outtype`.
|
|
2512
|
-
|
|
2518
|
+
There will also be plots generated in the output directory
|
|
2519
|
+
(parent directory of `outfile`).
|
|
2520
|
+
Note that these plots will not be used in the report, but can be used as
|
|
2521
|
+
supplementary information for the velocity analysis.
|
|
2522
|
+
To visualize the velocity embeddings, you can use the `SeuratClusterStats`
|
|
2523
|
+
process with `v_reduction` provided to one of the `envs.dimplots`.
|
|
2513
2524
|
|
|
2514
2525
|
Envs:
|
|
2515
2526
|
ncores (type=int): Number of cores to use.
|
|
2516
2527
|
group_by: The column name in metadata to group the cells.
|
|
2517
2528
|
Typically, this column should be the cluster id.
|
|
2518
|
-
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
PCA will be performed.
|
|
2522
|
-
modes (type=auto): The modes to use for the analysis.
|
|
2523
|
-
A list or a string with comma separated values.
|
|
2529
|
+
mode (type=list): The mode to use for the velocity analysis.
|
|
2530
|
+
It should be a subset of `['deterministic', 'stochastic', 'dynamical']`,
|
|
2531
|
+
meaning that we can perform the velocity analysis in multiple modes.
|
|
2524
2532
|
fitting_by (choice): The mode to use for fitting the velocities.
|
|
2525
2533
|
- stochastic: Stochastic mode
|
|
2526
2534
|
- deterministic: Deterministic mode
|
|
@@ -2528,16 +2536,6 @@ class ScVelo(Proc):
|
|
|
2528
2536
|
(both unspliced and spliced) required for a gene.
|
|
2529
2537
|
n_neighbors (type=int): The number of neighbors to use for the velocity graph.
|
|
2530
2538
|
n_pcs (type=int): The number of PCs to use for the velocity graph.
|
|
2531
|
-
stream_smooth (type=float): Multiplication factor for scale in Gaussian kernel
|
|
2532
|
-
around grid point.
|
|
2533
|
-
stream_density (type=float): Controls the closeness of streamlines.
|
|
2534
|
-
When density = 2.0, the domain is divided into a 60x60 grid, whereas
|
|
2535
|
-
density linearly scales this grid. Each cell in the grid can have,
|
|
2536
|
-
at most, one traversing streamline. For different densities in each
|
|
2537
|
-
direction, use a tuple (density_x, density_y).
|
|
2538
|
-
arrow_size (type=float): Scaling factor for the arrow size.
|
|
2539
|
-
arrow_length (type=float): Length of arrows.
|
|
2540
|
-
arrow_density (type=float): Density of arrows.
|
|
2541
2539
|
denoise (flag): Whether to denoise the data.
|
|
2542
2540
|
denoise_topn (type=int): Number of genes with highest likelihood selected to
|
|
2543
2541
|
infer velocity directions.
|
|
@@ -2546,57 +2544,60 @@ class ScVelo(Proc):
|
|
|
2546
2544
|
infer velocity directions.
|
|
2547
2545
|
calculate_velocity_genes (flag): Whether to calculate the velocity genes.
|
|
2548
2546
|
top_n (type=int): The number of top features to plot.
|
|
2549
|
-
res (type=int): The resolution of the plots.
|
|
2550
2547
|
rscript: The path to the Rscript executable used to convert RDS file to AnnData.
|
|
2551
2548
|
If `in.sobjfile` is an RDS file, it will be converted to an AnnData file
|
|
2552
2549
|
(h5ad). You need `Seurat`, `SeuratDisk` and `digest` installed.
|
|
2553
2550
|
outtype (choice): The output file type.
|
|
2554
|
-
- input
|
|
2555
|
-
-
|
|
2556
|
-
-
|
|
2557
|
-
-
|
|
2551
|
+
- <input>: The same as the input file type.
|
|
2552
|
+
- h5seurat: h5seurat file
|
|
2553
|
+
- h5ad: h5ad file
|
|
2554
|
+
- qs: qs/qs2 file
|
|
2555
|
+
- qs2: qs2 file
|
|
2556
|
+
- rds: RDS file
|
|
2558
2557
|
"""
|
|
2559
2558
|
|
|
2560
2559
|
input = "sobjfile:file"
|
|
2561
|
-
output =
|
|
2560
|
+
output = (
|
|
2561
|
+
"outfile:file:{{in.sobjfile | stem}}-scvelo."
|
|
2562
|
+
"{{ext0(in.sobjfile) if envs.outtype == '<input>' else envs.outtype}}"
|
|
2563
|
+
)
|
|
2562
2564
|
lang = config.lang.python
|
|
2563
2565
|
envs = {
|
|
2564
2566
|
"ncores": config.misc.ncores,
|
|
2565
2567
|
"group_by": "seurat_clusters",
|
|
2566
|
-
"
|
|
2567
|
-
"modes": ["stochastic", "deterministic", "dynamical"],
|
|
2568
|
+
"mode": ["deterministic", "stochastic", "dynamical"],
|
|
2568
2569
|
"fitting_by": "stochastic",
|
|
2569
2570
|
"min_shared_counts": 30,
|
|
2570
2571
|
"n_neighbors": 30,
|
|
2571
2572
|
"n_pcs": 30,
|
|
2572
|
-
"stream_smooth": 0.5,
|
|
2573
|
-
"stream_density": 2.0,
|
|
2574
|
-
"arrow_size": 5.0,
|
|
2575
|
-
"arrow_length": 5.0,
|
|
2576
|
-
"arrow_density": 0.5,
|
|
2577
2573
|
"denoise": False,
|
|
2578
2574
|
"denoise_topn": 3,
|
|
2579
2575
|
"kinetics": False,
|
|
2580
2576
|
"kinetics_topn": 100,
|
|
2581
2577
|
"calculate_velocity_genes": False,
|
|
2582
2578
|
"top_n": 6,
|
|
2583
|
-
"res": 100,
|
|
2584
2579
|
"rscript": config.lang.rscript,
|
|
2585
|
-
"outtype": "input",
|
|
2580
|
+
"outtype": "<input>",
|
|
2586
2581
|
}
|
|
2587
2582
|
script = "file://../scripts/scrna/ScVelo.py"
|
|
2588
2583
|
|
|
2589
2584
|
|
|
2590
|
-
class
|
|
2591
|
-
"""Trajectory inference using
|
|
2585
|
+
class Slingshot(Proc):
|
|
2586
|
+
"""Trajectory inference using Slingshot
|
|
2592
2587
|
|
|
2593
2588
|
This process is implemented based on the R package `slingshot`.
|
|
2594
2589
|
|
|
2595
2590
|
Input:
|
|
2596
|
-
sobjfile: The seurat object file in RDS.
|
|
2591
|
+
sobjfile: The seurat object file in RDS or qs format.
|
|
2597
2592
|
|
|
2598
2593
|
Output:
|
|
2599
2594
|
outfile: The output object with the trajectory information.
|
|
2595
|
+
The lineages are stored in the metadata of the seurat object at
|
|
2596
|
+
columns `LineageX`, where X is the lineage number. The `BranchID`
|
|
2597
|
+
column contains the branch id for each cell.
|
|
2598
|
+
One can use
|
|
2599
|
+
`scplotter::CellDimPlot(object, lineages = c("Lineage1", "Lineage2", ...))`
|
|
2600
|
+
to visualize the trajectories.
|
|
2600
2601
|
|
|
2601
2602
|
Envs:
|
|
2602
2603
|
group_by: The column name in metadata to group the cells.
|
|
@@ -2605,8 +2606,8 @@ class SlingShot(Proc):
|
|
|
2605
2606
|
dims (type=auto): The dimensions to use for the analysis.
|
|
2606
2607
|
A list or a string with comma separated values.
|
|
2607
2608
|
Consecutive numbers can be specified with a colon (`:`) or a dash (`-`).
|
|
2608
|
-
start: The starting group for the
|
|
2609
|
-
end: The ending group for the
|
|
2609
|
+
start: The starting group for the Slingshot analysis.
|
|
2610
|
+
end: The ending group for the Slingshot analysis.
|
|
2610
2611
|
prefix: The prefix to add to the column names of the resulting pseudotime variable.
|
|
2611
2612
|
reverse (flag): Logical value indicating whether to reverse the pseudotime variable.
|
|
2612
2613
|
align_start (flag): Whether to align the starting pseudotime values at the maximum pseudotime.
|
|
@@ -2614,7 +2615,7 @@ class SlingShot(Proc):
|
|
|
2614
2615
|
""" # noqa: E501
|
|
2615
2616
|
|
|
2616
2617
|
input = "sobjfile:file"
|
|
2617
|
-
output = "outfile:file:{{in.sobjfile | stem}}.
|
|
2618
|
+
output = "outfile:file:{{in.sobjfile | stem}}.qs"
|
|
2618
2619
|
lang = config.lang.rscript
|
|
2619
2620
|
envs = {
|
|
2620
2621
|
"group_by": "seurat_clusters",
|
|
@@ -2627,7 +2628,7 @@ class SlingShot(Proc):
|
|
|
2627
2628
|
"align_start": False,
|
|
2628
2629
|
"seed": 8525,
|
|
2629
2630
|
}
|
|
2630
|
-
script = "file://../scripts/scrna/
|
|
2631
|
+
script = "file://../scripts/scrna/Slingshot.R"
|
|
2631
2632
|
|
|
2632
2633
|
|
|
2633
2634
|
class LoomTo10X(Proc):
|
|
@@ -2641,6 +2642,7 @@ class LoomTo10X(Proc):
|
|
|
2641
2642
|
including the `matrix.mtx.gz`, `barcodes.tsv.gz` and `features.tsv.gz`
|
|
2642
2643
|
files.
|
|
2643
2644
|
"""
|
|
2645
|
+
|
|
2644
2646
|
input = "loomfile:file"
|
|
2645
2647
|
output = "outdir:dir:{{in.loomfile | stem}}.10X"
|
|
2646
2648
|
lang = config.lang.rscript
|