biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +328 -292
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +481 -215
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +231 -76
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +6 -5
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/ScFGSEA.svelte +0 -16
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
biopipen/ns/scrna.py
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
"""Tools to analyze single-cell RNA"""
|
|
2
2
|
|
|
3
|
+
from pipen.utils import mark
|
|
3
4
|
from ..core.proc import Proc
|
|
4
5
|
from ..core.config import config
|
|
5
|
-
from ..utils.common_docstrs import (
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
)
|
|
6
|
+
# from ..utils.common_docstrs import (
|
|
7
|
+
# indent_docstr,
|
|
8
|
+
# format_placeholder,
|
|
9
|
+
# MUTATE_HELPERS_CLONESIZE,
|
|
10
|
+
# ENVS_SECTION_EACH,
|
|
11
|
+
# )
|
|
11
12
|
|
|
12
|
-
MUTATE_HELPERS_CLONESIZE_INDENTED = indent_docstr(MUTATE_HELPERS_CLONESIZE, " " * 3)
|
|
13
|
-
ENVS_SECTION_EACH_INDENTED = indent_docstr(ENVS_SECTION_EACH, " " * 3)
|
|
13
|
+
# MUTATE_HELPERS_CLONESIZE_INDENTED = indent_docstr(MUTATE_HELPERS_CLONESIZE, " " * 3)
|
|
14
|
+
# ENVS_SECTION_EACH_INDENTED = indent_docstr(ENVS_SECTION_EACH, " " * 3)
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
class SeuratLoading(Proc):
|
|
@@ -60,7 +61,8 @@ class SeuratPreparing(Proc):
|
|
|
60
61
|
Those paths should be either paths to directories containing `matrix.mtx`,
|
|
61
62
|
`barcodes.tsv` and `features.tsv` files that can be loaded by
|
|
62
63
|
[`Seurat::Read10X()`](https://satijalab.org/seurat/reference/read10x),
|
|
63
|
-
or paths
|
|
64
|
+
or paths of loom files that can be loaded by `SeuratDisk::LoadLoom()`, or paths to
|
|
65
|
+
`h5` files that can be loaded by
|
|
64
66
|
[`Seurat::Read10X_h5()`](https://satijalab.org/seurat/reference/read10x_h5).
|
|
65
67
|
|
|
66
68
|
Each sample will be loaded individually and then merged into one `Seurat` object, and then perform QC.
|
|
@@ -96,14 +98,24 @@ class SeuratPreparing(Proc):
|
|
|
96
98
|
to the h5 file that can be read by `Read10X_h5()` from `Seurat`.
|
|
97
99
|
|
|
98
100
|
Output:
|
|
99
|
-
|
|
101
|
+
outfile: The qs2 file with the Seurat object with all samples integrated.
|
|
100
102
|
Note that the cell ids are prefixed with sample names.
|
|
101
|
-
QC plots will be saved in `<job.outdir>/plots`.
|
|
102
103
|
|
|
103
104
|
Envs:
|
|
104
105
|
ncores (type=int): Number of cores to use.
|
|
105
106
|
Used in `future::plan(strategy = "multicore", workers = <ncores>)`
|
|
106
107
|
to parallelize some Seurat procedures.
|
|
108
|
+
mutaters (type=json): The mutaters to mutate the metadata to the cells.
|
|
109
|
+
These new columns will be added to the metadata of the Seurat object and
|
|
110
|
+
will be saved in the output file.
|
|
111
|
+
min_cells (type=int): The minimum number of cells that a gene must be
|
|
112
|
+
expressed in to be kept. This is used in `Seurat::CreateSeuratObject()`.
|
|
113
|
+
Further QC (`envs.cell_qc`, `envs.gene_qc`) will be performed after this.
|
|
114
|
+
It doesn't work when data is loaded from loom files.
|
|
115
|
+
min_features (type=int): The minimum number of features that a cell must
|
|
116
|
+
express to be kept. This is used in `Seurat::CreateSeuratObject()`.
|
|
117
|
+
Further QC (`envs.cell_qc`, `envs.gene_qc`) will be performed after this.
|
|
118
|
+
It doesn't work when data is loaded from loom files.
|
|
107
119
|
cell_qc: Filter expression to filter cells, using
|
|
108
120
|
`tidyseurat::filter()`.
|
|
109
121
|
Available QC keys include `nFeature_RNA`, `nCount_RNA`,
|
|
@@ -122,9 +134,6 @@ class SeuratPreparing(Proc):
|
|
|
122
134
|
genes.
|
|
123
135
|
///
|
|
124
136
|
|
|
125
|
-
cell_qc_per_sample (flag): Whether to perform cell QC per sample or not.
|
|
126
|
-
If `True`, the cell QC will be performed per sample, and the QC will be
|
|
127
|
-
applied to each sample before merging.
|
|
128
137
|
gene_qc (ns): Filter genes.
|
|
129
138
|
`gene_qc` is applied after `cell_qc`.
|
|
130
139
|
- min_cells: The minimum number of cells that a gene must be
|
|
@@ -259,34 +268,37 @@ class SeuratPreparing(Proc):
|
|
|
259
268
|
""" # noqa: E501
|
|
260
269
|
|
|
261
270
|
input = "metafile:file"
|
|
262
|
-
output = "
|
|
271
|
+
output = "outfile:file:{{in.metafile | stem}}.seurat.qs"
|
|
263
272
|
lang = config.lang.rscript
|
|
273
|
+
envs_depth = 4
|
|
264
274
|
envs = {
|
|
265
275
|
"ncores": config.misc.ncores,
|
|
276
|
+
"mutaters": {},
|
|
277
|
+
"min_cells": 0,
|
|
278
|
+
"min_features": 0,
|
|
266
279
|
"cell_qc": None, # "nFeature_RNA > 200 & percent.mt < 5",
|
|
267
|
-
"cell_qc_per_sample": False,
|
|
268
280
|
"gene_qc": {"min_cells": 0, "excludes": []},
|
|
269
281
|
"qc_plots": {
|
|
270
|
-
"Violin Plots
|
|
282
|
+
"Violin Plots": {
|
|
271
283
|
"kind": "cell",
|
|
272
284
|
"plot_type": "violin",
|
|
273
285
|
"devpars": {"res": 100, "height": 600, "width": 1200},
|
|
274
286
|
},
|
|
275
|
-
"Scatter Plots
|
|
287
|
+
"Scatter Plots": {
|
|
276
288
|
"kind": "cell",
|
|
277
289
|
"plot_type": "scatter",
|
|
278
290
|
"devpars": {"res": 100, "height": 800, "width": 1200},
|
|
279
291
|
},
|
|
280
|
-
"Ridge Plots
|
|
292
|
+
"Ridge Plots": {
|
|
281
293
|
"kind": "cell",
|
|
282
294
|
"plot_type": "ridge",
|
|
283
295
|
"devpars": {"res": 100, "height": 800, "width": 1200},
|
|
284
296
|
},
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
297
|
+
"Distribution of number of cells a gene is expressed in": {
|
|
298
|
+
"kind": "gene",
|
|
299
|
+
"plot_type": "histogram",
|
|
300
|
+
"devpars": {"res": 100, "height": 1200, "width": 1200},
|
|
301
|
+
},
|
|
290
302
|
},
|
|
291
303
|
"use_sct": False,
|
|
292
304
|
"no_integration": False,
|
|
@@ -297,6 +309,7 @@ class SeuratPreparing(Proc):
|
|
|
297
309
|
"SCTransform": {
|
|
298
310
|
"return-only-var-genes": True,
|
|
299
311
|
"min_cells": 5,
|
|
312
|
+
"verbose": True,
|
|
300
313
|
},
|
|
301
314
|
"IntegrateLayers": {"method": "harmony"},
|
|
302
315
|
"doublet_detector": "none",
|
|
@@ -306,7 +319,7 @@ class SeuratPreparing(Proc):
|
|
|
306
319
|
}
|
|
307
320
|
script = "file://../scripts/scrna/SeuratPreparing.R"
|
|
308
321
|
plugin_opts = {
|
|
309
|
-
"report": "file://../reports/
|
|
322
|
+
"report": "file://../reports/common.svelte",
|
|
310
323
|
}
|
|
311
324
|
|
|
312
325
|
|
|
@@ -318,24 +331,13 @@ class SeuratClustering(Proc):
|
|
|
318
331
|
srtobj: The seurat object loaded by SeuratPreparing
|
|
319
332
|
|
|
320
333
|
Output:
|
|
321
|
-
|
|
322
|
-
If `SCTransform` was used, the default Assay will be reset to `RNA`.
|
|
334
|
+
outfile: The seurat object with cluster information at `seurat_clusters`.
|
|
323
335
|
|
|
324
336
|
Envs:
|
|
325
337
|
ncores (type=int;order=-100): Number of cores to use.
|
|
326
338
|
Used in `future::plan(strategy = "multicore", workers = <ncores>)`
|
|
327
339
|
to parallelize some Seurat procedures.
|
|
328
340
|
See also: <https://satijalab.org/seurat/articles/future_vignette.html>
|
|
329
|
-
ScaleData (ns): Arguments for [`ScaleData()`](https://satijalab.org/seurat/reference/scaledata).
|
|
330
|
-
If you want to re-scale the data by regressing to some variables, `Seurat::ScaleData`
|
|
331
|
-
will be called. If nothing is specified, `Seurat::ScaleData` will not be called.
|
|
332
|
-
- vars-to-regress: The variables to regress on.
|
|
333
|
-
- <more>: See <https://satijalab.org/seurat/reference/scaledata>
|
|
334
|
-
SCTransform (ns): Arguments for [`SCTransform()`](https://satijalab.org/seurat/reference/sctransform).
|
|
335
|
-
If you want to re-scale the data by regressing to some variables, `Seurat::SCTransform`
|
|
336
|
-
will be called. If nothing is specified, `Seurat::SCTransform` will not be called.
|
|
337
|
-
- vars-to-regress: The variables to regress on.
|
|
338
|
-
- <more>: See <https://satijalab.org/seurat/reference/sctransform>
|
|
339
341
|
RunUMAP (ns): Arguments for [`RunUMAP()`](https://satijalab.org/seurat/reference/runumap).
|
|
340
342
|
`object` is specified internally, and `-` in the key will be replaced with `.`.
|
|
341
343
|
`dims=N` will be expanded to `dims=1:N`; The maximal value of `N` will be the minimum of `N` and the number of columns - 1 for each sample.
|
|
@@ -343,6 +345,7 @@ class SeuratClustering(Proc):
|
|
|
343
345
|
- reduction: The reduction to use for UMAP.
|
|
344
346
|
If not provided, `sobj@misc$integrated_new_reduction` will be used.
|
|
345
347
|
- <more>: See <https://satijalab.org/seurat/reference/runumap>
|
|
348
|
+
RunPCA (ns): Arguments for [`RunPCA()`](https://satijalab.org/seurat/reference/runpca).
|
|
346
349
|
FindNeighbors (ns): Arguments for [`FindNeighbors()`](https://satijalab.org/seurat/reference/findneighbors).
|
|
347
350
|
`object` is specified internally, and `-` in the key will be replaced with `.`.
|
|
348
351
|
- reduction: The reduction to use.
|
|
@@ -357,14 +360,9 @@ class SeuratClustering(Proc):
|
|
|
357
360
|
The results will be saved in `seurat_clusters_<resolution>`.
|
|
358
361
|
The final resolution will be used to define the clusters at `seurat_clusters`.
|
|
359
362
|
- <more>: See <https://satijalab.org/seurat/reference/findclusters>
|
|
360
|
-
cache (type=auto):
|
|
363
|
+
cache (type=auto): Where to cache the information at different steps.
|
|
361
364
|
If `True`, the seurat object will be cached in the job output directory, which will not be cleaned up when the job is rerun.
|
|
362
|
-
|
|
363
|
-
the input and envs of the process.
|
|
364
|
-
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
365
|
-
<https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
|
|
366
|
-
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
367
|
-
`<signature>.RDS` in the cache directory.
|
|
365
|
+
Set to `False` to not cache the results.
|
|
368
366
|
|
|
369
367
|
Requires:
|
|
370
368
|
r-seurat:
|
|
@@ -376,13 +374,12 @@ class SeuratClustering(Proc):
|
|
|
376
374
|
""" # noqa: E501
|
|
377
375
|
|
|
378
376
|
input = "srtobj:file"
|
|
379
|
-
output = "
|
|
377
|
+
output = "outfile:file:{{in.srtobj | stem}}.qs"
|
|
380
378
|
lang = config.lang.rscript
|
|
381
379
|
envs = {
|
|
382
380
|
"ncores": config.misc.ncores,
|
|
383
|
-
"
|
|
384
|
-
"
|
|
385
|
-
"RunUMAP": {"dims": 30},
|
|
381
|
+
"RunPCA": {},
|
|
382
|
+
"RunUMAP": {},
|
|
386
383
|
"FindNeighbors": {},
|
|
387
384
|
"FindClusters": {"resolution": 0.8},
|
|
388
385
|
"cache": config.path.tmpdir,
|
|
@@ -399,15 +396,15 @@ class SeuratSubClustering(Proc):
|
|
|
399
396
|
|
|
400
397
|
For the subset of cells, the reductions will be re-performed on the subset of cells,
|
|
401
398
|
and then the clustering will be performed on the subset of cells. The reduction
|
|
402
|
-
will be saved in `
|
|
403
|
-
clustering will be saved in the metadata of the original object using the casename
|
|
399
|
+
will be saved in `object@reductions$<casename>.<reduction>` of the original object and the
|
|
400
|
+
clustering will be saved in the metadata of the original object using the casename
|
|
404
401
|
as the column name.
|
|
405
402
|
|
|
406
403
|
Input:
|
|
407
|
-
srtobj: The seurat object
|
|
404
|
+
srtobj: The seurat object in RDS or qs/qs2 format.
|
|
408
405
|
|
|
409
406
|
Output:
|
|
410
|
-
|
|
407
|
+
outfile: The seurat object with the subclustering information in qs/qs2 format.
|
|
411
408
|
|
|
412
409
|
Envs:
|
|
413
410
|
ncores (type=int;order=-100): Number of cores to use.
|
|
@@ -417,7 +414,9 @@ class SeuratSubClustering(Proc):
|
|
|
417
414
|
The mutaters will be applied in the order specified.
|
|
418
415
|
subset: An expression to subset the cells, will be passed to
|
|
419
416
|
[`tidyseurat::filter()`](https://stemangiola.github.io/tidyseurat/reference/filter.html).
|
|
420
|
-
|
|
417
|
+
RunPCA (ns): Arguments for [`RunPCA()`](https://satijalab.org/seurat/reference/runpca).
|
|
418
|
+
`object` is specified internally as the subset object, and `-` in the key will be replaced with `.`.
|
|
419
|
+
- <more>: See <https://satijalab.org/seurat/reference/runpca>
|
|
421
420
|
RunUMAP (ns): Arguments for [`RunUMAP()`](https://satijalab.org/seurat/reference/runumap).
|
|
422
421
|
`object` is specified internally as the subset object, and `-` in the key will be replaced with `.`.
|
|
423
422
|
`dims=N` will be expanded to `dims=1:N`; The maximal value of `N` will be the minimum of `N` and the number of columns - 1 for each sample.
|
|
@@ -428,7 +427,7 @@ class SeuratSubClustering(Proc):
|
|
|
428
427
|
FindNeighbors (ns): Arguments for [`FindNeighbors()`](https://satijalab.org/seurat/reference/findneighbors).
|
|
429
428
|
`object` is specified internally, and `-` in the key will be replaced with `.`.
|
|
430
429
|
- reduction: The reduction to use.
|
|
431
|
-
If not provided, `
|
|
430
|
+
If not provided, `object@misc$integrated_new_reduction` will be used.
|
|
432
431
|
- <more>: See <https://satijalab.org/seurat/reference/findneighbors>
|
|
433
432
|
FindClusters (ns): Arguments for [`FindClusters()`](https://satijalab.org/seurat/reference/findclusters).
|
|
434
433
|
`object` is specified internally, and `-` in the key will be replaced with `.`.
|
|
@@ -438,32 +437,33 @@ class SeuratSubClustering(Proc):
|
|
|
438
437
|
The results will be saved in `<casename>_<resolution>`.
|
|
439
438
|
The final resolution will be used to define the clusters at `<casename>`.
|
|
440
439
|
- <more>: See <https://satijalab.org/seurat/reference/findclusters>
|
|
441
|
-
cache (type=auto): Whether to cache the
|
|
440
|
+
cache (type=auto): Whether to cache the results.
|
|
442
441
|
If `True`, the seurat object will be cached in the job output directory, which will not be cleaned up when the job is rerun.
|
|
443
|
-
|
|
444
|
-
the input and envs of the process.
|
|
445
|
-
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
446
|
-
<https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
|
|
447
|
-
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
448
|
-
`<signature>.RDS` in the cache directory.
|
|
442
|
+
Set to `False` to not cache the results.
|
|
449
443
|
cases (type=json): The cases to perform subclustering.
|
|
450
444
|
Keys are the names of the cases and values are the dicts inherited from `envs` except `mutaters` and `cache`.
|
|
451
445
|
If empty, a case with name `subcluster` will be created with default parameters.
|
|
446
|
+
The case name will be passed to `biopipen.utils::SeuratSubCluster()` as `name`.
|
|
447
|
+
It will be used as the prefix for the reduction name, keys and cluster names.
|
|
448
|
+
For reduction keys, it will be `toupper(<name>)` + "PC_" and `toupper(<name>)` + "UMAP_".
|
|
449
|
+
For cluster names, it will be `<name>` + "." + resolution.
|
|
450
|
+
And the final cluster name will be `<name>`.
|
|
451
|
+
Note that the `name` should be alphanumeric and anything other than alphanumeric will be removed.
|
|
452
452
|
""" # noqa: E501
|
|
453
|
-
|
|
454
453
|
input = "srtobj:file"
|
|
455
|
-
output = "
|
|
454
|
+
output = "outfile:file:{{in.srtobj | stem}}.qs"
|
|
456
455
|
lang = config.lang.rscript
|
|
457
456
|
envs_depth = 1
|
|
458
457
|
envs = {
|
|
459
458
|
"ncores": config.misc.ncores,
|
|
460
459
|
"mutaters": {},
|
|
461
460
|
"subset": None,
|
|
462
|
-
"
|
|
461
|
+
"RunPCA": {},
|
|
462
|
+
"RunUMAP": {},
|
|
463
463
|
"FindNeighbors": {},
|
|
464
464
|
"FindClusters": {"resolution": 0.8},
|
|
465
465
|
"cache": config.path.tmpdir,
|
|
466
|
-
"cases": {
|
|
466
|
+
"cases": {},
|
|
467
467
|
}
|
|
468
468
|
script = "file://../scripts/scrna/SeuratSubClustering.R"
|
|
469
469
|
|
|
@@ -533,12 +533,17 @@ class SeuratClusterStats(Proc):
|
|
|
533
533
|
Envs:
|
|
534
534
|
mutaters (type=json): The mutaters to mutate the metadata to subset the cells.
|
|
535
535
|
The mutaters will be applied in the order specified.
|
|
536
|
+
cache (type=auto): Whether to cache the plots.
|
|
537
|
+
Currently only plots for features are supported, since creating those
|
|
538
|
+
plots can be time consuming.
|
|
539
|
+
If `True`, the plots will be cached in the job output directory, which will
|
|
540
|
+
not be cleaned up when the job is rerun.
|
|
536
541
|
clustrees_defaults (ns): The parameters for the clustree plots.
|
|
537
542
|
- devpars (ns): The device parameters for the clustree plot.
|
|
538
543
|
- res (type=int): The resolution of the plots.
|
|
539
544
|
- height (type=int): The height of the plots.
|
|
540
545
|
- width (type=int): The width of the plots.
|
|
541
|
-
- more_formats (list): The formats to save the plots other than `png`.
|
|
546
|
+
- more_formats (type=list): The formats to save the plots other than `png`.
|
|
542
547
|
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
543
548
|
- prefix (type=auto): string indicating columns containing clustering information.
|
|
544
549
|
The trailing dot is not necessary and will be added automatically.
|
|
@@ -561,7 +566,7 @@ class SeuratClusterStats(Proc):
|
|
|
561
566
|
- res (type=int): The resolution of the plots.
|
|
562
567
|
- height (type=int): The height of the plots.
|
|
563
568
|
- width (type=int): The width of the plots.
|
|
564
|
-
- more_formats (list): The formats to save the plots other than `png`.
|
|
569
|
+
- more_formats (type=list): The formats to save the plots other than `png`.
|
|
565
570
|
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
566
571
|
- save_data (flag): Whether to save the data used to generate the plot.
|
|
567
572
|
- <more>: Other arguments passed to `scplotter::CellStatPlot`.
|
|
@@ -576,10 +581,7 @@ class SeuratClusterStats(Proc):
|
|
|
576
581
|
>>> }
|
|
577
582
|
ngenes_defaults (ns): The default parameters for `ngenes`.
|
|
578
583
|
The default parameters to plot the number of genes expressed in each cell.
|
|
579
|
-
-
|
|
580
|
-
- group-by: The column name in metadata to group the cells.
|
|
581
|
-
Dodge position will be used to separate the groups.
|
|
582
|
-
- split-by: The column name in metadata to split the cells into different plots.
|
|
584
|
+
- more_formats (type=list): The formats to save the plots other than `png`.
|
|
583
585
|
- subset: An expression to subset the cells, will be passed to `tidyseurat::filter()`.
|
|
584
586
|
- devpars (ns): The device parameters for the plots.
|
|
585
587
|
- res (type=int): The resolution of the plots.
|
|
@@ -588,9 +590,11 @@ class SeuratClusterStats(Proc):
|
|
|
588
590
|
ngenes (type=json): The number of genes expressed in each cell.
|
|
589
591
|
Keys are the names of the plots and values are the dicts inherited from `env.ngenes_defaults`.
|
|
590
592
|
features_defaults (ns): The default parameters for `features`.
|
|
591
|
-
- features: The features to plot.
|
|
593
|
+
- features (type=auto): The features to plot.
|
|
592
594
|
It can be either a string with comma separated features, a list of features, a file path with `file://` prefix with features
|
|
593
595
|
(one per line), or an integer to use the top N features from `VariableFeatures(srtobj)`.
|
|
596
|
+
It can also be a dict with the keys as the feature group names and the values as the features, which
|
|
597
|
+
is used for heatmap to group the features.
|
|
594
598
|
- order_by (type=auto): The order of the clusters to show on the plot.
|
|
595
599
|
An expression passed to `dplyr::arrange()` on the grouped meta data frame (by `ident`).
|
|
596
600
|
For example, you can order the clusters by the activation score of
|
|
@@ -603,7 +607,7 @@ class SeuratClusterStats(Proc):
|
|
|
603
607
|
- height (type=int): The height of the plots.
|
|
604
608
|
- width (type=int): The width of the plots.
|
|
605
609
|
- descr: The description of the plot, showing in the report.
|
|
606
|
-
- more_formats (list): The formats to save the plots other than `png`.
|
|
610
|
+
- more_formats (type=list): The formats to save the plots other than `png`.
|
|
607
611
|
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
608
612
|
- save_data (flag): Whether to save the data used to generate the plot.
|
|
609
613
|
- <more>: Other arguments passed to `scplotter::FeatureStatPlot`.
|
|
@@ -643,6 +647,7 @@ class SeuratClusterStats(Proc):
|
|
|
643
647
|
lang = config.lang.rscript
|
|
644
648
|
envs = {
|
|
645
649
|
"mutaters": {},
|
|
650
|
+
"cache": config.path.tmpdir,
|
|
646
651
|
"clustrees_defaults": {
|
|
647
652
|
"devpars": {"res": 100},
|
|
648
653
|
"more_formats": [],
|
|
@@ -667,10 +672,8 @@ class SeuratClusterStats(Proc):
|
|
|
667
672
|
},
|
|
668
673
|
},
|
|
669
674
|
"ngenes_defaults": {
|
|
670
|
-
"ident": "seurat_clusters",
|
|
671
|
-
"group-by": None,
|
|
672
|
-
"split-by": None,
|
|
673
675
|
"subset": None,
|
|
676
|
+
"more_formats": [],
|
|
674
677
|
"devpars": {"res": 100, "height": 800, "width": 1000},
|
|
675
678
|
},
|
|
676
679
|
"ngenes": {
|
|
@@ -702,7 +705,10 @@ class SeuratClusterStats(Proc):
|
|
|
702
705
|
},
|
|
703
706
|
}
|
|
704
707
|
script = "file://../scripts/scrna/SeuratClusterStats.R"
|
|
705
|
-
plugin_opts = {
|
|
708
|
+
plugin_opts = {
|
|
709
|
+
"report": "file://../reports/common.svelte",
|
|
710
|
+
"report_paging": 8,
|
|
711
|
+
}
|
|
706
712
|
|
|
707
713
|
|
|
708
714
|
class ModuleScoreCalculator(Proc):
|
|
@@ -781,7 +787,7 @@ class ModuleScoreCalculator(Proc):
|
|
|
781
787
|
""" # noqa: E501
|
|
782
788
|
|
|
783
789
|
input = "srtobj:file"
|
|
784
|
-
output = "rdsfile:file:{{in.srtobj | stem}}.
|
|
790
|
+
output = "rdsfile:file:{{in.srtobj | stem}}.qs"
|
|
785
791
|
lang = config.lang.rscript
|
|
786
792
|
envs = {
|
|
787
793
|
"defaults": {
|
|
@@ -805,9 +811,11 @@ class ModuleScoreCalculator(Proc):
|
|
|
805
811
|
script = "file://../scripts/scrna/ModuleScoreCalculator.R"
|
|
806
812
|
|
|
807
813
|
|
|
808
|
-
@
|
|
809
|
-
|
|
810
|
-
|
|
814
|
+
@mark(
|
|
815
|
+
deprecated=(
|
|
816
|
+
"[{proc.name}] is deprecated, "
|
|
817
|
+
"use [SeuratClusterStats] or [ClonalStats] instead."
|
|
818
|
+
)
|
|
811
819
|
)
|
|
812
820
|
class CellsDistribution(Proc):
|
|
813
821
|
"""Distribution of cells (i.e. in a TCR clone) from different groups
|
|
@@ -846,7 +854,6 @@ class CellsDistribution(Proc):
|
|
|
846
854
|
mutaters (type=json): The mutaters to mutate the metadata
|
|
847
855
|
Keys are the names of the mutaters and values are the R expressions
|
|
848
856
|
passed by `dplyr::mutate()` to mutate the metadata.
|
|
849
|
-
%(mutate_helpers_clonesize)s
|
|
850
857
|
|
|
851
858
|
cluster_orderby: The order of the clusters to show on the plot.
|
|
852
859
|
An expression passed to `dplyr::summarise()` on the grouped data frame (by `seurat_clusters`).
|
|
@@ -889,7 +896,6 @@ class CellsDistribution(Proc):
|
|
|
889
896
|
value as the case/section name.
|
|
890
897
|
section: The section to show in the report. This allows different cases to be put in the same section in report.
|
|
891
898
|
Only works when `each` is not specified.
|
|
892
|
-
%(envs_section_each)s
|
|
893
899
|
overlap (list): Plot the overlap of cell groups (values of `cells_by`) in different cases
|
|
894
900
|
under the same section.
|
|
895
901
|
The section must have at least 2 cases, each case should have a single `cells_by` column.
|
|
@@ -936,7 +942,6 @@ class CellsDistribution(Proc):
|
|
|
936
942
|
}
|
|
937
943
|
|
|
938
944
|
|
|
939
|
-
@format_placeholder(mutate_helpers_clonesize=MUTATE_HELPERS_CLONESIZE_INDENTED)
|
|
940
945
|
class SeuratMetadataMutater(Proc):
|
|
941
946
|
"""Mutate the metadata of the seurat object
|
|
942
947
|
|
|
@@ -947,12 +952,11 @@ class SeuratMetadataMutater(Proc):
|
|
|
947
952
|
cells.
|
|
948
953
|
|
|
949
954
|
Output:
|
|
950
|
-
|
|
955
|
+
outfile: The seurat object with the additional metadata
|
|
951
956
|
|
|
952
957
|
Envs:
|
|
953
958
|
mutaters (type=json): The mutaters to mutate the metadata.
|
|
954
959
|
The key-value pairs will be passed to `dplyr::mutate()` to mutate the metadata.
|
|
955
|
-
%(mutate_helpers_clonesize)s
|
|
956
960
|
|
|
957
961
|
Requires:
|
|
958
962
|
r-seurat:
|
|
@@ -964,12 +968,13 @@ class SeuratMetadataMutater(Proc):
|
|
|
964
968
|
""" # noqa: E501
|
|
965
969
|
|
|
966
970
|
input = "srtobj:file, metafile:file"
|
|
967
|
-
output = "
|
|
971
|
+
output = "outfile:file:{{in.srtobj | stem}}.qs"
|
|
968
972
|
lang = config.lang.rscript
|
|
969
973
|
envs = {"mutaters": {}}
|
|
970
974
|
script = "file://../scripts/scrna/SeuratMetadataMutater.R"
|
|
971
975
|
|
|
972
976
|
|
|
977
|
+
@mark(deprecated="[{proc.name}] is deprecated, use [SeuratClusterStats] instead.")
|
|
973
978
|
class DimPlots(Proc):
|
|
974
979
|
"""Seurat - Dimensional reduction plots
|
|
975
980
|
|
|
@@ -999,9 +1004,6 @@ class DimPlots(Proc):
|
|
|
999
1004
|
}
|
|
1000
1005
|
|
|
1001
1006
|
|
|
1002
|
-
@format_placeholder(
|
|
1003
|
-
mutate_helpers_clonesize=MUTATE_HELPERS_CLONESIZE_INDENTED,
|
|
1004
|
-
)
|
|
1005
1007
|
class MarkersFinder(Proc):
|
|
1006
1008
|
"""Find markers between different groups of cells
|
|
1007
1009
|
|
|
@@ -1026,21 +1028,24 @@ class MarkersFinder(Proc):
|
|
|
1026
1028
|
* Used in `future::plan(strategy = "multicore", workers = <ncores>)` to parallelize some Seurat procedures.
|
|
1027
1029
|
* See also: <https://satijalab.org/seurat/articles/future_vignette.html>
|
|
1028
1030
|
mutaters (type=json): The mutaters to mutate the metadata
|
|
1029
|
-
%(mutate_helpers_clonesize)s
|
|
1030
|
-
ident-1: The first group of cells to compare
|
|
1031
|
-
ident-2: The second group of cells to compare
|
|
1032
|
-
If not provided, the rest of the cells are used for `ident-2`.
|
|
1033
1031
|
group-by: The column name in metadata to group the cells.
|
|
1034
1032
|
If only `group-by` is specified, and `ident-1` and `ident-2` are
|
|
1035
1033
|
not specified, markers will be found for all groups in this column
|
|
1036
1034
|
in the manner of "group vs rest" comparison.
|
|
1037
1035
|
`NA` group will be ignored.
|
|
1036
|
+
If `None`, `Seurat::Idents(srtobj)` will be used, which is usually
|
|
1037
|
+
`"seurat_clusters"` after unsupervised clustering.
|
|
1038
|
+
ident-1: The first group of cells to compare
|
|
1039
|
+
When this is empty, the comparisons will be expanded to each group v.s. the rest of the cells in `group-by`.
|
|
1040
|
+
ident-2: The second group of cells to compare
|
|
1041
|
+
If not provided, the rest of the cells are used for `ident-2`.
|
|
1038
1042
|
each: The column name in metadata to separate the cells into different
|
|
1039
1043
|
cases.
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
+
When this is specified, the case will be expanded for each value of
|
|
1045
|
+
the column in metadata. For example, when you have `envs.cases."Cluster Markers".each = "Sample"`,
|
|
1046
|
+
then the case will be expanded as `envs.cases."Cluster Markers - Sample1"`, `envs.cases."Cluster Markers - Sample2"`, etc.
|
|
1047
|
+
You can specify `allmarker_plots` and `overlaps` to plot the markers for all cases in the same plot and plot the overlaps of the markers
|
|
1048
|
+
between different cases by values in this column.
|
|
1044
1049
|
dbs (list): The dbs to do enrichment analysis for significant
|
|
1045
1050
|
markers. See below for all libraries.
|
|
1046
1051
|
<https://maayanlab.cloud/Enrichr/#libraries>
|
|
@@ -1050,24 +1055,28 @@ class MarkersFinder(Proc):
|
|
|
1050
1055
|
`p_val_adj`. For example, `"p_val_adj < 0.05 & abs(avg_log2FC) > 1"`
|
|
1051
1056
|
to select markers with adjusted p-value < 0.05 and absolute log2
|
|
1052
1057
|
fold change > 1.
|
|
1058
|
+
enrich_style (choice): The style of the enrichment analysis.
|
|
1059
|
+
The enrichment analysis will be done by `EnrichIt()` from [`enrichit`](https://pwwang.github.io/enrichit/).
|
|
1060
|
+
Two styles are available:
|
|
1061
|
+
- enrichr: `enrichr` style enrichment analysis (Fisher's exact test will be used).
|
|
1062
|
+
- clusterprofiler: `clusterProfiler` style enrichment analysis (hypergeometric test will be used).
|
|
1063
|
+
- clusterProfiler: alias for `clusterprofiler`
|
|
1053
1064
|
assay: The assay to use.
|
|
1054
1065
|
error (flag): Error out if no/not enough markers are found or no pathways are enriched.
|
|
1055
1066
|
If `False`, empty results will be returned.
|
|
1056
|
-
site: The site to use for the `enrichR` enrichment analysis.
|
|
1057
1067
|
subset: An expression to subset the cells for each case.
|
|
1058
|
-
cache (type=auto): Where to cache
|
|
1068
|
+
cache (type=auto): Where to cache the results.
|
|
1059
1069
|
If `True`, cache to `outdir` of the job. If `False`, don't cache.
|
|
1060
1070
|
Otherwise, specify the directory to cache to.
|
|
1061
1071
|
rest (ns): Rest arguments for `Seurat::FindMarkers()`.
|
|
1062
1072
|
Use `-` to replace `.` in the argument name. For example,
|
|
1063
1073
|
use `min-pct` instead of `min.pct`.
|
|
1064
|
-
This only works when `use_presto` is `False`.
|
|
1065
1074
|
- <more>: See <https://satijalab.org/seurat/reference/findmarkers>
|
|
1066
1075
|
allmarker_plots_defaults (ns): Default options for the plots for all markers when `ident-1` is not specified.
|
|
1067
1076
|
- plot_type: The type of the plot.
|
|
1068
1077
|
See <https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html>.
|
|
1069
1078
|
Available types are `violin`, `box`, `bar`, `ridge`, `dim`, `heatmap` and `dot`.
|
|
1070
|
-
- more_formats (list): The extra formats to save the plot in.
|
|
1079
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1071
1080
|
- save_code (flag): Whether to save the code to generate the plot.
|
|
1072
1081
|
- devpars (ns): The device parameters for the plots.
|
|
1073
1082
|
- res (type=int): The resolution of the plots.
|
|
@@ -1078,12 +1087,22 @@ class MarkersFinder(Proc):
|
|
|
1078
1087
|
- <more>: Other arguments passed to [`scplotter::FeatureStatPlot()`](https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html).
|
|
1079
1088
|
allmarker_plots (type=json): All marker plot cases.
|
|
1080
1089
|
The keys are the names of the cases and the values are the dicts inherited from `allmarker_plots_defaults`.
|
|
1090
|
+
allenrich_plots_defaults (ns): Default options for the plots to generate for the enrichment analysis.
|
|
1091
|
+
- plot_type: The type of the plot.
|
|
1092
|
+
- devpars (ns): The device parameters for the plots.
|
|
1093
|
+
- res (type=int): The resolution of the plots.
|
|
1094
|
+
- height (type=int): The height of the plots.
|
|
1095
|
+
- width (type=int): The width of the plots.
|
|
1096
|
+
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1097
|
+
allenrich_plots (type=json): Cases of the plots to generate for the enrichment analysis.
|
|
1098
|
+
The keys are the names of the cases and the values are the dicts inherited from `allenrich_plots_defaults`.
|
|
1099
|
+
The cases under `envs.cases` can inherit these options.
|
|
1081
1100
|
marker_plots_defaults (ns): Default options for the plots to generate for the markers.
|
|
1082
1101
|
- plot_type: The type of the plot.
|
|
1083
1102
|
See <https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html>.
|
|
1084
1103
|
Available types are `violin`, `box`, `bar`, `ridge`, `dim`, `heatmap` and `dot`.
|
|
1085
1104
|
There are two additional types available - `volcano_pct` and `volcano_log2fc`.
|
|
1086
|
-
- more_formats (list): The extra formats to save the plot in.
|
|
1105
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1087
1106
|
- save_code (flag): Whether to save the code to generate the plot.
|
|
1088
1107
|
- devpars (ns): The device parameters for the plots.
|
|
1089
1108
|
- res (type=int): The resolution of the plots.
|
|
@@ -1096,11 +1115,12 @@ class MarkersFinder(Proc):
|
|
|
1096
1115
|
[`scplotter::VolcanoPlot()`](https://pwwang.github.io/plotthis/reference/VolcanoPlot.html).
|
|
1097
1116
|
marker_plots (type=json): Cases of the plots to generate for the markers.
|
|
1098
1117
|
Plot cases. The keys are the names of the cases and the values are the dicts inherited from `marker_plots_defaults`.
|
|
1118
|
+
The cases under `envs.cases` can inherit these options.
|
|
1099
1119
|
enrich_plots_defaults (ns): Default options for the plots to generate for the enrichment analysis.
|
|
1100
1120
|
- plot_type: The type of the plot.
|
|
1101
1121
|
See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1102
1122
|
Available types are `bar`, `dot`, `lollipop`, `network`, `enrichmap` and `wordcloud`.
|
|
1103
|
-
- more_formats (list): The extra formats to save the plot in.
|
|
1123
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1104
1124
|
- save_code (flag): Whether to save the code to generate the plot.
|
|
1105
1125
|
- devpars (ns): The device parameters for the plots.
|
|
1106
1126
|
- res (type=int): The resolution of the plots.
|
|
@@ -1109,44 +1129,33 @@ class MarkersFinder(Proc):
|
|
|
1109
1129
|
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1110
1130
|
enrich_plots (type=json): Cases of the plots to generate for the enrichment analysis.
|
|
1111
1131
|
The keys are the names of the cases and the values are the dicts inherited from `enrich_plots_defaults`.
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
If no cases are specified, the default case will be added with the default values under `envs` with the name `DEFAULT`.
|
|
1116
|
-
If you want to put some cases under the same section in the report, you can specify the section name in the case name
|
|
1117
|
-
as a prefix separated by `::`. For example, `section1::case1` and `section1::case2` will be put `case1` and `case2`
|
|
1118
|
-
under the section `section1`.
|
|
1119
|
-
overlaps_defaults (ns): Default options for investigating the overlapping of significant markers between different cases.
|
|
1120
|
-
- cases (list): The cases to do the overlapping analysis, including the prefix section name.
|
|
1121
|
-
The case must have `ident-1` specified. When `each` is specified, the case will be expanded.
|
|
1122
|
-
For example, `case1` with `each = "group"`, where `group` has `g1` and `g2`, will be expanded to
|
|
1123
|
-
`case1::g1` and `case1::g2`, or `case1::group - g1` and `case1::group - g2` if `prefix_each` is `True`.
|
|
1124
|
-
There must be at least 2 cases to do the overlapping analysis.
|
|
1132
|
+
The cases under `envs.cases` can inherit these options.
|
|
1133
|
+
overlaps_defaults (ns): Default options for investigating the overlapping of significant markers between different cases or comparisons.
|
|
1134
|
+
This requires either `ident-1` to be empty, so that the case is expanded to multiple comparisons, or `each` to be specified, so that the case is expanded to multiple cases.
|
|
1125
1135
|
- sigmarkers: The expression to filter the significant markers for each case.
|
|
1126
1136
|
If not provided, `envs.sigmarkers` will be used.
|
|
1127
|
-
-
|
|
1128
|
-
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
- save_code (flag): Whether to save the code to generate the plot.
|
|
1142
|
-
- devpars (ns): The device parameters for the plots.
|
|
1143
|
-
- res (type=int): The resolution of the plots.
|
|
1144
|
-
- height (type=int): The height of the plots.
|
|
1145
|
-
- width (type=int): The width of the plots.
|
|
1146
|
-
- <more>: More arguments pased to `plotthis::UpsetPlot()`.
|
|
1147
|
-
https://pwwang.github.io/plotthis/reference/upsetplot1.html
|
|
1148
|
-
overlaps (type=json): Cases for investigating the overlapping of significant markers between different cases.
|
|
1137
|
+
- plot_type (choice): The type of the plot to generate for the overlaps.
|
|
1138
|
+
- venn: Use `plotthis::VennDiagram()`.
|
|
1139
|
+
- upset: Use `plotthis::UpsetPlot()`.
|
|
1140
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1141
|
+
- save_code (flag): Whether to save the code to generate the plot.
|
|
1142
|
+
- devpars (ns): The device parameters for the plots.
|
|
1143
|
+
- res (type=int): The resolution of the plots.
|
|
1144
|
+
- height (type=int): The height of the plots.
|
|
1145
|
+
- width (type=int): The width of the plots.
|
|
1146
|
+
- <more>: More arguments passed to `plotthis::VennDiagram()`
|
|
1147
|
+
(<https://pwwang.github.io/plotthis/reference/venndiagram1.html>)
|
|
1148
|
+
or `plotthis::UpsetPlot()`
|
|
1149
|
+
(<https://pwwang.github.io/plotthis/reference/upsetplot1.html>)
|
|
1150
|
+
overlaps (type=json): Cases for investigating the overlapping of significant markers between different cases or comparisons.
|
|
1149
1151
|
The keys are the names of the cases and the values are the dicts inherited from `overlaps_defaults`.
|
|
1152
|
+
There are two situations that we can perform overlaps:
|
|
1153
|
+
1. If `ident-1` is not specified, the overlaps can be performed between different comparisons.
|
|
1154
|
+
2. If `each` is specified, the overlaps can be performed between different cases, where in each case, `ident-1` must be specified.
|
|
1155
|
+
cases (type=json): If you have multiple cases for marker discovery, you can specify them
|
|
1156
|
+
here. The keys are the names of the cases and the values are the above options. If some options are
|
|
1157
|
+
not specified, the default values specified above (under `envs`) will be used.
|
|
1158
|
+
If no cases are specified, the default case will be added with the default values under `envs` with the name `Marker Discovery`.
|
|
1150
1159
|
""" # noqa: E501
|
|
1151
1160
|
|
|
1152
1161
|
input = "srtobj:file"
|
|
@@ -1155,20 +1164,18 @@ class MarkersFinder(Proc):
|
|
|
1155
1164
|
envs = {
|
|
1156
1165
|
"ncores": config.misc.ncores,
|
|
1157
1166
|
"mutaters": {},
|
|
1167
|
+
"group-by": None,
|
|
1158
1168
|
"ident-1": None,
|
|
1159
1169
|
"ident-2": None,
|
|
1160
|
-
"group-by": "seurat_clusters",
|
|
1161
1170
|
"each": None,
|
|
1162
|
-
"prefix_each": True,
|
|
1163
|
-
"prefix_group": True,
|
|
1164
|
-
"assay": None,
|
|
1165
|
-
"subset": None,
|
|
1166
|
-
"error": True,
|
|
1167
|
-
"site": "Enrichr",
|
|
1168
|
-
"rest": {},
|
|
1169
1171
|
"dbs": ["KEGG_2021_Human", "MSigDB_Hallmark_2020"],
|
|
1170
1172
|
"sigmarkers": "p_val_adj < 0.05",
|
|
1173
|
+
"enrich_style": "enrichr",
|
|
1174
|
+
"assay": None,
|
|
1175
|
+
"error": True,
|
|
1176
|
+
"subset": None,
|
|
1171
1177
|
"cache": config.path.tmpdir,
|
|
1178
|
+
"rest": {},
|
|
1172
1179
|
"allmarker_plots_defaults": {
|
|
1173
1180
|
"plot_type": None,
|
|
1174
1181
|
"more_formats": [],
|
|
@@ -1178,6 +1185,11 @@ class MarkersFinder(Proc):
|
|
|
1178
1185
|
"genes": 10,
|
|
1179
1186
|
},
|
|
1180
1187
|
"allmarker_plots": {},
|
|
1188
|
+
"allenrich_plots_defaults": {
|
|
1189
|
+
"plot_type": "heatmap",
|
|
1190
|
+
"devpars": {"res": 100},
|
|
1191
|
+
},
|
|
1192
|
+
"allenrich_plots": {},
|
|
1181
1193
|
"marker_plots_defaults": {
|
|
1182
1194
|
"plot_type": None,
|
|
1183
1195
|
"more_formats": [],
|
|
@@ -1199,24 +1211,15 @@ class MarkersFinder(Proc):
|
|
|
1199
1211
|
"enrich_plots": {
|
|
1200
1212
|
"Bar Plot": {"plot_type": "bar", "ncol": 1, "top_term": 10},
|
|
1201
1213
|
},
|
|
1202
|
-
"cases": {},
|
|
1203
1214
|
"overlaps_defaults": {
|
|
1204
|
-
"cases": [],
|
|
1205
1215
|
"sigmarkers": None,
|
|
1206
|
-
"
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
"devpars": {"res": 100},
|
|
1211
|
-
},
|
|
1212
|
-
"upset": {
|
|
1213
|
-
"enabled": True,
|
|
1214
|
-
"more_formats": [],
|
|
1215
|
-
"save_code": False,
|
|
1216
|
-
"devpars": {"res": 100},
|
|
1217
|
-
},
|
|
1216
|
+
"plot_type": "venn",
|
|
1217
|
+
"more_formats": [],
|
|
1218
|
+
"save_code": False,
|
|
1219
|
+
"devpars": {"res": 100},
|
|
1218
1220
|
},
|
|
1219
1221
|
"overlaps": {},
|
|
1222
|
+
"cases": {},
|
|
1220
1223
|
}
|
|
1221
1224
|
order = 5
|
|
1222
1225
|
script = "file://../scripts/scrna/MarkersFinder.R"
|
|
@@ -1230,7 +1233,7 @@ class TopExpressingGenes(Proc):
|
|
|
1230
1233
|
"""Find the top expressing genes in each cluster
|
|
1231
1234
|
|
|
1232
1235
|
Input:
|
|
1233
|
-
srtobj: The seurat object in RDS format
|
|
1236
|
+
srtobj: The seurat object in RDS or qs/qs2 format
|
|
1234
1237
|
|
|
1235
1238
|
Output:
|
|
1236
1239
|
outdir: The output directory for the tables and plots
|
|
@@ -1245,27 +1248,38 @@ class TopExpressingGenes(Proc):
|
|
|
1245
1248
|
group-by: The column name in metadata to group the cells.
|
|
1246
1249
|
each: The column name in metadata to separate the cells into different
|
|
1247
1250
|
cases.
|
|
1248
|
-
When specified, `ident` must be specified
|
|
1249
|
-
prefix_each (flag): Whether to prefix the `each` column name to the
|
|
1250
|
-
value as the case/section name.
|
|
1251
|
-
section: The section name for the report.
|
|
1252
|
-
Worked only when `each` is not specified and `ident` is specified.
|
|
1253
|
-
Otherwise, the section name will be constructed from `each` and
|
|
1254
|
-
`group-by`.
|
|
1255
|
-
If `DEFAULT`, and it's the only section, it not included in the
|
|
1256
|
-
case/section names.
|
|
1257
1251
|
dbs (list): The dbs to do enrichment analysis for significant
|
|
1258
1252
|
markers. See below for all libraries.
|
|
1259
1253
|
<https://maayanlab.cloud/Enrichr/#libraries>
|
|
1260
1254
|
n (type=int): The number of top expressing genes to find.
|
|
1255
|
+
enrich_style (choice): The style of the enrichment analysis.
|
|
1256
|
+
The enrichment analysis will be done by `EnrichIt()` from [`enrichit`](https://pwwang.github.io/enrichit/).
|
|
1257
|
+
Two styles are available:
|
|
1258
|
+
- enrichr: `enrichr` style enrichment analysis (Fisher's exact test will be used).
|
|
1259
|
+
- clusterprofiler: `clusterProfiler` style enrichment analysis (hypergeometric test will be used).
|
|
1260
|
+
- clusterProfiler: alias for `clusterprofiler`
|
|
1261
|
+
enrich_plots_defaults (ns): Default options for the plots to generate for the enrichment analysis.
|
|
1262
|
+
- plot_type: The type of the plot.
|
|
1263
|
+
See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1264
|
+
Available types are `bar`, `dot`, `lollipop`, `network`, `enrichmap` and `wordcloud`.
|
|
1265
|
+
- more_formats (type=list): The extra formats to save the plot in.
|
|
1266
|
+
- save_code (flag): Whether to save the code to generate the plot.
|
|
1267
|
+
- devpars (ns): The device parameters for the plots.
|
|
1268
|
+
- res (type=int): The resolution of the plots.
|
|
1269
|
+
- height (type=int): The height of the plots.
|
|
1270
|
+
- width (type=int): The width of the plots.
|
|
1271
|
+
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1272
|
+
enrich_plots (type=json): Cases of the plots to generate for the enrichment analysis.
|
|
1273
|
+
The keys are the names of the cases and the values are the dicts inherited from `enrich_plots_defaults`.
|
|
1274
|
+
The cases under `envs.cases` can inherit these options.
|
|
1261
1275
|
subset: An expression to subset the cells for each case.
|
|
1262
1276
|
cases (type=json): If you have multiple cases, you can specify them
|
|
1263
1277
|
here. The keys are the names of the cases and the values are the
|
|
1264
1278
|
above options except `mutaters`. If some options are
|
|
1265
1279
|
not specified, the default values specified above will be used.
|
|
1266
1280
|
If no cases are specified, the default case will be added with
|
|
1267
|
-
the default values under `envs` with the name `
|
|
1268
|
-
"""
|
|
1281
|
+
the default values under `envs` with the name `Top Expressing Genes`.
|
|
1282
|
+
""" # noqa: E501
|
|
1269
1283
|
|
|
1270
1284
|
input = "srtobj:file"
|
|
1271
1285
|
output = "outdir:dir:{{in.srtobj | stem}}.top_expressing_genes"
|
|
@@ -1274,13 +1288,20 @@ class TopExpressingGenes(Proc):
|
|
|
1274
1288
|
envs = {
|
|
1275
1289
|
"mutaters": {},
|
|
1276
1290
|
"ident": None,
|
|
1277
|
-
"group-by":
|
|
1291
|
+
"group-by": None,
|
|
1278
1292
|
"each": None,
|
|
1279
|
-
"prefix_each": True,
|
|
1280
|
-
"section": "DEFAULT",
|
|
1281
1293
|
"dbs": ["KEGG_2021_Human", "MSigDB_Hallmark_2020"],
|
|
1282
1294
|
"n": 250,
|
|
1283
1295
|
"subset": None,
|
|
1296
|
+
"enrich_style": "enrichr",
|
|
1297
|
+
"enrich_plots_defaults": {
|
|
1298
|
+
"more_formats": [],
|
|
1299
|
+
"save_code": False,
|
|
1300
|
+
"devpars": {"res": 100},
|
|
1301
|
+
},
|
|
1302
|
+
"enrich_plots": {
|
|
1303
|
+
"Bar Plot": {"plot_type": "bar", "ncol": 1, "top_term": 10},
|
|
1304
|
+
},
|
|
1284
1305
|
"cases": {},
|
|
1285
1306
|
}
|
|
1286
1307
|
plugin_opts = {
|
|
@@ -1301,7 +1322,7 @@ class ExprImputation(Proc):
|
|
|
1301
1322
|
- [Dijk, David van, et al. "MAGIC: A diffusion-based imputation method reveals gene-gene interactions in single-cell RNA-sequencing data." BioRxiv (2017): 111591.](https://www.cell.com/cell/abstract/S0092-8674(18)30724-4)
|
|
1302
1323
|
|
|
1303
1324
|
Input:
|
|
1304
|
-
infile: The input file in RDS format of Seurat object
|
|
1325
|
+
infile: The input file in RDS/qs format of Seurat object
|
|
1305
1326
|
|
|
1306
1327
|
Output:
|
|
1307
1328
|
outfile: The output file in RDS format of Seurat object
|
|
@@ -1321,6 +1342,9 @@ class ExprImputation(Proc):
|
|
|
1321
1342
|
- refgene: The reference gene file
|
|
1322
1343
|
rmagic_args (ns): The arguments for rmagic
|
|
1323
1344
|
- python: The python path where magic-impute is installed.
|
|
1345
|
+
- threshold (type=float): The threshold for magic imputation.
|
|
1346
|
+
Only the genes with dropout rates greater than this threshold (No. of
|
|
1347
|
+
cells with non-zero expression / total number of cells) will be imputed.
|
|
1324
1348
|
alra_args (type=json): The arguments for `RunALRA()`
|
|
1325
1349
|
|
|
1326
1350
|
Requires:
|
|
@@ -1353,11 +1377,11 @@ class ExprImputation(Proc):
|
|
|
1353
1377
|
""" # noqa: E501
|
|
1354
1378
|
|
|
1355
1379
|
input = "infile:file"
|
|
1356
|
-
output = "outfile:file:{{in.infile | stem}}.imputed.
|
|
1380
|
+
output = "outfile:file:{{in.infile | stem}}.imputed.qs"
|
|
1357
1381
|
lang = config.lang.rscript
|
|
1358
1382
|
envs = {
|
|
1359
1383
|
"tool": "alra",
|
|
1360
|
-
"rmagic_args": {"python": config.exe.magic_python},
|
|
1384
|
+
"rmagic_args": {"python": config.exe.magic_python, "threshold": 0.5},
|
|
1361
1385
|
"scimpute_args": {
|
|
1362
1386
|
"drop_thre": 0.5,
|
|
1363
1387
|
"kcluster": None,
|
|
@@ -1556,10 +1580,6 @@ class SeuratTo10X(Proc):
|
|
|
1556
1580
|
script = "file://../scripts/scrna/SeuratTo10X.R"
|
|
1557
1581
|
|
|
1558
1582
|
|
|
1559
|
-
@format_placeholder(
|
|
1560
|
-
mutate_helpers_clonesize=MUTATE_HELPERS_CLONESIZE_INDENTED,
|
|
1561
|
-
envs_section_each=ENVS_SECTION_EACH_INDENTED,
|
|
1562
|
-
)
|
|
1563
1583
|
class ScFGSEA(Proc):
|
|
1564
1584
|
"""Gene set enrichment analysis for cells in different groups using `fgsea`
|
|
1565
1585
|
|
|
@@ -1588,17 +1608,12 @@ class ScFGSEA(Proc):
|
|
|
1588
1608
|
Passed to `nproc` of `fgseaMultilevel()`.
|
|
1589
1609
|
mutaters (type=json): The mutaters to mutate the metadata.
|
|
1590
1610
|
The key-value pairs will be passed to `dplyr::mutate()` to mutate the metadata.
|
|
1591
|
-
%(mutate_helpers_clonesize)s
|
|
1592
1611
|
|
|
1593
1612
|
group-by: The column name in metadata to group the cells.
|
|
1594
1613
|
ident-1: The first group of cells to compare
|
|
1595
1614
|
ident-2: The second group of cells to compare, if not provided, the rest of the cells that are not `NA`s in `group-by` column are used for `ident-2`.
|
|
1596
1615
|
each: The column name in metadata to separate the cells into different subsets to do the analysis.
|
|
1597
|
-
prefix_each (flag): Whether to prefix the `each` column name to the values as the case/section name.
|
|
1598
1616
|
subset: An expression to subset the cells.
|
|
1599
|
-
section: The section name for the report. Worked only when `each` is not specified. Otherwise, the section name will be constructed from `each` and its value.
|
|
1600
|
-
This allows different cases to be put into the same section in the report.
|
|
1601
|
-
%(envs_section_each)s
|
|
1602
1617
|
gmtfile: The pathways in GMT format, with the gene names/ids in the same format as the seurat object.
|
|
1603
1618
|
One could also use a URL to a GMT file. For example, from <https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/Pathways/>.
|
|
1604
1619
|
method (choice): The method to do the preranking.
|
|
@@ -1622,6 +1637,15 @@ class ScFGSEA(Proc):
|
|
|
1622
1637
|
If it is < 1, will apply it to `padj`, selecting pathways with `padj` < `top`.
|
|
1623
1638
|
eps (type=float): This parameter sets the boundary for calculating the p value.
|
|
1624
1639
|
See <https://rdrr.io/bioc/fgsea/man/fgseaMultilevel.html>
|
|
1640
|
+
allpathway_plots_defaults (ns): Default options for the plots to generate for all pathways.
|
|
1641
|
+
- plot_type: The type of the plot, currently either dot or heatmap (default)
|
|
1642
|
+
- devpars (ns): The device parameters for the plots.
|
|
1643
|
+
- res (type=int): The resolution of the plots.
|
|
1644
|
+
- height (type=int): The height of the plots.
|
|
1645
|
+
- width (type=int): The width of the plots.
|
|
1646
|
+
- <more>: See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
|
|
1647
|
+
allpathway_plots (type=json): Cases of the plots to generate for all pathways.
|
|
1648
|
+
The keys are the names of the cases and the values are the dicts inherited from `allpathway_plots_defaults`.
|
|
1625
1649
|
minsize (type=int): Minimal size of a gene set to test. All pathways below the threshold are excluded.
|
|
1626
1650
|
maxsize (type=int): Maximal size of a gene set to test. All pathways above the threshold are excluded.
|
|
1627
1651
|
rest (type=json;order=98): Rest arguments for [`fgsea()`](https://rdrr.io/bioc/fgsea/man/fgsea.html)
|
|
@@ -1629,7 +1653,7 @@ class ScFGSEA(Proc):
|
|
|
1629
1653
|
cases (type=json;order=99): If you have multiple cases, you can specify them here.
|
|
1630
1654
|
The keys are the names of the cases and the values are the above options except `mutaters`.
|
|
1631
1655
|
If some options are not specified, the default values specified above will be used.
|
|
1632
|
-
If no cases are specified, the default case will be added with the name `
|
|
1656
|
+
If no cases are specified, the default case will be added with the name `GSEA`.
|
|
1633
1657
|
|
|
1634
1658
|
Requires:
|
|
1635
1659
|
bioconductor-fgsea:
|
|
@@ -1648,21 +1672,24 @@ class ScFGSEA(Proc):
|
|
|
1648
1672
|
"ident-1": None,
|
|
1649
1673
|
"ident-2": None,
|
|
1650
1674
|
"each": None,
|
|
1651
|
-
"prefix_each": True,
|
|
1652
1675
|
"subset": None,
|
|
1653
|
-
"
|
|
1654
|
-
"gmtfile": "",
|
|
1676
|
+
"gmtfile": "KEGG_2021_Human",
|
|
1655
1677
|
"method": "s2n",
|
|
1656
1678
|
"top": 20,
|
|
1657
1679
|
"minsize": 10,
|
|
1658
1680
|
"maxsize": 100,
|
|
1659
1681
|
"eps": 0,
|
|
1682
|
+
"allpathway_plots_defaults": {
|
|
1683
|
+
"plot_type": "heatmap",
|
|
1684
|
+
"devpars": {"res": 100},
|
|
1685
|
+
},
|
|
1686
|
+
"allpathway_plots": {},
|
|
1660
1687
|
"rest": {},
|
|
1661
1688
|
"cases": {},
|
|
1662
1689
|
}
|
|
1663
1690
|
script = "file://../scripts/scrna/ScFGSEA.R"
|
|
1664
1691
|
plugin_opts = {
|
|
1665
|
-
"report": "file://../reports/
|
|
1692
|
+
"report": "file://../reports/common.svelte",
|
|
1666
1693
|
"report_paging": 8,
|
|
1667
1694
|
}
|
|
1668
1695
|
|
|
@@ -1701,10 +1728,10 @@ class CellTypeAnnotation(Proc):
|
|
|
1701
1728
|
```
|
|
1702
1729
|
|
|
1703
1730
|
Input:
|
|
1704
|
-
sobjfile: The
|
|
1731
|
+
sobjfile: The single-cell object in RDS/qs/qs2/h5ad format.
|
|
1705
1732
|
|
|
1706
1733
|
Output:
|
|
1707
|
-
outfile: The rds file of seurat object with cell type annotated.
|
|
1734
|
+
outfile: The rds/qs/qs2/h5ad file of seurat object with cell type annotated.
|
|
1708
1735
|
A text file containing the mapping from the old `seurat_clusters` to the new cell types
|
|
1709
1736
|
will be generated and saved to `cluster2celltype.tsv` under the job output directory.
|
|
1710
1737
|
|
|
@@ -1761,6 +1788,8 @@ class CellTypeAnnotation(Proc):
|
|
|
1761
1788
|
at the cost of increased runtime.
|
|
1762
1789
|
- over_clustering (type=auto): The column name in metadata to use as clusters for majority voting.
|
|
1763
1790
|
Set to `False` to disable over-clustering.
|
|
1791
|
+
When `in.sobjfile` is rds/qs/qs2 (supposing we have a Seurat object), the default ident is used by default.
|
|
1792
|
+
Otherwise, it is False by default.
|
|
1764
1793
|
- assay: When converting a Seurat object to AnnData, the assay to use.
|
|
1765
1794
|
If input is h5seurat, this defaults to RNA.
|
|
1766
1795
|
If input is Seurat object in RDS, this defaults to the default assay.
|
|
@@ -1773,7 +1802,8 @@ class CellTypeAnnotation(Proc):
|
|
|
1773
1802
|
An RDS file will be generated for other tools.
|
|
1774
1803
|
- input: Use the same file type as the input.
|
|
1775
1804
|
- rds: Use RDS file.
|
|
1776
|
-
-
|
|
1805
|
+
- qs: Use qs2 file.
|
|
1806
|
+
- qs2: Use qs2 file.
|
|
1777
1807
|
- h5ad: Use AnnData file.
|
|
1778
1808
|
|
|
1779
1809
|
Requires:
|
|
@@ -1816,7 +1846,7 @@ class CellTypeAnnotation(Proc):
|
|
|
1816
1846
|
"model": None,
|
|
1817
1847
|
"python": config.lang.python,
|
|
1818
1848
|
"majority_voting": True,
|
|
1819
|
-
"over_clustering":
|
|
1849
|
+
"over_clustering": None,
|
|
1820
1850
|
"assay": None,
|
|
1821
1851
|
},
|
|
1822
1852
|
"merge": False,
|
|
@@ -1860,8 +1890,9 @@ class SeuratMap2Ref(Proc):
|
|
|
1860
1890
|
The file type is determined by the extension. `.rds` or `.RDS` for
|
|
1861
1891
|
RDS file, `.h5seurat` or `.h5` for h5seurat file.
|
|
1862
1892
|
refnorm (choice): Normalization method the reference used. The same method will be used for the query.
|
|
1863
|
-
-
|
|
1893
|
+
- LogNormalize: Using [`NormalizeData`](https://satijalab.org/seurat/reference/normalizedata).
|
|
1864
1894
|
- SCTransform: Using [`SCTransform`](https://satijalab.org/seurat/reference/sctransform).
|
|
1895
|
+
- SCT: Alias of SCTransform.
|
|
1865
1896
|
- auto: Automatically detect the normalization method.
|
|
1866
1897
|
If the default assay of reference is `SCT`, then `SCTransform` will be used.
|
|
1867
1898
|
split_by: The column name in metadata to split the query into multiple objects.
|
|
@@ -1900,9 +1931,19 @@ class SeuratMap2Ref(Proc):
|
|
|
1900
1931
|
- refdata (type=json): Extra data to transfer from the reference to the query.
|
|
1901
1932
|
- <more>: See <https://satijalab.org/seurat/reference/mapquery>.
|
|
1902
1933
|
Note that the hyphen (`-`) will be transformed into `.` for the keys.
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1934
|
+
cache (type=auto): Whether to cache the information at different steps.
|
|
1935
|
+
If `True`, the seurat object will be cached in the job output directory, which will not be cleaned up when the job is rerun.
|
|
1936
|
+
The cached seurat object will be saved as `<signature>.<kind>.RDS` file, where `<signature>` is the signature determined by
|
|
1937
|
+
the input and envs of the process.
|
|
1938
|
+
See <https://github.com/satijalab/seurat/issues/7849>, <https://github.com/satijalab/seurat/issues/5358> and
|
|
1939
|
+
<https://github.com/satijalab/seurat/issues/6748> for more details also about reproducibility issues.
|
|
1940
|
+
To not use the cached seurat object, you can either set `cache` to `False` or delete the cached file at
|
|
1941
|
+
`<signature>.RDS` in the cache directory.
|
|
1942
|
+
plots (type=json): The plots to generate.
|
|
1943
|
+
The keys are the names of the plots and the values are the arguments for the plot.
|
|
1944
|
+
The arguments will be passed to `biopipen.utils::VizSeuratMap2Ref()` to generate the plots.
|
|
1945
|
+
The plots will be saved to the output directory.
|
|
1946
|
+
See <https://pwwang.github.io/biopipen.utils.R/reference/VizSeuratMap2Ref.html>.
|
|
1906
1947
|
|
|
1907
1948
|
Requires:
|
|
1908
1949
|
r-seurat:
|
|
@@ -1910,8 +1951,9 @@ class SeuratMap2Ref(Proc):
|
|
|
1910
1951
|
""" # noqa: E501
|
|
1911
1952
|
|
|
1912
1953
|
input = "sobjfile:file"
|
|
1913
|
-
output = "outfile:file:{{in.sobjfile | stem}}.
|
|
1954
|
+
output = "outfile:file:{{in.sobjfile | stem}}.qs"
|
|
1914
1955
|
lang = config.lang.rscript
|
|
1956
|
+
envs_depth = 3
|
|
1915
1957
|
envs = {
|
|
1916
1958
|
"ncores": config.misc.ncores,
|
|
1917
1959
|
"use": None,
|
|
@@ -1930,21 +1972,29 @@ class SeuratMap2Ref(Proc):
|
|
|
1930
1972
|
"normalization-method": "LogNormalize",
|
|
1931
1973
|
},
|
|
1932
1974
|
"FindTransferAnchors": {
|
|
1933
|
-
"reference-reduction": "spca",
|
|
1975
|
+
# "reference-reduction": "spca",
|
|
1934
1976
|
},
|
|
1935
1977
|
"MapQuery": {
|
|
1936
|
-
"reference-reduction": "spca",
|
|
1937
|
-
"reduction-model": "wnn.umap",
|
|
1978
|
+
# "reference-reduction": "spca",
|
|
1979
|
+
# "reduction-model": "wnn.umap",
|
|
1938
1980
|
"refdata": {
|
|
1939
1981
|
# "celltype-l1": "celltype.l1",
|
|
1940
1982
|
# "celltype-l2": "celltype.l2",
|
|
1941
1983
|
# "predicted_ADT": "ADT",
|
|
1942
1984
|
},
|
|
1943
1985
|
},
|
|
1944
|
-
"
|
|
1986
|
+
"cache": config.path.tmpdir,
|
|
1987
|
+
"plots": {
|
|
1988
|
+
"Mapped Identity": {
|
|
1989
|
+
"features": "{ident}:{use}",
|
|
1990
|
+
},
|
|
1991
|
+
"Mapping Score": {
|
|
1992
|
+
"features": "{ident}.score",
|
|
1993
|
+
},
|
|
1994
|
+
},
|
|
1945
1995
|
}
|
|
1946
1996
|
script = "file://../scripts/scrna/SeuratMap2Ref.R"
|
|
1947
|
-
plugin_opts = {"report": "file://../reports/
|
|
1997
|
+
plugin_opts = {"report": "file://../reports/common.svelte"}
|
|
1948
1998
|
|
|
1949
1999
|
|
|
1950
2000
|
class RadarPlots(Proc):
|
|
@@ -2008,7 +2058,7 @@ class RadarPlots(Proc):
|
|
|
2008
2058
|
///
|
|
2009
2059
|
|
|
2010
2060
|
Input:
|
|
2011
|
-
srtobj: The seurat object in RDS format
|
|
2061
|
+
srtobj: The seurat object in RDS or qs/qs2 format
|
|
2012
2062
|
|
|
2013
2063
|
Output:
|
|
2014
2064
|
outdir: The output directory for the plots
|
|
@@ -2124,10 +2174,7 @@ class RadarPlots(Proc):
|
|
|
2124
2174
|
}
|
|
2125
2175
|
|
|
2126
2176
|
|
|
2127
|
-
@
|
|
2128
|
-
mutate_helpers_clonesize=MUTATE_HELPERS_CLONESIZE_INDENTED,
|
|
2129
|
-
envs_section_each=ENVS_SECTION_EACH_INDENTED,
|
|
2130
|
-
)
|
|
2177
|
+
@mark(deprecated=True)
|
|
2131
2178
|
class MetaMarkers(Proc):
|
|
2132
2179
|
"""Find markers between three or more groups of cells, using one-way ANOVA
|
|
2133
2180
|
or Kruskal-Wallis test.
|
|
@@ -2153,7 +2200,6 @@ class MetaMarkers(Proc):
|
|
|
2153
2200
|
ncores (type=int): Number of cores to use to parallelize for genes
|
|
2154
2201
|
mutaters (type=json): The mutaters to mutate the metadata
|
|
2155
2202
|
The key-value pairs will be passed to `dplyr::mutate()` to mutate the metadata.
|
|
2156
|
-
%(mutate_helpers_clonesize)s
|
|
2157
2203
|
|
|
2158
2204
|
group-by: The column name in metadata to group the cells.
|
|
2159
2205
|
If only `group-by` is specified, and `idents` are
|
|
@@ -2187,7 +2233,6 @@ class MetaMarkers(Proc):
|
|
|
2187
2233
|
Works only when `each` is not specified.
|
|
2188
2234
|
Otherwise, the section name will be constructed from `each` and `group-by`.
|
|
2189
2235
|
If `DEFAULT`, and it's the only section, it is not included in the case/section names.
|
|
2190
|
-
%(envs_section_each)s
|
|
2191
2236
|
method (choice): The method for the test.
|
|
2192
2237
|
- anova: One-way ANOVA
|
|
2193
2238
|
- kruskal: Kruskal-Wallis test
|
|
@@ -2228,7 +2273,7 @@ class Seurat2AnnData(Proc):
|
|
|
2228
2273
|
"""Convert seurat object to AnnData
|
|
2229
2274
|
|
|
2230
2275
|
Input:
|
|
2231
|
-
sobjfile: The seurat object file, in RDS or
|
|
2276
|
+
sobjfile: The seurat object file, in RDS or qs/qs2 format
|
|
2232
2277
|
|
|
2233
2278
|
Output:
|
|
2234
2279
|
outfile: The AnnData file
|
|
@@ -2249,29 +2294,26 @@ class AnnData2Seurat(Proc):
|
|
|
2249
2294
|
"""Convert AnnData to seurat object
|
|
2250
2295
|
|
|
2251
2296
|
Input:
|
|
2252
|
-
adfile: The AnnData file
|
|
2297
|
+
adfile: The AnnData .h5ad file
|
|
2253
2298
|
|
|
2254
2299
|
Output:
|
|
2255
|
-
outfile: The seurat object file in RDS format
|
|
2300
|
+
outfile: The seurat object file in RDS or qs/qs2 format
|
|
2256
2301
|
|
|
2257
2302
|
Envs:
|
|
2258
2303
|
assay: The assay to use to convert to seurat object.
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
- h5seurat: h5seurat file
|
|
2262
|
-
dotplot_check (type=auto): Whether to do a check with `Seurat::DotPlot`
|
|
2304
|
+
dotplot_check (type=auto): Whether to do a check with a dot plot.
|
|
2305
|
+
(`scplotter::FeatureStatPlot(plot_type = "dot", ..)` will be used)
|
|
2263
2306
|
to see if the conversion is successful.
|
|
2264
2307
|
Set to `False` to disable the check.
|
|
2265
2308
|
If `True`, top 10 variable genes will be used for the check.
|
|
2266
2309
|
You can give a list of genes or a string of genes with comma (`,`) separated
|
|
2267
2310
|
to use for the check.
|
|
2268
|
-
Only works for `outtype = 'rds'`.
|
|
2269
2311
|
"""
|
|
2270
2312
|
|
|
2271
2313
|
input = "adfile:file"
|
|
2272
|
-
output = "outfile:file:{{in.adfile | stem}}.
|
|
2314
|
+
output = "outfile:file:{{in.adfile | stem}}.qs"
|
|
2273
2315
|
lang = config.lang.rscript
|
|
2274
|
-
envs = {"
|
|
2316
|
+
envs = {"assay": "RNA", "dotplot_check": True}
|
|
2275
2317
|
script = "file://../scripts/scrna/AnnData2Seurat.R"
|
|
2276
2318
|
|
|
2277
2319
|
|
|
@@ -2287,7 +2329,7 @@ class ScSimulation(Proc):
|
|
|
2287
2329
|
So this could also work as a unique identifier for the simulation (ie. Sample ID).
|
|
2288
2330
|
|
|
2289
2331
|
Output:
|
|
2290
|
-
outfile: The output Seurat object/SingleCellExperiment in
|
|
2332
|
+
outfile: The output Seurat object/SingleCellExperiment in qs/qs2 format
|
|
2291
2333
|
|
|
2292
2334
|
Envs:
|
|
2293
2335
|
ngenes (type=int): The number of genes to simulate
|
|
@@ -2450,58 +2492,56 @@ class CellCellCommunication(Proc):
|
|
|
2450
2492
|
class CellCellCommunicationPlots(Proc):
|
|
2451
2493
|
"""Visualization for cell-cell communication inference.
|
|
2452
2494
|
|
|
2453
|
-
R package [`CCPlotR`](https://github.com/Sarah145/CCPlotR) is used to visualize
|
|
2454
|
-
the results.
|
|
2455
|
-
|
|
2456
2495
|
Input:
|
|
2457
2496
|
cccfile: The output file from `CellCellCommunication`
|
|
2458
|
-
or a tab-separated file with the following columns: `source`, `target`,
|
|
2459
|
-
`ligand`, `receptor`, and `score`.
|
|
2460
|
-
If so, `in.expfile` can be provided where `exp_df` is needed.
|
|
2461
|
-
expfile: The expression file with the expression of ligands and receptors.
|
|
2462
|
-
Columns include: `cell_type`, `gene` and `mean_exp`.
|
|
2463
2497
|
|
|
2464
2498
|
Output:
|
|
2465
2499
|
outdir: The output directory for the plots.
|
|
2466
2500
|
|
|
2467
2501
|
Envs:
|
|
2468
|
-
score_col: The column name in the input file that contains the score, if
|
|
2469
|
-
the input file is from `CellCellCommunication`.
|
|
2470
|
-
Two alias columns are added in the result file of `CellCellCommunication`,
|
|
2471
|
-
`mag_score` and `spec_score`, which are the magnitude and specificity
|
|
2472
|
-
scores.
|
|
2473
2502
|
subset: An expression to pass to `dplyr::filter()` to subset the ccc data.
|
|
2503
|
+
magnitude: The column name in the data to use as the magnitude of the
|
|
2504
|
+
communication. By default, the second last column will be used.
|
|
2505
|
+
See `li.mt.show_methods()` for the available methods in LIANA, or
|
|
2506
|
+
<https://liana-py.readthedocs.io/en/latest/notebooks/basic_usage.html#Tileplot>
|
|
2507
|
+
specificity: The column name in the data to use as the specificity of the communication.
|
|
2508
|
+
By default, the last column will be used. If the method doesn't have a specificity, set it to None.
|
|
2509
|
+
devpars (ns): The parameters for the plot.
|
|
2510
|
+
- res (type=int): The resolution of the plot
|
|
2511
|
+
- height (type=int): The height of the plot
|
|
2512
|
+
- width (type=int): The width of the plot
|
|
2513
|
+
more_formats (type=list): The additional formats to save the plots.
|
|
2514
|
+
descr: The description of the plot.
|
|
2474
2515
|
cases (type=json): The cases for the plots.
|
|
2475
2516
|
The keys are the names of the cases and the values are the arguments for
|
|
2476
|
-
the plots. The arguments include
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2480
|
-
`width`, and `height`.
|
|
2481
|
-
* section: The section name for the report to group the plots.
|
|
2482
|
-
* <other>: Other arguments for `cc_<kind>` function in `CCPlotR`.
|
|
2483
|
-
See the documentation for more details.
|
|
2484
|
-
Or you can use `?CCPlotR::cc_<kind>` in R.
|
|
2485
|
-
"""
|
|
2517
|
+
the plots. The arguments include the ones inherited from `envs`.
|
|
2518
|
+
<more>: Other arguments passed to
|
|
2519
|
+
[scplotter::CCCPlot](https://pwwang.github.io/scplotter/reference/CCCPlot.html)
|
|
2520
|
+
""" # noqa: E501
|
|
2486
2521
|
|
|
2487
|
-
input = "cccfile:file
|
|
2488
|
-
output = "outdir:dir:{{in.cccfile | stem}}
|
|
2522
|
+
input = "cccfile:file"
|
|
2523
|
+
output = "outdir:dir:{{in.cccfile | stem}}_plots"
|
|
2489
2524
|
lang = config.lang.rscript
|
|
2490
2525
|
envs = {
|
|
2491
|
-
"score_col": "mag_score",
|
|
2492
2526
|
"subset": None,
|
|
2527
|
+
"magnitude": None,
|
|
2528
|
+
"specificity": None,
|
|
2529
|
+
"devpars": {"res": 100},
|
|
2530
|
+
"more_formats": [],
|
|
2531
|
+
"descr": "Cell-cell communication plot",
|
|
2493
2532
|
"cases": {},
|
|
2494
2533
|
}
|
|
2495
2534
|
script = "file://../scripts/scrna/CellCellCommunicationPlots.R"
|
|
2496
2535
|
plugin_opts = {
|
|
2497
|
-
"report": "file://../reports/
|
|
2536
|
+
"report": "file://../reports/common.svelte",
|
|
2498
2537
|
}
|
|
2499
2538
|
|
|
2500
2539
|
|
|
2501
2540
|
class ScVelo(Proc):
|
|
2502
2541
|
"""Velocity analysis for single-cell RNA-seq data
|
|
2503
2542
|
|
|
2504
|
-
This process is implemented based on the Python package `scvelo
|
|
2543
|
+
This process is implemented based on the Python package `scvelo` (v0.3.3).
|
|
2544
|
+
Note that it doesn't work with `numpy>=2`.
|
|
2505
2545
|
|
|
2506
2546
|
Input:
|
|
2507
2547
|
sobjfile: The seurat object file in RDS or h5seurat format or AnnData file.
|
|
@@ -2509,18 +2549,20 @@ class ScVelo(Proc):
|
|
|
2509
2549
|
Output:
|
|
2510
2550
|
outfile: The output object with the velocity embeddings and information.
|
|
2511
2551
|
In either RDS, h5seurat or h5ad format, depending on the `envs.outtype`.
|
|
2512
|
-
|
|
2552
|
+
There will also be plots generated in the output directory
|
|
2553
|
+
(parent directory of `outfile`).
|
|
2554
|
+
Note that these plots will not be used in the report, but can be used as
|
|
2555
|
+
supplementary information for the velocity analysis.
|
|
2556
|
+
To visualize the velocity embeddings, you can use the `SeuratClusterStats`
|
|
2557
|
+
process with `v_reduction` provided to one of the `envs.dimplots`.
|
|
2513
2558
|
|
|
2514
2559
|
Envs:
|
|
2515
2560
|
ncores (type=int): Number of cores to use.
|
|
2516
2561
|
group_by: The column name in metadata to group the cells.
|
|
2517
2562
|
Typically, this column should be the cluster id.
|
|
2518
|
-
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
PCA will be performed.
|
|
2522
|
-
modes (type=auto): The modes to use for the analysis.
|
|
2523
|
-
A list or a string with comma separated values.
|
|
2563
|
+
mode (type=list): The mode to use for the velocity analysis.
|
|
2564
|
+
It should be a subset of `['deterministic', 'stochastic', 'dynamical']`,
|
|
2565
|
+
meaning that we can perform the velocity analysis in multiple modes.
|
|
2524
2566
|
fitting_by (choice): The mode to use for fitting the velocities.
|
|
2525
2567
|
- stochastic: Stochastic mode
|
|
2526
2568
|
- deterministic: Deterministic mode
|
|
@@ -2528,16 +2570,6 @@ class ScVelo(Proc):
|
|
|
2528
2570
|
(both unspliced and spliced) required for a gene.
|
|
2529
2571
|
n_neighbors (type=int): The number of neighbors to use for the velocity graph.
|
|
2530
2572
|
n_pcs (type=int): The number of PCs to use for the velocity graph.
|
|
2531
|
-
stream_smooth (type=float): Multiplication factor for scale in Gaussian kernel
|
|
2532
|
-
around grid point.
|
|
2533
|
-
stream_density (type=float): Controls the closeness of streamlines.
|
|
2534
|
-
When density = 2.0, the domain is divided into a 60x60 grid, whereas
|
|
2535
|
-
density linearly scales this grid. Each cell in the grid can have,
|
|
2536
|
-
at most, one traversing streamline. For different densities in each
|
|
2537
|
-
direction, use a tuple (density_x, density_y).
|
|
2538
|
-
arrow_size (type=float): Scaling factor for the arrow size.
|
|
2539
|
-
arrow_length (type=float): Length of arrows.
|
|
2540
|
-
arrow_density (type=float): Density of arrows.
|
|
2541
2573
|
denoise (flag): Whether to denoise the data.
|
|
2542
2574
|
denoise_topn (type=int): Number of genes with highest likelihood selected to
|
|
2543
2575
|
infer velocity directions.
|
|
@@ -2546,57 +2578,60 @@ class ScVelo(Proc):
|
|
|
2546
2578
|
infer velocity directions.
|
|
2547
2579
|
calculate_velocity_genes (flag): Whether to calculate the velocity genes.
|
|
2548
2580
|
top_n (type=int): The number of top features to plot.
|
|
2549
|
-
res (type=int): The resolution of the plots.
|
|
2550
2581
|
rscript: The path to the Rscript executable used to convert RDS file to AnnData.
|
|
2551
2582
|
If `in.sobjfile` is an RDS file, it will be converted to AnnData file
|
|
2552
2583
|
(h5ad). You need `Seurat`, `SeuratDisk` and `digest` installed.
|
|
2553
2584
|
outtype (choice): The output file type.
|
|
2554
|
-
- input
|
|
2555
|
-
-
|
|
2556
|
-
-
|
|
2557
|
-
-
|
|
2585
|
+
- <input>: The same as the input file type.
|
|
2586
|
+
- h5seurat: h5seurat file
|
|
2587
|
+
- h5ad: h5ad file
|
|
2588
|
+
- qs: qs/qs2 file
|
|
2589
|
+
- qs2: qs2 file
|
|
2590
|
+
- rds: RDS file
|
|
2558
2591
|
"""
|
|
2559
2592
|
|
|
2560
2593
|
input = "sobjfile:file"
|
|
2561
|
-
output =
|
|
2594
|
+
output = (
|
|
2595
|
+
"outfile:file:{{in.sobjfile | stem}}-scvelo."
|
|
2596
|
+
"{{ext0(in.sobjfile) if envs.outtype == '<input>' else envs.outtype}}"
|
|
2597
|
+
)
|
|
2562
2598
|
lang = config.lang.python
|
|
2563
2599
|
envs = {
|
|
2564
2600
|
"ncores": config.misc.ncores,
|
|
2565
2601
|
"group_by": "seurat_clusters",
|
|
2566
|
-
"
|
|
2567
|
-
"modes": ["stochastic", "deterministic", "dynamical"],
|
|
2602
|
+
"mode": ["deterministic", "stochastic", "dynamical"],
|
|
2568
2603
|
"fitting_by": "stochastic",
|
|
2569
2604
|
"min_shared_counts": 30,
|
|
2570
2605
|
"n_neighbors": 30,
|
|
2571
2606
|
"n_pcs": 30,
|
|
2572
|
-
"stream_smooth": 0.5,
|
|
2573
|
-
"stream_density": 2.0,
|
|
2574
|
-
"arrow_size": 5.0,
|
|
2575
|
-
"arrow_length": 5.0,
|
|
2576
|
-
"arrow_density": 0.5,
|
|
2577
2607
|
"denoise": False,
|
|
2578
2608
|
"denoise_topn": 3,
|
|
2579
2609
|
"kinetics": False,
|
|
2580
2610
|
"kinetics_topn": 100,
|
|
2581
2611
|
"calculate_velocity_genes": False,
|
|
2582
2612
|
"top_n": 6,
|
|
2583
|
-
"res": 100,
|
|
2584
2613
|
"rscript": config.lang.rscript,
|
|
2585
|
-
"outtype": "input",
|
|
2614
|
+
"outtype": "<input>",
|
|
2586
2615
|
}
|
|
2587
2616
|
script = "file://../scripts/scrna/ScVelo.py"
|
|
2588
2617
|
|
|
2589
2618
|
|
|
2590
|
-
class
|
|
2591
|
-
"""Trajectory inference using
|
|
2619
|
+
class Slingshot(Proc):
|
|
2620
|
+
"""Trajectory inference using Slingshot
|
|
2592
2621
|
|
|
2593
2622
|
This process is implemented based on the R package `slingshot`.
|
|
2594
2623
|
|
|
2595
2624
|
Input:
|
|
2596
|
-
sobjfile: The seurat object file in RDS.
|
|
2625
|
+
sobjfile: The seurat object file in RDS or qs format.
|
|
2597
2626
|
|
|
2598
2627
|
Output:
|
|
2599
2628
|
outfile: The output object with the trajectory information.
|
|
2629
|
+
The lineages are stored in the metadata of the seurat object at
|
|
2630
|
+
columns `LineageX`, where X is the lineage number. The `BranchID`
|
|
2631
|
+
column contains the branch id for each cell.
|
|
2632
|
+
One can use
|
|
2633
|
+
`scplotter::CellDimPlot(object, lineages = c("Lineage1", "Lineage2", ...))`
|
|
2634
|
+
to visualize the trajectories.
|
|
2600
2635
|
|
|
2601
2636
|
Envs:
|
|
2602
2637
|
group_by: The column name in metadata to group the cells.
|
|
@@ -2605,8 +2640,8 @@ class SlingShot(Proc):
|
|
|
2605
2640
|
dims (type=auto): The dimensions to use for the analysis.
|
|
2606
2641
|
A list or a string with comma separated values.
|
|
2607
2642
|
Consecutive numbers can be specified with a colon (`:`) or a dash (`-`).
|
|
2608
|
-
start: The starting group for the
|
|
2609
|
-
end: The ending group for the
|
|
2643
|
+
start: The starting group for the Slingshot analysis.
|
|
2644
|
+
end: The ending group for the Slingshot analysis.
|
|
2610
2645
|
prefix: The prefix to add to the column names of the resulting pseudotime variable.
|
|
2611
2646
|
reverse (flag): Logical value indicating whether to reverse the pseudotime variable.
|
|
2612
2647
|
align_start (flag): Whether to align the starting pseudotime values at the maximum pseudotime.
|
|
@@ -2614,7 +2649,7 @@ class SlingShot(Proc):
|
|
|
2614
2649
|
""" # noqa: E501
|
|
2615
2650
|
|
|
2616
2651
|
input = "sobjfile:file"
|
|
2617
|
-
output = "outfile:file:{{in.sobjfile | stem}}.
|
|
2652
|
+
output = "outfile:file:{{in.sobjfile | stem}}.qs"
|
|
2618
2653
|
lang = config.lang.rscript
|
|
2619
2654
|
envs = {
|
|
2620
2655
|
"group_by": "seurat_clusters",
|
|
@@ -2627,7 +2662,7 @@ class SlingShot(Proc):
|
|
|
2627
2662
|
"align_start": False,
|
|
2628
2663
|
"seed": 8525,
|
|
2629
2664
|
}
|
|
2630
|
-
script = "file://../scripts/scrna/
|
|
2665
|
+
script = "file://../scripts/scrna/Slingshot.R"
|
|
2631
2666
|
|
|
2632
2667
|
|
|
2633
2668
|
class LoomTo10X(Proc):
|
|
@@ -2641,6 +2676,7 @@ class LoomTo10X(Proc):
|
|
|
2641
2676
|
including the `matrix.mtx.gz`, `barcodes.tsv.gz` and `features.tsv.gz`
|
|
2642
2677
|
files.
|
|
2643
2678
|
"""
|
|
2679
|
+
|
|
2644
2680
|
input = "loomfile:file"
|
|
2645
2681
|
output = "outdir:dir:{{in.loomfile | stem}}.10X"
|
|
2646
2682
|
lang = config.lang.rscript
|