biopipen 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +6 -0
- biopipen/core/filters.py +77 -26
- biopipen/core/testing.py +6 -1
- biopipen/ns/bam.py +39 -0
- biopipen/ns/cellranger.py +5 -0
- biopipen/ns/cellranger_pipeline.py +2 -2
- biopipen/ns/cnvkit_pipeline.py +4 -1
- biopipen/ns/delim.py +33 -27
- biopipen/ns/protein.py +99 -0
- biopipen/ns/scrna.py +411 -250
- biopipen/ns/snp.py +16 -3
- biopipen/ns/tcr.py +125 -1
- biopipen/ns/vcf.py +34 -0
- biopipen/ns/web.py +5 -1
- biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
- biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
- biopipen/reports/tcr/ClonalStats.svelte +15 -0
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +2 -2
- biopipen/scripts/bam/BamSampling.py +4 -4
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +3 -3
- biopipen/scripts/bam/CNVpytor.py +10 -10
- biopipen/scripts/bam/ControlFREEC.py +11 -11
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
- biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +20 -9
- biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
- biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
- biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
- biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/SampleInfo.R +85 -139
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +4 -4
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifScan.py +8 -8
- biopipen/scripts/scrna/CellCellCommunication.py +59 -22
- biopipen/scripts/scrna/CellsDistribution.R +31 -6
- biopipen/scripts/scrna/MarkersFinder.R +272 -602
- biopipen/scripts/scrna/MetaMarkers.R +16 -7
- biopipen/scripts/scrna/RadarPlots.R +75 -35
- biopipen/scripts/scrna/SCP-plot.R +15202 -0
- biopipen/scripts/scrna/ScVelo.py +0 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -25
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -47
- biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -385
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +33 -13
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -228
- biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
- biopipen/scripts/scrna/SeuratMap2Ref.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +138 -81
- biopipen/scripts/scrna/SlingShot.R +71 -0
- biopipen/scripts/scrna/TopExpressingGenes.R +9 -7
- biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
- biopipen/scripts/snp/Plink2GTMat.py +26 -11
- biopipen/scripts/snp/PlinkFilter.py +7 -7
- biopipen/scripts/snp/PlinkFromVcf.py +8 -5
- biopipen/scripts/snp/PlinkSimulation.py +4 -4
- biopipen/scripts/snp/PlinkUpdateName.py +4 -4
- biopipen/scripts/stats/ChowTest.R +48 -22
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +12 -2
- biopipen/scripts/tcr/ClonalStats.R +484 -0
- biopipen/scripts/tcr/CloneResidency.R +23 -5
- biopipen/scripts/tcr/Immunarch-basic.R +8 -1
- biopipen/scripts/tcr/Immunarch-clonality.R +5 -0
- biopipen/scripts/tcr/Immunarch-diversity.R +25 -4
- biopipen/scripts/tcr/Immunarch-geneusage.R +15 -1
- biopipen/scripts/tcr/Immunarch-kmer.R +14 -1
- biopipen/scripts/tcr/Immunarch-overlap.R +15 -1
- biopipen/scripts/tcr/Immunarch-spectratyping.R +10 -1
- biopipen/scripts/tcr/Immunarch-tracking.R +6 -0
- biopipen/scripts/tcr/Immunarch-vjjunc.R +33 -0
- biopipen/scripts/tcr/ScRepLoading.R +127 -0
- biopipen/scripts/tcr/TCRClusterStats.R +24 -7
- biopipen/scripts/tcr/TCRDock.py +10 -6
- biopipen/scripts/tcr/TESSA.R +6 -1
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
- biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +4 -4
- biopipen/scripts/vcf/BcftoolsView.py +5 -5
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +12 -3
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +3 -3
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
- biopipen/scripts/web/gcloud_common.py +1 -1
- biopipen/utils/gsea.R +96 -42
- biopipen/utils/misc.R +205 -7
- biopipen/utils/misc.py +17 -8
- biopipen/utils/plot.R +53 -17
- biopipen/utils/reference.py +11 -11
- biopipen/utils/repr.R +146 -0
- biopipen/utils/vcf.py +1 -1
- {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/METADATA +9 -9
- {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/RECORD +131 -122
- {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -139
- biopipen/scripts/scrna/SeuratPreparing-common.R +0 -452
- biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -201
- {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0
biopipen/ns/scrna.py
CHANGED
|
@@ -97,8 +97,8 @@ class SeuratPreparing(Proc):
|
|
|
97
97
|
|
|
98
98
|
Output:
|
|
99
99
|
rdsfile: The RDS file with the Seurat object with all samples integrated.
|
|
100
|
-
Note that the cell ids are
|
|
101
|
-
saved in `<job.outdir>/
|
|
100
|
+
Note that the cell ids are prefixed with sample names.
|
|
101
|
+
QC plots will be saved in `<job.outdir>/plots`.
|
|
102
102
|
|
|
103
103
|
Envs:
|
|
104
104
|
ncores (type=int): Number of cores to use.
|
|
@@ -140,6 +140,19 @@ class SeuratPreparing(Proc):
|
|
|
140
140
|
will keep genes that are expressed in at least 3 cells.
|
|
141
141
|
///
|
|
142
142
|
|
|
143
|
+
qc_plots (type=json): The plots for QC metrics.
|
|
144
|
+
It should be a json (or python dict) with the keys as the names of the plots and
|
|
145
|
+
the values also as dicts with the following keys:
|
|
146
|
+
* kind: The kind of QC. Either `gene` or `cell` (default).
|
|
147
|
+
* devpars: The device parameters for the plot. A dict with `res`, `height`, and `width`.
|
|
148
|
+
* more_formats: The formats to save the plots other than `png`.
|
|
149
|
+
* save_code: Whether to save the code to reproduce the plot.
|
|
150
|
+
* other arguments passed to
|
|
151
|
+
[`biopipen.utils::VizSeuratCellQC`](https://pwwang.github.io/biopipen.utils.R/reference/VizSeuratCellQC.html)
|
|
152
|
+
when `kind` is `cell` or
|
|
153
|
+
[`biopipen.utils::VizSeuratGeneQC`](https://pwwang.github.io/biopipen.utils.R/reference/VizSeuratGeneQC.html)
|
|
154
|
+
when `kind` is `gene`.
|
|
155
|
+
|
|
143
156
|
use_sct (flag): Whether to use the SCTransform routine to integrate samples or not.
|
|
144
157
|
Before the following procedures, the `RNA` layer will be split by samples.
|
|
145
158
|
|
|
@@ -244,6 +257,7 @@ class SeuratPreparing(Proc):
|
|
|
244
257
|
r-bracer:
|
|
245
258
|
- check: {{proc.lang}} <(echo "library(bracer)")
|
|
246
259
|
""" # noqa: E501
|
|
260
|
+
|
|
247
261
|
input = "metafile:file"
|
|
248
262
|
output = "rdsfile:file:{{in.metafile | stem}}.seurat.RDS"
|
|
249
263
|
lang = config.lang.rscript
|
|
@@ -252,6 +266,28 @@ class SeuratPreparing(Proc):
|
|
|
252
266
|
"cell_qc": None, # "nFeature_RNA > 200 & percent.mt < 5",
|
|
253
267
|
"cell_qc_per_sample": False,
|
|
254
268
|
"gene_qc": {"min_cells": 0, "excludes": []},
|
|
269
|
+
"qc_plots": {
|
|
270
|
+
"Violin Plots of QC Metrics": {
|
|
271
|
+
"kind": "cell",
|
|
272
|
+
"plot_type": "violin",
|
|
273
|
+
"devpars": {"res": 100, "height": 600, "width": 1200},
|
|
274
|
+
},
|
|
275
|
+
"Scatter Plots of QC Metrics": {
|
|
276
|
+
"kind": "cell",
|
|
277
|
+
"plot_type": "scatter",
|
|
278
|
+
"devpars": {"res": 100, "height": 800, "width": 1200},
|
|
279
|
+
},
|
|
280
|
+
"Ridge Plots of QC Metrics": {
|
|
281
|
+
"kind": "cell",
|
|
282
|
+
"plot_type": "ridge",
|
|
283
|
+
"devpars": {"res": 100, "height": 800, "width": 1200},
|
|
284
|
+
},
|
|
285
|
+
# "Number of Expressing Cells for Excluded Genes (10)": {
|
|
286
|
+
# "kind": "gene",
|
|
287
|
+
# "features": 10,
|
|
288
|
+
# "devpars": {"res": 100, "height": 1200, "width": 1200}
|
|
289
|
+
# },
|
|
290
|
+
},
|
|
255
291
|
"use_sct": False,
|
|
256
292
|
"no_integration": False,
|
|
257
293
|
"NormalizeData": {},
|
|
@@ -338,6 +374,7 @@ class SeuratClustering(Proc):
|
|
|
338
374
|
r-dplyr:
|
|
339
375
|
- check: {{proc.lang}} <(echo "library(dplyr)")
|
|
340
376
|
""" # noqa: E501
|
|
377
|
+
|
|
341
378
|
input = "srtobj:file"
|
|
342
379
|
output = "rdsfile:file:{{in.srtobj | stem}}.RDS"
|
|
343
380
|
lang = config.lang.rscript
|
|
@@ -413,6 +450,7 @@ class SeuratSubClustering(Proc):
|
|
|
413
450
|
Keys are the names of the cases and values are the dicts inherited from `envs` except `mutaters` and `cache`.
|
|
414
451
|
If empty, a case with name `subcluster` will be created with default parameters.
|
|
415
452
|
""" # noqa: E501
|
|
453
|
+
|
|
416
454
|
input = "srtobj:file"
|
|
417
455
|
output = "rdsfile:file:{{in.srtobj | stem}}.RDS"
|
|
418
456
|
lang = config.lang.rscript
|
|
@@ -487,7 +525,10 @@ class SeuratClusterStats(Proc):
|
|
|
487
525
|
srtobj: The seurat object loaded by `SeuratClustering`
|
|
488
526
|
|
|
489
527
|
Output:
|
|
490
|
-
outdir: The output directory
|
|
528
|
+
outdir: The output directory.
|
|
529
|
+
Different types of plots will be saved in different subdirectories.
|
|
530
|
+
For example, `clustree` plots will be saved in `clustrees` subdirectory.
|
|
531
|
+
For each case in `envs.clustrees`, both the png and pdf files will be saved.
|
|
491
532
|
|
|
492
533
|
Envs:
|
|
493
534
|
mutaters (type=json): The mutaters to mutate the metadata to subset the cells.
|
|
@@ -497,101 +538,41 @@ class SeuratClusterStats(Proc):
|
|
|
497
538
|
- res (type=int): The resolution of the plots.
|
|
498
539
|
- height (type=int): The height of the plots.
|
|
499
540
|
- width (type=int): The width of the plots.
|
|
500
|
-
-
|
|
541
|
+
- more_formats (list): The formats to save the plots other than `png`.
|
|
542
|
+
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
543
|
+
- prefix (type=auto): string indicating columns containing clustering information.
|
|
501
544
|
The trailing dot is not necessary and will be added automatically.
|
|
502
|
-
When `
|
|
545
|
+
When `TRUE`, clustrees will be plotted when there is `FindClusters` or
|
|
503
546
|
`FindClusters.*` in the `obj@commands`.
|
|
504
547
|
The latter is generated by `SeuratSubClustering`.
|
|
505
|
-
This will be ignored when `envs.clustrees` is specified
|
|
506
|
-
|
|
507
|
-
|
|
548
|
+
This will be ignored when `envs.clustrees` is specified
|
|
549
|
+
(the prefix of each case must be specified separately).
|
|
550
|
+
- <more>: Other arguments passed to `scplotter::ClustreePlot`.
|
|
551
|
+
See <https://pwwang.github.io/scplotter/reference/ClustreePlot.html>
|
|
508
552
|
clustrees (type=json): The cases for clustree plots.
|
|
509
553
|
Keys are the names of the plots and values are the dicts inherited from `env.clustrees_defaults` except `prefix`.
|
|
510
554
|
There is no default case for `clustrees`.
|
|
511
|
-
hists_defaults (ns): The default parameters for histograms.
|
|
512
|
-
This will plot histograms for the number of cells along `x`.
|
|
513
|
-
For example, you can plot the number of cells along cell activity score.
|
|
514
|
-
- x: The column name in metadata to plot as the x-axis.
|
|
515
|
-
The NA values will be removed.
|
|
516
|
-
It could be either numeric or factor/character.
|
|
517
|
-
- x_order (list): The order of the x-axis, only works for factor/character `x`.
|
|
518
|
-
You can also use it to subset `x` (showing only a subset values of `x`).
|
|
519
|
-
- cells_by: A column name in metadata to group the cells.
|
|
520
|
-
The NA values will be removed. It should be a factor/character.
|
|
521
|
-
if not specified, all cells will be used.
|
|
522
|
-
- cells_order (list): The order of the cell groups for the plots.
|
|
523
|
-
It should be a list of strings. You can also use `cells_orderby` and `cells_n`
|
|
524
|
-
to determine the order.
|
|
525
|
-
- cells_orderby: An expression passed to `dplyr::arrange()` to order the cell groups.
|
|
526
|
-
- cells_n: The number of cell groups to show.
|
|
527
|
-
Ignored if `cells_order` is specified.
|
|
528
|
-
- ncol (type=int): The number of columns for the plots, split by `cells_by`.
|
|
529
|
-
- subset: An expression to subset the cells, will be passed to `dplyr::filter()`.
|
|
530
|
-
- each: Whether to plot each group separately.
|
|
531
|
-
- bins: The number of bins to use, only works for numeric `x`.
|
|
532
|
-
- plus (list): The extra elements to add to the `ggplot` object.
|
|
533
|
-
- devpars (ns): The device parameters for the plots.
|
|
534
|
-
- res (type=int): The resolution of the plots.
|
|
535
|
-
- height (type=int): The height of the plots.
|
|
536
|
-
- width (type=int): The width of the plots.
|
|
537
|
-
hists (type=json): The cases for histograms.
|
|
538
|
-
Keys are the names of the plots and values are the dicts inherited from `env.hists_defaults`.
|
|
539
|
-
There is no default case.
|
|
540
555
|
stats_defaults (ns): The default parameters for `stats`.
|
|
541
|
-
This is to do some basic statistics on the clusters. For more comprehensive analysis,
|
|
542
|
-
see
|
|
556
|
+
This is to do some basic statistics on the clusters/cells. For more comprehensive analysis,
|
|
557
|
+
see <https://pwwang.github.io/scplotter/reference/CellStatPlot.html>.
|
|
543
558
|
The parameters from the cases can overwrite the default parameters.
|
|
544
|
-
-
|
|
545
|
-
|
|
546
|
-
The total fraction of the cells of idents in each group will be 1.
|
|
547
|
-
When `group-by` is not specified, it will be the same as `all`.
|
|
548
|
-
- ident: calculate the fraction in each ident.
|
|
549
|
-
The total fraction of the cells of groups in each ident will be 1.
|
|
550
|
-
Only works when `group-by` is specified.
|
|
551
|
-
- cluster: alias of `ident`.
|
|
552
|
-
- all: calculate the fraction against all cells.
|
|
553
|
-
- none: do not calculate the fraction, use the number of cells instead.
|
|
554
|
-
- pie (flag): Also output a pie chart?
|
|
555
|
-
- circos (flag): Also output a circos plot?
|
|
556
|
-
- table (flag): Whether to output a table (in tab-delimited format) and in the report.
|
|
557
|
-
- transpose (flag): Whether to transpose the cluster and group, that is,
|
|
558
|
-
using group as the x-axis and cluster to fill the plot.
|
|
559
|
-
For circos plot, when transposed, the arrows will be drawn from the idents (by `ident`) to the
|
|
560
|
-
the groups (by `group-by`).
|
|
561
|
-
Only works when `group-by` is specified.
|
|
562
|
-
- position (choice): The position of the bars. Does not work for pie and circos plots.
|
|
563
|
-
- stack: Use `position_stack()`.
|
|
564
|
-
- fill: Use `position_fill()`.
|
|
565
|
-
- dodge: Use `position_dodge()`.
|
|
566
|
-
- auto: Use `stack` when there are more than 5 groups, otherwise use `dodge`.
|
|
567
|
-
- ident: The column name in metadata to use as the identity.
|
|
568
|
-
- group-by: The column name in metadata to group the cells.
|
|
569
|
-
Does NOT support for pie charts.
|
|
570
|
-
- split-by: The column name in metadata to split the cells into different plots.
|
|
571
|
-
Does NOT support for circos plots.
|
|
572
|
-
- subset: An expression to subset the cells, will be passed to
|
|
573
|
-
`dplyr::filter()` on metadata.
|
|
574
|
-
- circos_labels_rot (flag): Whether to rotate the labels in the circos plot.
|
|
575
|
-
In case the labels are too long.
|
|
576
|
-
- circos_devpars (ns): The device parameters for the circos plots.
|
|
577
|
-
- res (type=int): The resolution of the plots.
|
|
578
|
-
- height (type=int): The height of the plots.
|
|
579
|
-
- width (type=int): The width of the plots.
|
|
580
|
-
- pie_devpars (ns): The device parameters for the pie charts.
|
|
581
|
-
- res (type=int): The resolution of the plots.
|
|
582
|
-
- height (type=int): The height of the plots.
|
|
583
|
-
- width (type=int): The width of the plots.
|
|
584
|
-
- devpars (ns): The device parameters for the plots.
|
|
559
|
+
- subset: An expression to subset the cells, will be passed to `tidyrseurat::filter()`.
|
|
560
|
+
- devpars (ns): The device parameters for the plots.
|
|
585
561
|
- res (type=int): The resolution of the plots.
|
|
586
562
|
- height (type=int): The height of the plots.
|
|
587
563
|
- width (type=int): The width of the plots.
|
|
564
|
+
- more_formats (list): The formats to save the plots other than `png`.
|
|
565
|
+
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
566
|
+
- save_data (flag): Whether to save the data used to generate the plot.
|
|
567
|
+
- <more>: Other arguments passed to `scplotter::CellStatPlot`.
|
|
568
|
+
See <https://pwwang.github.io/scplotter/reference/CellStatPlot.html>.
|
|
588
569
|
stats (type=json): The number/fraction of cells to plot.
|
|
589
570
|
Keys are the names of the plots and values are the dicts inherited from `env.stats_defaults`.
|
|
590
571
|
Here are some examples -
|
|
591
572
|
>>> {
|
|
592
573
|
>>> "nCells_All": {},
|
|
593
|
-
>>> "nCells_Sample": {"
|
|
594
|
-
>>> "fracCells_Sample": {"
|
|
574
|
+
>>> "nCells_Sample": {"group_by": "Sample"},
|
|
575
|
+
>>> "fracCells_Sample": {"scale_y": True, "group_by": "Sample", "plot_type": "pie"},
|
|
595
576
|
>>> }
|
|
596
577
|
ngenes_defaults (ns): The default parameters for `ngenes`.
|
|
597
578
|
The default parameters to plot the number of genes expressed in each cell.
|
|
@@ -610,61 +591,30 @@ class SeuratClusterStats(Proc):
|
|
|
610
591
|
- features: The features to plot.
|
|
611
592
|
It can be either a string with comma separated features, a list of features, a file path with `file://` prefix with features
|
|
612
593
|
(one per line), or an integer to use the top N features from `VariableFeatures(srtobj)`.
|
|
613
|
-
-
|
|
614
|
-
|
|
615
|
-
- cluster_orderby (type=auto): The order of the clusters to show on the plot.
|
|
616
|
-
An expression passed to `dplyr::summarise()` on the grouped data frame (by `seurat_clusters`).
|
|
617
|
-
The summary stat will be passed to `dplyr::arrange()` to order the clusters. It's applied on the whole meta.data before grouping and subsetting.
|
|
594
|
+
- order_by (type=auto): The order of the clusters to show on the plot.
|
|
595
|
+
An expression passed to `dplyr::arrange()` on the grouped meta data frame (by `ident`).
|
|
618
596
|
For example, you can order the clusters by the activation score of
|
|
619
597
|
the cluster: `desc(mean(ActivationScore, na.rm = TRUE))`, suppose you have a column
|
|
620
598
|
`ActivationScore` in the metadata.
|
|
621
|
-
You may also specify the literal order of the clusters by a list of strings.
|
|
599
|
+
You may also specify the literal order of the clusters by a list of strings (at least two).
|
|
622
600
|
- subset: An expression to subset the cells, will be passed to `tidyrseurat::filter()`.
|
|
623
601
|
- devpars (ns): The device parameters for the plots. Does not work for `table`.
|
|
624
602
|
- res (type=int): The resolution of the plots.
|
|
625
603
|
- height (type=int): The height of the plots.
|
|
626
604
|
- width (type=int): The width of the plots.
|
|
627
|
-
-
|
|
628
|
-
-
|
|
629
|
-
|
|
630
|
-
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
- layer: The layer to use.
|
|
634
|
-
- reduction: The reduction to use. Only works for `feature`.
|
|
635
|
-
- section: The section to put the plot in the report.
|
|
636
|
-
If not specified, the case title will be used.
|
|
637
|
-
- ncol (type=int): The number of columns for the plots.
|
|
638
|
-
- kind (choice): The kind of the plot or table.
|
|
639
|
-
- ridge: Use `Seurat::RidgePlot`.
|
|
640
|
-
- ridgeplot: Same as `ridge`.
|
|
641
|
-
- vln: Use `Seurat::VlnPlot`.
|
|
642
|
-
- vlnplot: Same as `vln`.
|
|
643
|
-
- violin: Same as `vln`.
|
|
644
|
-
- violinplot: Same as `vln`.
|
|
645
|
-
- feature: Use `Seurat::FeaturePlot`.
|
|
646
|
-
- featureplot: Same as `feature`.
|
|
647
|
-
- dot: Use `Seurat::DotPlot`.
|
|
648
|
-
- dotplot: Same as `dot`.
|
|
649
|
-
- bar: Bar plot on an aggregated feature.
|
|
650
|
-
The features must be a single feature, which will be either an existing feature or an expression
|
|
651
|
-
passed to `dplyr::summarise()` (grouped by `ident`) on the existing features to create a new feature.
|
|
652
|
-
- barplot: Same as `bar`.
|
|
653
|
-
- heatmap: Use `Seurat::DoHeatmap`.
|
|
654
|
-
- avgheatmap: Plot the average expression of the features in each cluster as a heatmap.
|
|
655
|
-
- table: The table for the features, only gene expressions are supported.
|
|
656
|
-
(supported keys: ident, subset, and features).
|
|
605
|
+
- descr: The description of the plot, showing in the report.
|
|
606
|
+
- more_formats (list): The formats to save the plots other than `png`.
|
|
607
|
+
- save_code (flag): Whether to save the code to reproduce the plot.
|
|
608
|
+
- save_data (flag): Whether to save the data used to generate the plot.
|
|
609
|
+
- <more>: Other arguments passed to `scplotter::FeatureStatPlot`.
|
|
610
|
+
See <https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html>
|
|
657
611
|
features (type=json): The plots for features, include gene expressions, and columns from metadata.
|
|
658
|
-
Keys are the titles of the cases and values are the dicts inherited from `env.features_defaults`.
|
|
659
|
-
each Seurat function used by `kind`. Note that for argument name with `.`, you should use `-` instead.
|
|
612
|
+
Keys are the titles of the cases and values are the dicts inherited from `env.features_defaults`.
|
|
660
613
|
dimplots_defaults (ns): The default parameters for `dimplots`.
|
|
661
|
-
-
|
|
614
|
+
- group_by: The identity to use.
|
|
662
615
|
If it is from subclustering (reduction `sub_umap_<ident>` exists), this reduction will be used if `reduction`
|
|
663
616
|
is set to `dim` or `auto`.
|
|
664
|
-
-
|
|
665
|
-
- na_group: The group name for NA values, use `None` to ignore NA values.
|
|
666
|
-
- split-by: The column name in metadata to split the cells into different plots.
|
|
667
|
-
- shape-by: The column name in metadata to use as the shape.
|
|
617
|
+
- split_by: The column name in metadata to split the cells into different plots.
|
|
668
618
|
- subset: An expression to subset the cells, will be passed to `tidyrseurat::filter()`.
|
|
669
619
|
- devpars (ns): The device parameters for the plots.
|
|
670
620
|
- res (type=int): The resolution of the plots.
|
|
@@ -678,64 +628,42 @@ class SeuratClusterStats(Proc):
|
|
|
678
628
|
- umap: Use `Seurat::UMAPPlot`.
|
|
679
629
|
- tsne: Use `Seurat::TSNEPlot`.
|
|
680
630
|
- pca: Use `Seurat::PCAPlot`.
|
|
681
|
-
- <more>: See <https://
|
|
631
|
+
- <more>: See <https://pwwang.github.io/scplotter/reference/CellDimPlot.html>
|
|
682
632
|
dimplots (type=json): The dimensional reduction plots.
|
|
683
633
|
Keys are the titles of the plots and values are the dicts inherited from `env.dimplots_defaults`. It can also have other parameters from
|
|
684
|
-
[`
|
|
634
|
+
[`scplotter::CellDimPlot`](https://pwwang.github.io/scplotter/reference/CellDimPlot.html).
|
|
685
635
|
|
|
686
636
|
Requires:
|
|
687
637
|
r-seurat:
|
|
688
638
|
- check: {{proc.lang}} -e "library(Seurat)"
|
|
689
639
|
""" # noqa: E501
|
|
640
|
+
|
|
690
641
|
input = "srtobj:file"
|
|
691
642
|
output = "outdir:dir:{{in.srtobj | stem}}.cluster_stats"
|
|
692
643
|
lang = config.lang.rscript
|
|
693
644
|
envs = {
|
|
694
645
|
"mutaters": {},
|
|
695
646
|
"clustrees_defaults": {
|
|
696
|
-
"devpars": {"res": 100
|
|
697
|
-
"
|
|
647
|
+
"devpars": {"res": 100},
|
|
648
|
+
"more_formats": [],
|
|
649
|
+
"save_code": False,
|
|
650
|
+
"prefix": True,
|
|
698
651
|
},
|
|
699
652
|
"clustrees": {},
|
|
700
|
-
"hists_defaults": {
|
|
701
|
-
"x": None,
|
|
702
|
-
"x_order": [],
|
|
703
|
-
"cells_by": None,
|
|
704
|
-
"cells_order": [],
|
|
705
|
-
"cells_orderby": None,
|
|
706
|
-
"cells_n": 10,
|
|
707
|
-
"subset": None,
|
|
708
|
-
"ncol": 2,
|
|
709
|
-
"each": None,
|
|
710
|
-
"bins": 30,
|
|
711
|
-
"plus": [],
|
|
712
|
-
"devpars": {"res": 100, "height": None, "width": None},
|
|
713
|
-
},
|
|
714
|
-
"hists": {},
|
|
715
653
|
"stats_defaults": {
|
|
716
|
-
"frac": "none",
|
|
717
|
-
"pie": False,
|
|
718
|
-
"circos": False,
|
|
719
|
-
"table": False,
|
|
720
|
-
"position": "auto",
|
|
721
|
-
"transpose": False,
|
|
722
|
-
"ident": "seurat_clusters",
|
|
723
|
-
"group-by": None,
|
|
724
|
-
"split-by": None,
|
|
725
654
|
"subset": None,
|
|
726
|
-
"
|
|
727
|
-
"
|
|
728
|
-
"
|
|
729
|
-
"
|
|
655
|
+
"devpars": {"res": 100},
|
|
656
|
+
"more_formats": [],
|
|
657
|
+
"save_code": False,
|
|
658
|
+
"save_data": False,
|
|
730
659
|
},
|
|
731
660
|
"stats": {
|
|
732
|
-
"Number of cells in each cluster": {
|
|
733
|
-
"
|
|
661
|
+
"Number of cells in each cluster (Bar Chart)": {
|
|
662
|
+
"plot_type": "bar",
|
|
734
663
|
},
|
|
735
|
-
"Number of cells in each cluster by Sample": {
|
|
736
|
-
"
|
|
737
|
-
"
|
|
738
|
-
"frac": "group",
|
|
664
|
+
"Number of cells in each cluster by Sample (Bar Chart)": {
|
|
665
|
+
"plot_type": "bar",
|
|
666
|
+
"group_by": "Sample",
|
|
739
667
|
},
|
|
740
668
|
},
|
|
741
669
|
"ngenes_defaults": {
|
|
@@ -750,43 +678,31 @@ class SeuratClusterStats(Proc):
|
|
|
750
678
|
},
|
|
751
679
|
"features_defaults": {
|
|
752
680
|
"features": None,
|
|
753
|
-
"
|
|
754
|
-
"cluster_orderby": None,
|
|
681
|
+
"order_by": None,
|
|
755
682
|
"subset": None,
|
|
756
683
|
"devpars": {"res": 100},
|
|
757
|
-
"
|
|
758
|
-
"
|
|
759
|
-
"
|
|
760
|
-
"
|
|
761
|
-
"section": None,
|
|
762
|
-
"layer": None,
|
|
763
|
-
"reduction": None,
|
|
764
|
-
"kind": None,
|
|
765
|
-
"ncol": 2,
|
|
684
|
+
"descr": None,
|
|
685
|
+
"more_formats": [],
|
|
686
|
+
"save_code": False,
|
|
687
|
+
"save_data": False,
|
|
766
688
|
},
|
|
767
689
|
"features": {},
|
|
768
690
|
"dimplots_defaults": {
|
|
769
|
-
"
|
|
770
|
-
"
|
|
771
|
-
"na_group": None,
|
|
772
|
-
"split-by": None,
|
|
773
|
-
"shape-by": None,
|
|
691
|
+
"group_by": "seurat_clusters",
|
|
692
|
+
"split_by": None,
|
|
774
693
|
"subset": None,
|
|
775
694
|
"reduction": "dim",
|
|
776
|
-
"devpars": {"res": 100
|
|
695
|
+
"devpars": {"res": 100},
|
|
777
696
|
},
|
|
778
697
|
"dimplots": {
|
|
779
698
|
"Dimensional reduction plot": {
|
|
780
699
|
"label": True,
|
|
781
|
-
"
|
|
782
|
-
"repel": True,
|
|
700
|
+
"label_insitu": True,
|
|
783
701
|
},
|
|
784
702
|
},
|
|
785
703
|
}
|
|
786
704
|
script = "file://../scripts/scrna/SeuratClusterStats.R"
|
|
787
|
-
plugin_opts = {
|
|
788
|
-
"report": "file://../reports/scrna/SeuratClusterStats.svelte"
|
|
789
|
-
}
|
|
705
|
+
plugin_opts = {"report": "file://../reports/scrna/SeuratClusterStats.svelte"}
|
|
790
706
|
|
|
791
707
|
|
|
792
708
|
class ModuleScoreCalculator(Proc):
|
|
@@ -806,7 +722,7 @@ class ModuleScoreCalculator(Proc):
|
|
|
806
722
|
srtobj: The seurat object loaded by `SeuratClustering`
|
|
807
723
|
|
|
808
724
|
Output:
|
|
809
|
-
rdsfile: The seurat object with module scores
|
|
725
|
+
rdsfile: The seurat object with module scores added to the metadata.
|
|
810
726
|
|
|
811
727
|
Envs:
|
|
812
728
|
defaults (ns): The default parameters for `modules`.
|
|
@@ -863,6 +779,7 @@ class ModuleScoreCalculator(Proc):
|
|
|
863
779
|
This requires [`SingleCellExperiment`](https://bioconductor.org/packages/release/bioc/html/SingleCellExperiment.html)
|
|
864
780
|
and [`destiny`](https://bioconductor.org/packages/release/bioc/html/destiny.html) R packages.
|
|
865
781
|
""" # noqa: E501
|
|
782
|
+
|
|
866
783
|
input = "srtobj:file"
|
|
867
784
|
output = "rdsfile:file:{{in.srtobj | stem}}.RDS"
|
|
868
785
|
lang = config.lang.rscript
|
|
@@ -922,7 +839,8 @@ class CellsDistribution(Proc):
|
|
|
922
839
|
srtobj: The seurat object in RDS format
|
|
923
840
|
|
|
924
841
|
Output:
|
|
925
|
-
outdir: The output directory
|
|
842
|
+
outdir: The output directory.
|
|
843
|
+
The results for each case will be saved in a subdirectory.
|
|
926
844
|
|
|
927
845
|
Envs:
|
|
928
846
|
mutaters (type=json): The mutaters to mutate the metadata
|
|
@@ -988,6 +906,7 @@ class CellsDistribution(Proc):
|
|
|
988
906
|
r-tidyr:
|
|
989
907
|
- check: {{proc.lang}} -e "library(tidyr)"
|
|
990
908
|
""" # noqa: E501
|
|
909
|
+
|
|
991
910
|
input = "srtobj:file"
|
|
992
911
|
output = "outdir:dir:{{in.srtobj | stem}}.cells_distribution"
|
|
993
912
|
lang = config.lang.rscript
|
|
@@ -1043,6 +962,7 @@ class SeuratMetadataMutater(Proc):
|
|
|
1043
962
|
r-dplyr:
|
|
1044
963
|
- check: {{proc.lang}} <(echo "library(dplyr)")
|
|
1045
964
|
""" # noqa: E501
|
|
965
|
+
|
|
1046
966
|
input = "srtobj:file, metafile:file"
|
|
1047
967
|
output = "rdsfile:file:{{in.srtobj | stem}}.RDS"
|
|
1048
968
|
lang = config.lang.rscript
|
|
@@ -1067,6 +987,7 @@ class DimPlots(Proc):
|
|
|
1067
987
|
Keys are the names and values are the arguments to
|
|
1068
988
|
`Seurat::DimPlot`
|
|
1069
989
|
"""
|
|
990
|
+
|
|
1070
991
|
input = "srtobj:file, configfile:file, name:var"
|
|
1071
992
|
output = "outdir:dir:{{in.srtobj | stem}}.dimplots"
|
|
1072
993
|
lang = config.lang.rscript
|
|
@@ -1080,7 +1001,6 @@ class DimPlots(Proc):
|
|
|
1080
1001
|
|
|
1081
1002
|
@format_placeholder(
|
|
1082
1003
|
mutate_helpers_clonesize=MUTATE_HELPERS_CLONESIZE_INDENTED,
|
|
1083
|
-
envs_section_each=ENVS_SECTION_EACH_INDENTED,
|
|
1084
1004
|
)
|
|
1085
1005
|
class MarkersFinder(Proc):
|
|
1086
1006
|
"""Find markers between different groups of cells
|
|
@@ -1099,7 +1019,7 @@ class MarkersFinder(Proc):
|
|
|
1099
1019
|
by `PrepSCTFindMarkers` if data is not normalized using `SCTransform`.
|
|
1100
1020
|
|
|
1101
1021
|
Output:
|
|
1102
|
-
outdir: The output directory for the markers
|
|
1022
|
+
outdir: The output directory for the markers and plots
|
|
1103
1023
|
|
|
1104
1024
|
Envs:
|
|
1105
1025
|
ncores (type=int): Number of cores to use for parallel computing for some `Seurat` procedures.
|
|
@@ -1131,73 +1051,104 @@ class MarkersFinder(Proc):
|
|
|
1131
1051
|
to select markers with adjusted p-value < 0.05 and absolute log2
|
|
1132
1052
|
fold change > 1.
|
|
1133
1053
|
assay: The assay to use.
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
genes will be labeled. Otherwise, specify the genes to label.
|
|
1138
|
-
It could be either a string with comma separated genes, or a list
|
|
1139
|
-
of genes.
|
|
1140
|
-
section: The section name for the report. It must not contain colon (`:`).
|
|
1141
|
-
Ignored when `each` is not specified and `ident-1` is specified.
|
|
1142
|
-
When neither `each` nor `ident-1` is specified, case name will be used
|
|
1143
|
-
as section name.
|
|
1144
|
-
If `each` is specified, the section name will be constructed from
|
|
1145
|
-
`each` and case name.
|
|
1146
|
-
%(envs_section_each)s
|
|
1054
|
+
error (flag): Error out if no/not enough markers are found or no pathways are enriched.
|
|
1055
|
+
If `False`, empty results will be returned.
|
|
1056
|
+
site: The site to use for the `enrichR` enrichment analysis.
|
|
1147
1057
|
subset: An expression to subset the cells for each case.
|
|
1058
|
+
cache (type=auto): Where to cache the `FindAllMarkers` results.
|
|
1059
|
+
If `True`, cache to `outdir` of the job. If `False`, don't cache.
|
|
1060
|
+
Otherwise, specify the directory to cache to.
|
|
1148
1061
|
rest (ns): Rest arguments for `Seurat::FindMarkers()`.
|
|
1149
1062
|
Use `-` to replace `.` in the argument name. For example,
|
|
1150
1063
|
use `min-pct` instead of `min.pct`.
|
|
1151
1064
|
This only works when `use_presto` is `False`.
|
|
1152
1065
|
- <more>: See <https://satijalab.org/seurat/reference/findmarkers>
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
-
|
|
1066
|
+
allmarker_plots_defaults (ns): Default options for the plots for all markers when `ident-1` is not specified.
|
|
1067
|
+
- plot_type: The type of the plot.
|
|
1068
|
+
See <https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html>.
|
|
1069
|
+
Available types are `violin`, `box`, `bar`, `ridge`, `dim`, `heatmap` and `dot`.
|
|
1070
|
+
- more_formats (list): The extra formats to save the plot in.
|
|
1071
|
+
- save_code (flag): Whether to save the code to generate the plot.
|
|
1159
1072
|
- devpars (ns): The device parameters for the plots.
|
|
1160
1073
|
- res (type=int): The resolution of the plots.
|
|
1161
1074
|
- height (type=int): The height of the plots.
|
|
1162
1075
|
- width (type=int): The width of the plots.
|
|
1163
|
-
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1076
|
+
- order_by: an expression to order the markers, passed by `dplyr::arrange()`.
|
|
1077
|
+
- genes: The number of top genes to show or an expression passed to `dplyr::filter()` to filter the genes.
|
|
1078
|
+
- <more>: Other arguments passed to [`scplotter::FeatureStatPlot()`](https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html).
|
|
1079
|
+
allmarker_plots (type=json): All marker plot cases.
|
|
1080
|
+
The keys are the names of the cases and the values are the dicts inherited from `allmarker_plots_defaults`.
|
|
1081
|
+
marker_plots_defaults (ns): Default options for the plots to generate for the markers.
|
|
1082
|
+
- plot_type: The type of the plot.
|
|
1083
|
+
See <https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html>.
|
|
1084
|
+
Available types are `violin`, `box`, `bar`, `ridge`, `dim`, `heatmap` and `dot`.
|
|
1085
|
+
There are two additional types available - `volcano_pct` and `volcano_log2fc`.
|
|
1086
|
+
- more_formats (list): The extra formats to save the plot in.
|
|
1087
|
+
- save_code (flag): Whether to save the code to generate the plot.
|
|
1088
|
+
- devpars (ns): The device parameters for the plots.
|
|
1089
|
+
- res (type=int): The resolution of the plots.
|
|
1090
|
+
- height (type=int): The height of the plots.
|
|
1091
|
+
- width (type=int): The width of the plots.
|
|
1092
|
+
- order_by: an expression to order the markers, passed by `dplyr::arrange()`.
|
|
1093
|
+
- genes: The number of top genes to show or an expression passed to `dplyr::filter()` to filter the genes.
|
|
1094
|
+
- <more>: Other arguments passed to [`scplotter::FeatureStatPlot()`](https://pwwang.github.io/scplotter/reference/FeatureStatPlot.html).
|
|
1095
|
+
If `plot_type` is `volcano_pct` or `volcano_log2fc`, they will be passed to
|
|
1096
|
+
[`scplotter::VolcanoPlot()`](https://pwwang.github.io/plotthis/reference/VolcanoPlot.html).
|
|
1097
|
+
marker_plots (type=json): Cases of the plots to generate for the markers.
|
|
1098
|
+
Plot cases. The keys are the names of the cases and the values are the dicts inherited from `marker_plots_defaults`.
|
|
1099
|
+
enrich_plots_defaults (ns): Default options for the plots to generate for the enrichment analysis.
|
|
1100
|
+
- plot_type: The type of the plot.
|
|
1101
|
+
See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1102
|
+
Available types are `bar`, `dot`, `lollipop`, `network`, `enrichmap` and `wordcloud`.
|
|
1103
|
+
- more_formats (list): The extra formats to save the plot in.
|
|
1104
|
+
- save_code (flag): Whether to save the code to generate the plot.
|
|
1105
|
+
- devpars (ns): The device parameters for the plots.
|
|
1106
|
+
- res (type=int): The resolution of the plots.
|
|
1107
|
+
- height (type=int): The height of the plots.
|
|
1108
|
+
- width (type=int): The width of the plots.
|
|
1109
|
+
- <more>: See <https://pwwang.github.io/scplotter/reference/EnrichmentPlot.html>.
|
|
1110
|
+
enrich_plots (type=json): Cases of the plots to generate for the enrichment analysis.
|
|
1111
|
+
The keys are the names of the cases and the values are the dicts inherited from `enrich_plots_defaults`.
|
|
1112
|
+
cases (type=json): If you have multiple cases for marker discovery, you can specify them
|
|
1113
|
+
here. The keys are the names of the cases and the values are the above options. If some options are
|
|
1114
|
+
not specified, the default values specified above (under `envs`) will be used.
|
|
1115
|
+
If no cases are specified, the default case will be added with the default values under `envs` with the name `DEFAULT`.
|
|
1116
|
+
If you want to put some cases under the same section in the report, you can specify the section name in the case name
|
|
1117
|
+
as a prefix separated by `::`. For example, `section1::case1` and `section1::case2` will put `case1` and `case2`
|
|
1118
|
+
under the section `section1`.
|
|
1119
|
+
overlaps_defaults (ns): Default options for investigating the overlapping of significant markers between different cases.
|
|
1120
|
+
- cases (list): The cases to do the overlapping analysis, including the prefix section name.
|
|
1121
|
+
The case must have `ident-1` specified. When `each` is specified, the case will be expanded.
|
|
1122
|
+
For example, `case1` with `each = "group"`, where `group` has `g1` and `g2`, will be expanded to
|
|
1123
|
+
`case1::g1` and `case1::g2`, or `case1::group - g1` and `case1::group - g2` if `prefix_each` is `True`.
|
|
1124
|
+
There must be at least 2 cases to do the overlapping analysis.
|
|
1125
|
+
- sigmarkers: The expression to filter the significant markers for each case.
|
|
1126
|
+
If not provided, `envs.sigmarkers` will be used.
|
|
1171
1127
|
- venn (ns): The options for the Venn diagram.
|
|
1172
|
-
|
|
1128
|
+
- enabled (flag): Whether to enable the Venn diagram.
|
|
1129
|
+
Default is "auto", which means enabled when there are no more than 5 cases.
|
|
1130
|
+
- more_formats (list): The extra formats to save the plot in.
|
|
1131
|
+
- save_code (flag): Whether to save the code to generate the plot.
|
|
1173
1132
|
- devpars (ns): The device parameters for the plots.
|
|
1174
1133
|
- res (type=int): The resolution of the plots.
|
|
1175
1134
|
- height (type=int): The height of the plots.
|
|
1176
1135
|
- width (type=int): The width of the plots.
|
|
1136
|
+
- <more>: More arguments passed to `plotthis::VennDiagram()`.
|
|
1137
|
+
https://pwwang.github.io/plotthis/reference/venndiagram1.html
|
|
1177
1138
|
- upset (ns): The options for the UpSet plot.
|
|
1139
|
+
- enabled (flag): Whether to enable the UpSet plot.
|
|
1140
|
+
- more_formats (list): The extra formats to save the plot in.
|
|
1141
|
+
- save_code (flag): Whether to save the code to generate the plot.
|
|
1178
1142
|
- devpars (ns): The device parameters for the plots.
|
|
1179
1143
|
- res (type=int): The resolution of the plots.
|
|
1180
1144
|
- height (type=int): The height of the plots.
|
|
1181
1145
|
- width (type=int): The width of the plots.
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
The keys of this option are the names of the sections. The values are
|
|
1187
|
-
a dict of options with keys `venn` and `upset`, values will
|
|
1188
|
-
be inherited from `envs.overlap_defaults`, recursively.
|
|
1189
|
-
You can set `envs.overlap.<section>.venn` to `False`/`None` to disable
|
|
1190
|
-
the Venn diagram for the section.
|
|
1191
|
-
It works when `each` is specified. In such a case, the sections will be
|
|
1192
|
-
the case names.
|
|
1193
|
-
This does not work for the cases where `ident-1` is not specified. In case
|
|
1194
|
-
you want to do such analysis for those cases, you should enumerate the
|
|
1195
|
-
idents in different cases and specify them here.
|
|
1196
|
-
cache (type=auto): Where to cache to `FindAllMarkers` results.
|
|
1197
|
-
If `True`, cache to `outdir` of the job. If `False`, don't cache.
|
|
1198
|
-
Otherwise, specify the directory to cache to.
|
|
1199
|
-
Only works when `use_presto` is `False` (presto works fast enough).
|
|
1146
|
+
- <more>: More arguments passed to `plotthis::UpsetPlot()`.
|
|
1147
|
+
https://pwwang.github.io/plotthis/reference/upsetplot1.html
|
|
1148
|
+
overlaps (type=json): Cases for investigating the overlapping of significant markers between different cases.
|
|
1149
|
+
The keys are the names of the cases and the values are the dicts inherited from `overlaps_defaults`.
|
|
1200
1150
|
""" # noqa: E501
|
|
1151
|
+
|
|
1201
1152
|
input = "srtobj:file"
|
|
1202
1153
|
output = "outdir:dir:{{in.srtobj | stem0}}.markers"
|
|
1203
1154
|
lang = config.lang.rscript
|
|
@@ -1210,21 +1161,62 @@ class MarkersFinder(Proc):
|
|
|
1210
1161
|
"each": None,
|
|
1211
1162
|
"prefix_each": True,
|
|
1212
1163
|
"prefix_group": True,
|
|
1213
|
-
"section": "DEFAULT",
|
|
1214
1164
|
"assay": None,
|
|
1215
1165
|
"subset": None,
|
|
1166
|
+
"error": True,
|
|
1167
|
+
"site": "Enrichr",
|
|
1216
1168
|
"rest": {},
|
|
1217
1169
|
"dbs": ["KEGG_2021_Human", "MSigDB_Hallmark_2020"],
|
|
1218
1170
|
"sigmarkers": "p_val_adj < 0.05",
|
|
1219
|
-
"
|
|
1220
|
-
"
|
|
1171
|
+
"cache": config.path.tmpdir,
|
|
1172
|
+
"allmarker_plots_defaults": {
|
|
1173
|
+
"plot_type": None,
|
|
1174
|
+
"more_formats": [],
|
|
1175
|
+
"save_code": False,
|
|
1176
|
+
"devpars": {"res": 100},
|
|
1177
|
+
"order_by": "desc(abs(avg_log2FC))",
|
|
1178
|
+
"genes": 10,
|
|
1179
|
+
},
|
|
1180
|
+
"allmarker_plots": {},
|
|
1181
|
+
"marker_plots_defaults": {
|
|
1182
|
+
"plot_type": None,
|
|
1183
|
+
"more_formats": [],
|
|
1184
|
+
"save_code": False,
|
|
1185
|
+
"devpars": {"res": 100},
|
|
1186
|
+
"order_by": "desc(abs(avg_log2FC))",
|
|
1187
|
+
"genes": 10,
|
|
1188
|
+
},
|
|
1189
|
+
"marker_plots": {
|
|
1190
|
+
"Volcano Plot (diff_pct)": {"plot_type": "volcano_pct"},
|
|
1191
|
+
"Volcano Plot (log2FC)": {"plot_type": "volcano_log2fc"},
|
|
1192
|
+
"Dot Plot": {"plot_type": "dot"},
|
|
1193
|
+
},
|
|
1194
|
+
"enrich_plots_defaults": {
|
|
1195
|
+
"more_formats": [],
|
|
1196
|
+
"save_code": False,
|
|
1197
|
+
"devpars": {"res": 100},
|
|
1198
|
+
},
|
|
1199
|
+
"enrich_plots": {
|
|
1200
|
+
"Bar Plot": {"plot_type": "bar", "ncol": 1, "top_term": 10},
|
|
1201
|
+
},
|
|
1221
1202
|
"cases": {},
|
|
1222
|
-
"
|
|
1223
|
-
"
|
|
1224
|
-
"
|
|
1203
|
+
"overlaps_defaults": {
|
|
1204
|
+
"cases": [],
|
|
1205
|
+
"sigmarkers": None,
|
|
1206
|
+
"venn": {
|
|
1207
|
+
"enabled": "auto",
|
|
1208
|
+
"more_formats": [],
|
|
1209
|
+
"save_code": False,
|
|
1210
|
+
"devpars": {"res": 100},
|
|
1211
|
+
},
|
|
1212
|
+
"upset": {
|
|
1213
|
+
"enabled": True,
|
|
1214
|
+
"more_formats": [],
|
|
1215
|
+
"save_code": False,
|
|
1216
|
+
"devpars": {"res": 100},
|
|
1217
|
+
},
|
|
1225
1218
|
},
|
|
1226
|
-
"
|
|
1227
|
-
"cache": config.path.tmpdir,
|
|
1219
|
+
"overlaps": {},
|
|
1228
1220
|
}
|
|
1229
1221
|
order = 5
|
|
1230
1222
|
script = "file://../scripts/scrna/MarkersFinder.R"
|
|
@@ -1274,6 +1266,7 @@ class TopExpressingGenes(Proc):
|
|
|
1274
1266
|
If no cases are specified, the default case will be added with
|
|
1275
1267
|
the default values under `envs` with the name `DEFAULT`.
|
|
1276
1268
|
"""
|
|
1269
|
+
|
|
1277
1270
|
input = "srtobj:file"
|
|
1278
1271
|
output = "outdir:dir:{{in.srtobj | stem}}.top_expressing_genes"
|
|
1279
1272
|
lang = config.lang.rscript
|
|
@@ -1358,6 +1351,7 @@ class ExprImputation(Proc):
|
|
|
1358
1351
|
- if: {{proc.envs.tool == "alra"}}
|
|
1359
1352
|
- check: {{proc.lang}} <(echo "library(SeuratWrappers)")
|
|
1360
1353
|
""" # noqa: E501
|
|
1354
|
+
|
|
1361
1355
|
input = "infile:file"
|
|
1362
1356
|
output = "outfile:file:{{in.infile | stem}}.imputed.RDS"
|
|
1363
1357
|
lang = config.lang.rscript
|
|
@@ -1393,10 +1387,10 @@ class SCImpute(Proc):
|
|
|
1393
1387
|
infmt: The input format.
|
|
1394
1388
|
Either `seurat` or `matrix`
|
|
1395
1389
|
"""
|
|
1390
|
+
|
|
1396
1391
|
input = "infile:file, groupfile:file"
|
|
1397
1392
|
output = [
|
|
1398
|
-
"outfile:file:{{in.infile | stem | replace: '.seurat', ''}}."
|
|
1399
|
-
"{{envs.outfmt}}"
|
|
1393
|
+
"outfile:file:{{in.infile | stem | replace: '.seurat', ''}}." "{{envs.outfmt}}"
|
|
1400
1394
|
]
|
|
1401
1395
|
lang = config.lang.rscript
|
|
1402
1396
|
envs = {
|
|
@@ -1434,6 +1428,7 @@ class SeuratFilter(Proc):
|
|
|
1434
1428
|
r-dplyr:
|
|
1435
1429
|
- check: {{proc.lang}} <(echo "library('dplyr')")
|
|
1436
1430
|
"""
|
|
1431
|
+
|
|
1437
1432
|
input = "srtobj:file, filters:var"
|
|
1438
1433
|
output = "outfile:file:{{in.srtobj | stem}}.filtered.RDS"
|
|
1439
1434
|
lang = config.lang.rscript
|
|
@@ -1468,6 +1463,7 @@ class SeuratSubset(Proc):
|
|
|
1468
1463
|
r-dplyr:
|
|
1469
1464
|
- check: {{proc.lang}} <(echo "library('dplyr')")
|
|
1470
1465
|
"""
|
|
1466
|
+
|
|
1471
1467
|
input = "srtobj:file, subsets:var"
|
|
1472
1468
|
output = "outdir:dir:{{in.srtobj | stem}}.subsets"
|
|
1473
1469
|
envs = {"ignore_nas": True}
|
|
@@ -1491,6 +1487,7 @@ class SeuratSplit(Proc):
|
|
|
1491
1487
|
recell: Rename the cell ids using the `by` column
|
|
1492
1488
|
A string of R function taking the original cell ids and `by`
|
|
1493
1489
|
"""
|
|
1490
|
+
|
|
1494
1491
|
input = "srtobj:file, by:var"
|
|
1495
1492
|
output = "outdir:dir:{{in.srtobj | stem}}.subsets"
|
|
1496
1493
|
envs = {
|
|
@@ -1521,6 +1518,7 @@ class Subset10X(Proc):
|
|
|
1521
1518
|
feats_to_keep: The features/genes to keep.
|
|
1522
1519
|
The final features list will be `feats_to_keep` + `nfeats`
|
|
1523
1520
|
"""
|
|
1521
|
+
|
|
1524
1522
|
input = "indir:dir"
|
|
1525
1523
|
output = "outdir:dir:{{in.indir | stem}}"
|
|
1526
1524
|
envs = {
|
|
@@ -1550,6 +1548,7 @@ class SeuratTo10X(Proc):
|
|
|
1550
1548
|
Envs:
|
|
1551
1549
|
version: The version of 10X format
|
|
1552
1550
|
"""
|
|
1551
|
+
|
|
1553
1552
|
input = "srtobj:file"
|
|
1554
1553
|
output = "outdir:dir:{{in.srtobj | stem}}"
|
|
1555
1554
|
envs = {"version": "3", "split_by": None}
|
|
@@ -1582,7 +1581,7 @@ class ScFGSEA(Proc):
|
|
|
1582
1581
|
srtobj: The seurat object in RDS format
|
|
1583
1582
|
|
|
1584
1583
|
Output:
|
|
1585
|
-
outdir: The output directory for the results
|
|
1584
|
+
outdir: The output directory for the results and plots
|
|
1586
1585
|
|
|
1587
1586
|
Envs:
|
|
1588
1587
|
ncores (type=int): Number of cores for parallelization
|
|
@@ -1638,6 +1637,7 @@ class ScFGSEA(Proc):
|
|
|
1638
1637
|
r-seurat:
|
|
1639
1638
|
- check: {{proc.lang}} -e "library(seurat)"
|
|
1640
1639
|
""" # noqa: E501
|
|
1640
|
+
|
|
1641
1641
|
input = "srtobj:file"
|
|
1642
1642
|
output = "outdir:dir:{{(in.casefile or in.srtobj) | stem0}}.fgsea"
|
|
1643
1643
|
lang = config.lang.rscript
|
|
@@ -1704,7 +1704,9 @@ class CellTypeAnnotation(Proc):
|
|
|
1704
1704
|
sobjfile: The seurat object
|
|
1705
1705
|
|
|
1706
1706
|
Output:
|
|
1707
|
-
outfile: The rds file of seurat object with cell type annotated
|
|
1707
|
+
outfile: The rds file of seurat object with cell type annotated.
|
|
1708
|
+
A text file containing the mapping from the old `seurat_clusters` to the new cell types
|
|
1709
|
+
will be generated and saved to `cluster2celltype.tsv` under the job output directory.
|
|
1708
1710
|
|
|
1709
1711
|
Envs:
|
|
1710
1712
|
tool (choice): The tool to use for cell type annotation.
|
|
@@ -1788,6 +1790,7 @@ class CellTypeAnnotation(Proc):
|
|
|
1788
1790
|
- if: {{proc.envs.tool == 'sctype'}}
|
|
1789
1791
|
- check: {{proc.lang}} -e "library(openxlsx)"
|
|
1790
1792
|
""" # noqa: E501
|
|
1793
|
+
|
|
1791
1794
|
input = "sobjfile:file"
|
|
1792
1795
|
output = (
|
|
1793
1796
|
"outfile:file:"
|
|
@@ -1905,6 +1908,7 @@ class SeuratMap2Ref(Proc):
|
|
|
1905
1908
|
r-seurat:
|
|
1906
1909
|
- check: {{proc.lang}} -e "library(Seurat)"
|
|
1907
1910
|
""" # noqa: E501
|
|
1911
|
+
|
|
1908
1912
|
input = "sobjfile:file"
|
|
1909
1913
|
output = "outfile:file:{{in.sobjfile | stem}}.RDS"
|
|
1910
1914
|
lang = config.lang.rscript
|
|
@@ -1935,7 +1939,7 @@ class SeuratMap2Ref(Proc):
|
|
|
1935
1939
|
# "celltype-l1": "celltype.l1",
|
|
1936
1940
|
# "celltype-l2": "celltype.l2",
|
|
1937
1941
|
# "predicted_ADT": "ADT",
|
|
1938
|
-
}
|
|
1942
|
+
},
|
|
1939
1943
|
},
|
|
1940
1944
|
"MappingScore": {"ndim": 30},
|
|
1941
1945
|
}
|
|
@@ -2083,6 +2087,7 @@ class RadarPlots(Proc):
|
|
|
2083
2087
|
key `DEFAULT`.
|
|
2084
2088
|
The keys must be valid string as part of the file name.
|
|
2085
2089
|
""" # noqa: E501
|
|
2090
|
+
|
|
2086
2091
|
input = "srtobj:file"
|
|
2087
2092
|
output = "outdir:dir:{{in.srtobj | stem}}.radar_plots"
|
|
2088
2093
|
lang = config.lang.rscript
|
|
@@ -2093,7 +2098,7 @@ class RadarPlots(Proc):
|
|
|
2093
2098
|
"each": None,
|
|
2094
2099
|
"prefix_each": True,
|
|
2095
2100
|
"order": None,
|
|
2096
|
-
"colors":
|
|
2101
|
+
"colors": "biopipen",
|
|
2097
2102
|
"ident": "seurat_clusters",
|
|
2098
2103
|
"cluster_order": [],
|
|
2099
2104
|
"breakdown": None,
|
|
@@ -2193,6 +2198,7 @@ class MetaMarkers(Proc):
|
|
|
2193
2198
|
If no cases are specified, the default case will be added with
|
|
2194
2199
|
the default values under `envs` with the name `DEFAULT`.
|
|
2195
2200
|
""" # noqa: E501
|
|
2201
|
+
|
|
2196
2202
|
input = "srtobj:file"
|
|
2197
2203
|
output = "outdir:dir:{{in.srtobj | stem}}.meta_markers"
|
|
2198
2204
|
lang = config.lang.rscript
|
|
@@ -2231,6 +2237,7 @@ class Seurat2AnnData(Proc):
|
|
|
2231
2237
|
assay: The assay to use for AnnData.
|
|
2232
2238
|
If not specified, the default assay will be used.
|
|
2233
2239
|
"""
|
|
2240
|
+
|
|
2234
2241
|
input = "sobjfile:file"
|
|
2235
2242
|
output = "outfile:file:{{in.sobjfile | stem}}.h5ad"
|
|
2236
2243
|
lang = config.lang.rscript
|
|
@@ -2260,6 +2267,7 @@ class AnnData2Seurat(Proc):
|
|
|
2260
2267
|
to use for the check.
|
|
2261
2268
|
Only works for `outtype = 'rds'`.
|
|
2262
2269
|
"""
|
|
2270
|
+
|
|
2263
2271
|
input = "adfile:file"
|
|
2264
2272
|
output = "outfile:file:{{in.adfile | stem}}.RDS"
|
|
2265
2273
|
lang = config.lang.rscript
|
|
@@ -2302,6 +2310,7 @@ class ScSimulation(Proc):
|
|
|
2302
2310
|
See <https://rdrr.io/bioc/splatter/man/SplatParams.html>.
|
|
2303
2311
|
Hyphens (`-`) will be transformed into dots (`.`) for the keys.
|
|
2304
2312
|
""" # noqa: E501
|
|
2313
|
+
|
|
2305
2314
|
input = "seed:var"
|
|
2306
2315
|
output = "outfile:file:simulatied_{{in.seed}}.RDS"
|
|
2307
2316
|
lang = config.lang.rscript
|
|
@@ -2348,7 +2357,7 @@ class CellCellCommunication(Proc):
|
|
|
2348
2357
|
expression, while *_complex corresponds to the actual complex, with subunits being separated by _.
|
|
2349
2358
|
source and target columns represent the source/sender and target/receiver cell identity for each interaction, respectively
|
|
2350
2359
|
* `*_props`: represents the proportion of cells that express the entity.
|
|
2351
|
-
By default, any interactions in which either entity is not expressed in above 10
|
|
2360
|
+
By default, any interactions in which either entity is not expressed in above 10%% of cells per cell type
|
|
2352
2361
|
is considered as a false positive, under the assumption that since CCC occurs between cell types, a sufficient
|
|
2353
2362
|
proportion of cells within should express the genes.
|
|
2354
2363
|
* `*_means`: entity expression mean per cell type.
|
|
@@ -2376,6 +2385,21 @@ class CellCellCommunication(Proc):
|
|
|
2376
2385
|
- geometric_mean: alias for `Geometric_Mean`
|
|
2377
2386
|
- scseqcomm: alias for `scSeqComm`
|
|
2378
2387
|
- cellchat: alias for `CellChat`
|
|
2388
|
+
subset: An expression in string to subset the cells.
|
|
2389
|
+
When a `.rds` or `.h5seurat` file is provided for `in.sobjfile`, you can provide an expression in `R`,
|
|
2390
|
+
which will be passed to `base::subset()` in `R` to subset the cells.
|
|
2391
|
+
But you can always pass an expression in `python` to subset the cells.
|
|
2392
|
+
See <https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html#subsetting-using-metadata>.
|
|
2393
|
+
You should use `adata` to refer to the AnnData object. For example, `adata.obs.groups == "g1"` will subset the cells
|
|
2394
|
+
with `groups` equal to `g1`.
|
|
2395
|
+
subset_using: The method to subset the cells.
|
|
2396
|
+
- auto: Automatically detect the method to use.
|
|
2397
|
+
Note that this is not always accurate. We simply check if `[` is in the expression.
|
|
2398
|
+
If so, we use `python` to subset the cells; otherwise, we use `R`.
|
|
2399
|
+
- python: Use python to subset the cells.
|
|
2400
|
+
- r: Use R to subset the cells.
|
|
2401
|
+
split_by: The column name in metadata to split the cells to run the method separately.
|
|
2402
|
+
The results will be combined together with this column in the final output.
|
|
2379
2403
|
assay: The assay to use for the analysis.
|
|
2380
2404
|
Only works for Seurat object.
|
|
2381
2405
|
seed (type=int): The seed for the random number generator.
|
|
@@ -2401,6 +2425,7 @@ class CellCellCommunication(Proc):
|
|
|
2401
2425
|
See the method documentation for more details and also
|
|
2402
2426
|
`help(liana.mt.<method>.__call__)` in Python.
|
|
2403
2427
|
""" # noqa: E501
|
|
2428
|
+
|
|
2404
2429
|
input = "sobjfile:file"
|
|
2405
2430
|
output = "outfile:file:{{in.sobjfile | stem}}-ccc.txt"
|
|
2406
2431
|
lang = config.lang.python
|
|
@@ -2408,6 +2433,9 @@ class CellCellCommunication(Proc):
|
|
|
2408
2433
|
"method": "cellchat",
|
|
2409
2434
|
"assay": None,
|
|
2410
2435
|
"seed": 1337,
|
|
2436
|
+
"subset": None,
|
|
2437
|
+
"subset_using": "auto",
|
|
2438
|
+
"split_by": None,
|
|
2411
2439
|
"ncores": config.misc.ncores,
|
|
2412
2440
|
"groupby": "seurat_clusters",
|
|
2413
2441
|
"species": "human",
|
|
@@ -2455,6 +2483,7 @@ class CellCellCommunicationPlots(Proc):
|
|
|
2455
2483
|
See the documentation for more details.
|
|
2456
2484
|
Or you can use `?CCPlotR::cc_<kind>` in R.
|
|
2457
2485
|
"""
|
|
2486
|
+
|
|
2458
2487
|
input = "cccfile:file, expfile:file"
|
|
2459
2488
|
output = "outdir:dir:{{in.cccfile | stem}}-ccc_plots"
|
|
2460
2489
|
lang = config.lang.rscript
|
|
@@ -2467,3 +2496,135 @@ class CellCellCommunicationPlots(Proc):
|
|
|
2467
2496
|
plugin_opts = {
|
|
2468
2497
|
"report": "file://../reports/scrna/CellCellCommunicationPlots.svelte",
|
|
2469
2498
|
}
|
|
2499
|
+
|
|
2500
|
+
|
|
2501
|
+
class ScVelo(Proc):
|
|
2502
|
+
"""Velocity analysis for single-cell RNA-seq data
|
|
2503
|
+
|
|
2504
|
+
This process is implemented based on the Python package `scvelo`.
|
|
2505
|
+
|
|
2506
|
+
Input:
|
|
2507
|
+
sobjfile: The seurat object file in RDS or h5seurat format or AnnData file.
|
|
2508
|
+
|
|
2509
|
+
Output:
|
|
2510
|
+
outfile: The output object with the velocity embeddings and information.
|
|
2511
|
+
In either RDS, h5seurat or h5ad format, depending on the `envs.outtype`.
|
|
2512
|
+
outdir: The output directory for the plots
|
|
2513
|
+
|
|
2514
|
+
Envs:
|
|
2515
|
+
ncores (type=int): Number of cores to use.
|
|
2516
|
+
group_by: The column name in metadata to group the cells.
|
|
2517
|
+
Typically, this column should be the cluster id.
|
|
2518
|
+
reduction: The nonlinear reduction to use for the velocity analysis.
|
|
2519
|
+
Typically, `umap` will be used.
|
|
2520
|
+
If this is not provided, 'pca' will be used if exists, otherwise a
|
|
2521
|
+
PCA will be performed.
|
|
2522
|
+
modes (type=auto): The modes to use for the analysis.
|
|
2523
|
+
A list or a string with comma separated values.
|
|
2524
|
+
fitting_by (choice): The mode to use for fitting the velocities.
|
|
2525
|
+
- stochastic: Stochastic mode
|
|
2526
|
+
- deterministic: Deterministic mode
|
|
2527
|
+
min_shared_counts (type=int): Minimum number of counts
|
|
2528
|
+
(both unspliced and spliced) required for a gene.
|
|
2529
|
+
n_neighbors (type=int): The number of neighbors to use for the velocity graph.
|
|
2530
|
+
n_pcs (type=int): The number of PCs to use for the velocity graph.
|
|
2531
|
+
stream_smooth (type=float): Multiplication factor for scale in Gaussian kernel
|
|
2532
|
+
around grid point.
|
|
2533
|
+
stream_density (type=float): Controls the closeness of streamlines.
|
|
2534
|
+
When density = 2.0, the domain is divided into a 60x60 grid, whereas
|
|
2535
|
+
density linearly scales this grid. Each cell in the grid can have,
|
|
2536
|
+
at most, one traversing streamline. For different densities in each
|
|
2537
|
+
direction, use a tuple (density_x, density_y).
|
|
2538
|
+
arrow_size (type=float): Scaling factor for the arrow size.
|
|
2539
|
+
arrow_length (type=float): Length of arrows.
|
|
2540
|
+
arrow_density (type=float): Density of arrows.
|
|
2541
|
+
denoise (flag): Whether to denoise the data.
|
|
2542
|
+
denoise_topn (type=int): Number of genes with highest likelihood selected to
|
|
2543
|
+
infer velocity directions.
|
|
2544
|
+
kinetics (flag): Whether to compute the RNA velocity kinetics.
|
|
2545
|
+
kinetics_topn (type=int): Number of genes with highest likelihood selected to
|
|
2546
|
+
infer velocity directions.
|
|
2547
|
+
calculate_velocity_genes (flag): Whether to calculate the velocity genes.
|
|
2548
|
+
top_n (type=int): The number of top features to plot.
|
|
2549
|
+
res (type=int): The resolution of the plots.
|
|
2550
|
+
rscript: The path to the Rscript executable used to convert RDS file to AnnData.
|
|
2551
|
+
If `in.sobjfile` is an RDS file, it will be converted to AnnData file
|
|
2552
|
+
(h5ad). You need `Seurat`, `SeuratDisk` and `digest` installed.
|
|
2553
|
+
outtype (choice): The output file type.
|
|
2554
|
+
- input: The same as the input file type.
|
|
2555
|
+
- anndata: AnnData object
|
|
2556
|
+
- h5seurat: h5seurat object
|
|
2557
|
+
- h5ad: h5ad object
|
|
2558
|
+
"""
|
|
2559
|
+
|
|
2560
|
+
input = "sobjfile:file"
|
|
2561
|
+
output = "outfile:file:{{in.sobjfile | stem}}-scvelo.{{envs.outtype}}"
|
|
2562
|
+
lang = config.lang.python
|
|
2563
|
+
envs = {
|
|
2564
|
+
"ncores": config.misc.ncores,
|
|
2565
|
+
"group_by": "seurat_clusters",
|
|
2566
|
+
"reduction": "umap",
|
|
2567
|
+
"modes": ["stochastic", "deterministic", "dynamical"],
|
|
2568
|
+
"fitting_by": "stochastic",
|
|
2569
|
+
"min_shared_counts": 30,
|
|
2570
|
+
"n_neighbors": 30,
|
|
2571
|
+
"n_pcs": 30,
|
|
2572
|
+
"stream_smooth": 0.5,
|
|
2573
|
+
"stream_density": 2.0,
|
|
2574
|
+
"arrow_size": 5.0,
|
|
2575
|
+
"arrow_length": 5.0,
|
|
2576
|
+
"arrow_density": 0.5,
|
|
2577
|
+
"denoise": False,
|
|
2578
|
+
"denoise_topn": 3,
|
|
2579
|
+
"kinetics": False,
|
|
2580
|
+
"kinetics_topn": 100,
|
|
2581
|
+
"calculate_velocity_genes": False,
|
|
2582
|
+
"top_n": 6,
|
|
2583
|
+
"res": 100,
|
|
2584
|
+
"rscript": config.lang.rscript,
|
|
2585
|
+
"outtype": "input",
|
|
2586
|
+
}
|
|
2587
|
+
script = "file://../scripts/scrna/ScVelo.py"
|
|
2588
|
+
|
|
2589
|
+
|
|
2590
|
+
class SlingShot(Proc):
|
|
2591
|
+
"""Trajectory inference using SlingShot
|
|
2592
|
+
|
|
2593
|
+
This process is implemented based on the R package `slingshot`.
|
|
2594
|
+
|
|
2595
|
+
Input:
|
|
2596
|
+
sobjfile: The seurat object file in RDS.
|
|
2597
|
+
|
|
2598
|
+
Output:
|
|
2599
|
+
outfile: The output object with the trajectory information.
|
|
2600
|
+
|
|
2601
|
+
Envs:
|
|
2602
|
+
group_by: The column name in metadata to group the cells.
|
|
2603
|
+
Typically, this column should be the cluster id.
|
|
2604
|
+
reduction: The nonlinear reduction to use for the trajectory analysis.
|
|
2605
|
+
dims (type=auto): The dimensions to use for the analysis.
|
|
2606
|
+
A list or a string with comma separated values.
|
|
2607
|
+
Consecutive numbers can be specified with a colon (`:`) or a dash (`-`).
|
|
2608
|
+
start: The starting group for the SlingShot analysis.
|
|
2609
|
+
end: The ending group for the SlingShot analysis.
|
|
2610
|
+
prefix: The prefix to add to the column names of the resulting pseudotime variable.
|
|
2611
|
+
reverse (flag): Logical value indicating whether to reverse the pseudotime variable.
|
|
2612
|
+
align_start (flag): Whether to align the starting pseudotime values at the maximum pseudotime.
|
|
2613
|
+
seed (type=int): The seed for the random number generator.
|
|
2614
|
+
""" # noqa: E501
|
|
2615
|
+
|
|
2616
|
+
input = "sobjfile:file"
|
|
2617
|
+
output = "outfile:file:{{in.sobjfile | stem}}.RDS"
|
|
2618
|
+
lang = config.lang.rscript
|
|
2619
|
+
envs = {
|
|
2620
|
+
"group_by": "seurat_clusters",
|
|
2621
|
+
"reduction": None,
|
|
2622
|
+
"dims": [1, 2],
|
|
2623
|
+
"start": None,
|
|
2624
|
+
"end": None,
|
|
2625
|
+
"prefix": None,
|
|
2626
|
+
"reverse": False,
|
|
2627
|
+
"align_start": False,
|
|
2628
|
+
"seed": 8525,
|
|
2629
|
+
}
|
|
2630
|
+
script = "file://../scripts/scrna/SlingShot.R"
|