biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
"""Metabolic landscape analysis for scRNA-seq data"""
|
|
2
|
+
|
|
2
3
|
from __future__ import annotations
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Type
|
|
5
6
|
|
|
6
|
-
from diot import Diot
|
|
7
|
+
from diot import Diot # type: ignore
|
|
7
8
|
from datar.tibble import tibble
|
|
8
9
|
from pipen.utils import mark
|
|
9
10
|
from pipen_args import ProcGroup
|
|
@@ -13,7 +14,7 @@ from ..core.config import config
|
|
|
13
14
|
from ..core.proc import Proc
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
class
|
|
17
|
+
class MetabolicPathwayActivity(Proc):
|
|
17
18
|
"""This process calculates the pathway activities in different groups and subsets.
|
|
18
19
|
|
|
19
20
|
The cells are first grouped by subsets and then the metabolic activities are
|
|
@@ -22,102 +23,108 @@ class _MetabolicPathwayActivity(Proc):
|
|
|
22
23
|
For each subset, a heatmap and a violin plot will be generated.
|
|
23
24
|
The heatmap shows the pathway activities for each group and each metabolic pathway
|
|
24
25
|
|
|
25
|
-
{: width="80%"}
|
|
26
|
+
{: width="80%"}
|
|
26
27
|
|
|
27
28
|
The violin plot shows the distribution of the pathway activities for each group
|
|
28
29
|
|
|
29
|
-
{: width="45%"}
|
|
30
|
+
{: width="45%"}
|
|
31
|
+
|
|
32
|
+
You may also have a merged heatmap to show all subsets in one plot.
|
|
33
|
+
|
|
34
|
+
{: width="80%"}
|
|
35
|
+
|
|
36
|
+
Input:
|
|
37
|
+
sobjfile: The Seurat object file.
|
|
38
|
+
It should be loaded as a Seurat object
|
|
39
|
+
|
|
40
|
+
Output:
|
|
41
|
+
outdir: The output directory.
|
|
42
|
+
It will contain the pathway activity score files and plots.
|
|
30
43
|
|
|
31
44
|
Envs:
|
|
32
|
-
ntimes (type=int): Number of
|
|
45
|
+
ntimes (type=int): Number of permutations to estimate the p-values
|
|
33
46
|
ncores (type=int;pgarg): Number of cores to use for parallelization
|
|
34
47
|
Defaults to `ScrnaMetabolicLandscape.ncores`
|
|
35
|
-
heatmap_devpars (ns): Device parameters for the heatmap
|
|
36
|
-
- width (type=int): Width of the heatmap
|
|
37
|
-
- height (type=int): Height of the heatmap
|
|
38
|
-
- res (type=int): Resolution of the heatmap
|
|
39
|
-
violin_devpars (ns): Device parameters for the violin plot
|
|
40
|
-
- width (type=int): Width of the violin plot
|
|
41
|
-
- height (type=int): Height of the violin plot
|
|
42
|
-
- res (type=int): Resolution of the violin plot
|
|
43
48
|
gmtfile (pgarg): The GMT file with the metabolic pathways.
|
|
44
49
|
Defaults to `ScrnaMetabolicLandscape.gmtfile`
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
Defaults to `ScrnaMetabolicLandscape.
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
If
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
- check: {{proc.lang}} <(echo "library(reshape2)")
|
|
72
|
-
r-rcolorbrewer:
|
|
73
|
-
- check: {{proc.lang}} <(echo "library(RColorBrewer)")
|
|
74
|
-
r-ggplot2:
|
|
75
|
-
- check: {{proc.lang}} <(echo "library(ggplot2)")
|
|
76
|
-
r-ggprism:
|
|
77
|
-
- check: {{proc.lang}} <(echo "library(ggprism)")
|
|
78
|
-
r-complexheatmap:
|
|
79
|
-
- check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
|
|
80
|
-
r-parallel:
|
|
81
|
-
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
50
|
+
subset_by (pgarg;readonly): Subset the data by the given column in the
|
|
51
|
+
metadata. For example, `Response`.
|
|
52
|
+
`NA` values will be removed in this column.
|
|
53
|
+
Defaults to `ScrnaMetabolicLandscape.subset_by`
|
|
54
|
+
If None, the data will not be subsetted.
|
|
55
|
+
group_by (pgarg;readonly): Group the data by the given column in the
|
|
56
|
+
metadata. For example, `cluster`.
|
|
57
|
+
Defaults to `ScrnaMetabolicLandscape.group_by`
|
|
58
|
+
plots (type=json): The plots to generate.
|
|
59
|
+
Names will be used as the prefix for the output files. Values will be
|
|
60
|
+
a dictionary with the following keys:
|
|
61
|
+
* `plot_type` is the type of plot to generate. One of `heatmap`,
|
|
62
|
+
`box`, `violin` or `merged_heatmap` (all subsets in one plot).
|
|
63
|
+
* `devpars` is a dictionary with the device parameters for the plot.
|
|
64
|
+
* Other arguments for `plotthis::Heatmap()`, `plotthis::BoxPlot()`
|
|
65
|
+
or `plotthis::ViolinPlot()`, depending on the `plot_type`.
|
|
66
|
+
cases (type=json): Multiple cases for the analysis.
|
|
67
|
+
If you only have one case, you can specify the parameters directly to
|
|
68
|
+
`envs.ntimes`, `envs.subset_by`, `envs.group_by`, `envs.group1`,
|
|
69
|
+
`envs.group2`, and `envs.plots`. The name of the case will be
|
|
70
|
+
`envs.subset_by`.
|
|
71
|
+
If you have multiple cases, you can specify the parameters for each case
|
|
72
|
+
in a dictionary. The keys will be the names of the cases and the values
|
|
73
|
+
will be dictionaries with the parameters for each case, where the values
|
|
74
|
+
will be inherited from `envs.ntimes`, `envs.subset_by`, `envs.group_by`,
|
|
75
|
+
`envs.group1`, `envs.group2`, and `envs.plots`.
|
|
82
76
|
""" # noqa: E501
|
|
77
|
+
|
|
83
78
|
input = "sobjfile:file"
|
|
84
79
|
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
|
|
85
80
|
envs = {
|
|
86
81
|
"ntimes": 5000,
|
|
87
82
|
"ncores": config.misc.ncores,
|
|
88
|
-
"heatmap_devpars": {},
|
|
89
|
-
"violin_devpars": {},
|
|
90
83
|
"gmtfile": None,
|
|
91
|
-
"
|
|
92
|
-
"
|
|
93
|
-
"
|
|
94
|
-
|
|
84
|
+
"subset_by": None,
|
|
85
|
+
"group_by": None,
|
|
86
|
+
"plots": {
|
|
87
|
+
"Pathway Activity (violin plot)": {
|
|
88
|
+
"plot_type": "violin",
|
|
89
|
+
"add_box": True,
|
|
90
|
+
"devpars": {"res": 100},
|
|
91
|
+
},
|
|
92
|
+
"Pathway Activity (heatmap)": {
|
|
93
|
+
"plot_type": "heatmap",
|
|
94
|
+
"devpars": {"res": 100},
|
|
95
|
+
},
|
|
96
|
+
},
|
|
97
|
+
"cases": {},
|
|
95
98
|
}
|
|
96
99
|
lang = config.lang.rscript
|
|
97
100
|
script = (
|
|
98
|
-
"file://../scripts/"
|
|
99
|
-
"scrna_metabolic_landscape/MetabolicPathwayActivity.R"
|
|
101
|
+
"file://../scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R"
|
|
100
102
|
)
|
|
101
103
|
plugin_opts = {
|
|
102
|
-
"report":
|
|
103
|
-
|
|
104
|
-
"scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
|
|
105
|
-
)
|
|
104
|
+
"report":
|
|
105
|
+
"file://../reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
|
|
109
|
-
class
|
|
109
|
+
class MetabolicFeatures(Proc):
|
|
110
110
|
"""This process performs enrichment analysis for the metabolic pathways
|
|
111
111
|
for each group in each subset.
|
|
112
112
|
|
|
113
113
|
The enrichment analysis is done with [`fgsea`](https://bioconductor.org/packages/release/bioc/html/fgsea.html)
|
|
114
114
|
package or the [`GSEA_R`](https://github.com/GSEA-MSigDB/GSEA_R) package.
|
|
115
115
|
|
|
116
|
+
Input:
|
|
117
|
+
sobjfile: The Seurat object file in rds.
|
|
118
|
+
It should be loaded as a Seurat object
|
|
119
|
+
|
|
120
|
+
Output:
|
|
121
|
+
outdir: The output directory.
|
|
122
|
+
It will contain the GSEA results and plots.
|
|
123
|
+
|
|
116
124
|
Envs:
|
|
117
|
-
ncores (type=int;pgarg): Number of cores to use for parallelization
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
If `False`, the `GSEA_R` package will be used.
|
|
125
|
+
ncores (type=int;pgarg): Number of cores to use for parallelization for
|
|
126
|
+
the comparisons for each subset and group.
|
|
127
|
+
Defaults to `ScrnaMetabolicLandscape.ncores`.
|
|
121
128
|
prerank_method (choice): Method to use for gene preranking.
|
|
122
129
|
Signal to noise: the larger the differences of the means
|
|
123
130
|
(scaled by the standard deviations); that is, the more distinct
|
|
@@ -143,148 +150,85 @@ class _MetabolicFeatures(Proc):
|
|
|
143
150
|
- ratio_of_classes: Also referred to as fold change
|
|
144
151
|
- diff_of_classes: Difference of class means
|
|
145
152
|
- log2_ratio_of_classes: Log2 ratio of class means
|
|
146
|
-
top (type=int): N top of enriched pathways to show
|
|
147
153
|
gmtfile (pgarg): The GMT file with the metabolic pathways.
|
|
148
154
|
Defaults to `ScrnaMetabolicLandscape.gmtfile`
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
155
|
+
subset_by (pgarg;readonly): Subset the data by the given column in the
|
|
156
|
+
metadata. For example, `Response`.
|
|
157
|
+
`NA` values will be removed in this column.
|
|
158
|
+
Defaults to `ScrnaMetabolicLandscape.subset_by`
|
|
159
|
+
If None, the data will not be subsetted.
|
|
160
|
+
group_by (pgarg;readonly): Group the data by the given column in the
|
|
161
|
+
metadata. For example, `cluster`.
|
|
162
|
+
Defaults to `ScrnaMetabolicLandscape.group_by`
|
|
163
|
+
comparisons (type=list): The comparison groups to use for the analysis.
|
|
164
|
+
If not provided, each group in the `group_by` column will be used
|
|
165
|
+
to compare with the other groups.
|
|
166
|
+
If a single group is provided as an element, it will be used to
|
|
167
|
+
compare with all the other groups.
|
|
168
|
+
For example, if we have `group_by = "cluster"` and we have
|
|
169
|
+
`1`, `2` and `3` in the `group_by` column, we could have
|
|
170
|
+
`comparisons = ["1", "2"]`, which will compare the group `1` with groups
|
|
171
|
+
`2` and `3`, and the group `2` with groups `1` and `3`. We could also
|
|
172
|
+
have `comparisons = ["1:2", "1:3"]`, which will compare the group `1` with
|
|
173
|
+
group `2` and group `1` with group `3`.
|
|
174
|
+
fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
|
|
175
|
+
For example, `{"minSize": 15, "maxSize": 500}`.
|
|
176
|
+
See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
|
|
177
|
+
plots (type=json): The plots to generate.
|
|
178
|
+
Names will be used as the title for the plot. Values will be the arguments
|
|
179
|
+
passed to `biopipen.utils::VizGSEA()` function.
|
|
180
|
+
See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
|
|
181
|
+
A key `level` is supported to specify the level of the plot.
|
|
182
|
+
Possible values are `case`, which includes all subsets and groups in the
|
|
183
|
+
case; `subset`, which includes all groups in the subset; otherwise, it
|
|
184
|
+
will plot for the groups.
|
|
185
|
+
For `case`/`subset` level plots, current `plot_type` only "dot" is supported
|
|
186
|
+
for now, then the values will be passed to `plotthis::DotPlot()`
|
|
187
|
+
cases (type=json): Multiple cases for the analysis.
|
|
188
|
+
If you only have one case, you can specify the parameters directly to
|
|
189
|
+
`envs.prerank_method`, `envs.subset_by`, `envs.group_by`,
|
|
190
|
+
`envs.comparisons`, `envs.fgsea_args` and `envs.plots`.
|
|
191
|
+
The name of this default case will be `envs.subset_by`.
|
|
192
|
+
If you have multiple cases, you can specify the parameters for each case
|
|
193
|
+
in a dictionary. The keys will be the names of the cases and the values
|
|
194
|
+
will be dictionaries with the parameters for each case, where the values
|
|
195
|
+
will be inherited from `envs.prerank_method`,
|
|
196
|
+
`envs.subset_by`, `envs.group_by`, `envs.comparisons`, `envs.fgsea_args`
|
|
197
|
+
and `envs.plots`.
|
|
167
198
|
""" # noqa: E501
|
|
199
|
+
|
|
168
200
|
input = "sobjfile:file"
|
|
169
201
|
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
|
|
170
202
|
lang = config.lang.rscript
|
|
171
203
|
envs = {
|
|
172
204
|
"ncores": config.misc.ncores,
|
|
173
|
-
"fgsea": True,
|
|
174
205
|
"prerank_method": "signal_to_noise",
|
|
175
|
-
"top": 10,
|
|
176
|
-
"gmtfile": None,
|
|
177
|
-
"grouping": None,
|
|
178
|
-
"grouping_prefix": "",
|
|
179
|
-
"subsetting": None,
|
|
180
|
-
"subsetting_prefix": "",
|
|
181
|
-
}
|
|
182
|
-
script = (
|
|
183
|
-
"file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
|
|
184
|
-
)
|
|
185
|
-
plugin_opts = {
|
|
186
|
-
"report": (
|
|
187
|
-
"file://../reports/"
|
|
188
|
-
"scrna_metabolic_landscape/MetabolicFeatures.svelte"
|
|
189
|
-
)
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
class _MetabolicFeaturesIntraSubset(Proc):
|
|
194
|
-
"""Intra-subset metabolic features - Enrichment analysis in details
|
|
195
|
-
|
|
196
|
-
Similar to the [`MetabolicFeatures`](./MetabolicFeatures.md) process,
|
|
197
|
-
this process performs enrichment analysis for the metabolic pathways for
|
|
198
|
-
each subset in each group, instead of each group in each subset.
|
|
199
|
-
|
|
200
|
-
See also: [`MetabolicFeatures`](./MetabolicFeatures.md)
|
|
201
|
-
|
|
202
|
-
Envs:
|
|
203
|
-
ncores (type=int; pgarg): Number of cores to use for parallelization
|
|
204
|
-
Defaults to `ScrnaMetabolicLandscape.ncores`
|
|
205
|
-
fgsea (flag): Whether to do fast gsea analysis
|
|
206
|
-
prerank_method (choice): Method to use for gene preranking
|
|
207
|
-
Signal to noise: the larger the differences of the means
|
|
208
|
-
(scaled by the standard deviations); that is, the more distinct
|
|
209
|
-
the gene expression is in each phenotype and the more the gene
|
|
210
|
-
acts as a “class marker.”.
|
|
211
|
-
Absolute signal to noise: the absolute value of the signal to
|
|
212
|
-
noise.
|
|
213
|
-
T test: Uses the difference of means scaled by the standard
|
|
214
|
-
deviation and number of samples.
|
|
215
|
-
Ratio of classes: Uses the ratio of class means to calculate
|
|
216
|
-
fold change for natural scale data.
|
|
217
|
-
Diff of classes: Uses the difference of class means to calculate
|
|
218
|
-
fold change for nature scale data
|
|
219
|
-
Log2 ratio of classes: Uses the log2 ratio of class means to
|
|
220
|
-
calculate fold change for natural scale data. This is the
|
|
221
|
-
recommended statistic for calculating fold change for log scale
|
|
222
|
-
data.
|
|
223
|
-
- signal_to_noise: Signal to noise
|
|
224
|
-
- s2n: Alias of signal_to_noise
|
|
225
|
-
- abs_signal_to_noise: absolute signal to noise
|
|
226
|
-
- abs_s2n: Alias of abs_signal_to_noise
|
|
227
|
-
- t_test: T test
|
|
228
|
-
- ratio_of_classes: Also referred to as fold change
|
|
229
|
-
- diff_of_classes: Difference of class means
|
|
230
|
-
- log2_ratio_of_classes: Log2 ratio of class means
|
|
231
|
-
top (type=int): N top of enriched pathways to show
|
|
232
|
-
gmtfile (pgarg): The GMT file with the metabolic pathways.
|
|
233
|
-
Defaults to `ScrnaMetabolicLandscape.gmtfile`
|
|
234
|
-
grouping (type=auto;pgarg;readonly): Defines the basic groups to
|
|
235
|
-
investigate the metabolic activity.
|
|
236
|
-
Defaults to `ScrnaMetabolicLandscape.grouping`
|
|
237
|
-
grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
|
|
238
|
-
names.
|
|
239
|
-
Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
|
|
240
|
-
subsetting (type=auto;pgarg;readonly): How do we subset the data.
|
|
241
|
-
Another column(s) in the metadata.
|
|
242
|
-
Defaults to `ScrnaMetabolicLandscape.subsetting`
|
|
243
|
-
subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
|
|
244
|
-
subset names.
|
|
245
|
-
Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
|
|
246
|
-
subsetting_comparison (type=json;pgarg;readonly): How do we compare the
|
|
247
|
-
subsets.
|
|
248
|
-
Defaults to `ScrnaMetabolicLandscape.subsetting_comparison`
|
|
249
|
-
|
|
250
|
-
Requires:
|
|
251
|
-
r-parallel:
|
|
252
|
-
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
253
|
-
r-scater:
|
|
254
|
-
- check: {{proc.lang}} <(echo "library(scater)")
|
|
255
|
-
r-fgsea:
|
|
256
|
-
- check: {{proc.lang}} <(echo "library(fgsea)")
|
|
257
|
-
"""
|
|
258
|
-
input = "sobjfile:file"
|
|
259
|
-
output = (
|
|
260
|
-
"outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
|
|
261
|
-
)
|
|
262
|
-
lang = config.lang.rscript
|
|
263
|
-
envs = {
|
|
264
|
-
"ncores": config.misc.ncores,
|
|
265
206
|
"gmtfile": None,
|
|
266
|
-
"
|
|
267
|
-
"
|
|
268
|
-
"
|
|
269
|
-
"
|
|
270
|
-
"
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
207
|
+
"subset_by": None,
|
|
208
|
+
"group_by": None,
|
|
209
|
+
"comparisons": [],
|
|
210
|
+
"fgsea_args": {},
|
|
211
|
+
"plots": {
|
|
212
|
+
"Summary Plot": {
|
|
213
|
+
"plot_type": "summary",
|
|
214
|
+
"top_term": 10,
|
|
215
|
+
"devpars": {"res": 100},
|
|
216
|
+
},
|
|
217
|
+
"Enrichment Plots": {
|
|
218
|
+
"plot_type": "gsea",
|
|
219
|
+
"top_term": 10,
|
|
220
|
+
"devpars": {"res": 100},
|
|
221
|
+
},
|
|
222
|
+
},
|
|
223
|
+
"cases": {},
|
|
274
224
|
}
|
|
275
|
-
script =
|
|
276
|
-
"file://../scripts/scrna_metabolic_landscape/"
|
|
277
|
-
"MetabolicFeaturesIntraSubset.R"
|
|
278
|
-
)
|
|
225
|
+
script = "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
|
|
279
226
|
plugin_opts = {
|
|
280
|
-
"report":
|
|
281
|
-
"file://../reports/scrna_metabolic_landscape/"
|
|
282
|
-
"MetabolicFeaturesIntraSubset.svelte"
|
|
283
|
-
)
|
|
227
|
+
"report": "file://../reports/scrna_metabolic_landscape/MetabolicFeatures.svelte"
|
|
284
228
|
}
|
|
285
229
|
|
|
286
230
|
|
|
287
|
-
class
|
|
231
|
+
class MetabolicPathwayHeterogeneity(Proc):
|
|
288
232
|
"""Calculate Metabolic Pathway heterogeneity.
|
|
289
233
|
|
|
290
234
|
For each subset, the normalized enrichment score (NES) of each metabolic pathway
|
|
@@ -296,8 +240,7 @@ class _MetabolicPathwayHeterogeneity(Proc):
|
|
|
296
240
|
The heterogeneity can be reflected by the NES values and the p-values in
|
|
297
241
|
different groups for the metabolic pathways.
|
|
298
242
|
|
|
299
|
-

|
|
300
|
-
|
|
243
|
+

|
|
301
244
|
|
|
302
245
|
Envs:
|
|
303
246
|
gmtfile (pgarg): The GMT file with the metabolic pathways.
|
|
@@ -307,43 +250,33 @@ class _MetabolicPathwayHeterogeneity(Proc):
|
|
|
307
250
|
the enriched pathways
|
|
308
251
|
ncores (type=int;pgarg): Number of cores to use for parallelization
|
|
309
252
|
Defaults to `ScrnaMetabolicLandscape.ncores`
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
- check: {{proc.lang}} <(echo "library(parallel)")
|
|
336
|
-
r-dplyr:
|
|
337
|
-
- check: {{proc.lang}} <(echo "library(dplyr)")
|
|
338
|
-
r-tibble:
|
|
339
|
-
- check: {{proc.lang}} <(echo "library(tibble)")
|
|
340
|
-
r-enrichr:
|
|
341
|
-
- check: {{proc.lang}} <(echo "library(enrichR)")
|
|
342
|
-
r-data.table:
|
|
343
|
-
- check: {{proc.lang}} <(echo "library(data.table)")
|
|
344
|
-
r-fgsea:
|
|
345
|
-
- check: {{proc.lang}} <(echo "library(fgsea)")
|
|
253
|
+
subset_by (pgarg;readonly): Subset the data by the given column in the
|
|
254
|
+
metadata. For example, `Response`.
|
|
255
|
+
`NA` values will be removed in this column.
|
|
256
|
+
Defaults to `ScrnaMetabolicLandscape.subset_by`
|
|
257
|
+
If None, the data will not be subsetted.
|
|
258
|
+
group_by (pgarg;readonly): Group the data by the given column in the
|
|
259
|
+
metadata. For example, `cluster`.
|
|
260
|
+
Defaults to `ScrnaMetabolicLandscape.group_by`
|
|
261
|
+
fgsea_args (type=json): Other arguments for the `fgsea::fgsea()` function.
|
|
262
|
+
For example, `{"minSize": 15, "maxSize": 500}`.
|
|
263
|
+
See <https://rdrr.io/bioc/fgsea/man/fgsea.html> for more details.
|
|
264
|
+
plots (type=json): The plots to generate.
|
|
265
|
+
Names will be used as the title for the plot. Values will be the arguments
|
|
266
|
+
passed to `biopipen.utils::VizGSEA()` function.
|
|
267
|
+
See <https://pwwang.github.io/biopipen.utils.R/reference/VizGSEA.html>.
|
|
268
|
+
cases (type=json): Multiple cases for the analysis.
|
|
269
|
+
If you only have one case, you can specify the parameters directly to
|
|
270
|
+
`envs.subset_by`, `envs.group_by`, `envs.fgsea_args`, `envs.plots`,
|
|
271
|
+
`envs.select_pcs`, and `envs.pathway_pval_cutoff`.
|
|
272
|
+
The name of this default case will be `envs.subset_by`.
|
|
273
|
+
If you have multiple cases, you can specify the parameters for each case
|
|
274
|
+
in a dictionary. The keys will be the names of the cases and the values
|
|
275
|
+
will be dictionaries with the parameters for each case, where the values
|
|
276
|
+
will be inherited from `envs.subset_by`, `envs.group_by`, `envs.fgsea_args`,
|
|
277
|
+
`envs.plots`, `envs.select_pcs`, and `envs.pathway_pval_cutoff`.
|
|
346
278
|
""" # noqa: E501
|
|
279
|
+
|
|
347
280
|
input = "sobjfile:file"
|
|
348
281
|
output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
|
|
349
282
|
lang = config.lang.rscript
|
|
@@ -352,11 +285,16 @@ class _MetabolicPathwayHeterogeneity(Proc):
|
|
|
352
285
|
"select_pcs": 0.8,
|
|
353
286
|
"pathway_pval_cutoff": 0.01,
|
|
354
287
|
"ncores": config.misc.ncores,
|
|
355
|
-
"
|
|
356
|
-
"
|
|
357
|
-
"
|
|
358
|
-
"
|
|
359
|
-
|
|
288
|
+
"subset_by": None,
|
|
289
|
+
"group_by": None,
|
|
290
|
+
"fgsea_args": {"scoreType": "std", "nproc": 1},
|
|
291
|
+
"plots": {
|
|
292
|
+
"Pathway Heterogeneity": {
|
|
293
|
+
"plot_type": "dot",
|
|
294
|
+
"devpars": {"res": 100},
|
|
295
|
+
},
|
|
296
|
+
},
|
|
297
|
+
"cases": {},
|
|
360
298
|
}
|
|
361
299
|
script = (
|
|
362
300
|
"file://../scripts/scrna_metabolic_landscape/"
|
|
@@ -401,46 +339,19 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
401
339
|
dependent on other processes, this option will be used to determine
|
|
402
340
|
whether the input is a seurat object or not.
|
|
403
341
|
noimpute (flag): Whether to do imputation for the dropouts.
|
|
404
|
-
If
|
|
342
|
+
If True, the values will be left as is.
|
|
405
343
|
gmtfile: The GMT file with the metabolic pathways. The gene names should
|
|
406
344
|
match the gene names in the gene list in RNAData or
|
|
407
|
-
the Seurat object
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
will be
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
subsetting_prefix (type=auto): Working as a prefix to subset names
|
|
418
|
-
For example, if we have `subsetting_prefix = "timepoint"` and
|
|
419
|
-
we have `pre` and `post` in the `subsetting` column, the subsets
|
|
420
|
-
will be named as `timepoint_pre` and `timepoint_post`
|
|
421
|
-
If `subsetting` is a list, then this should also be a same-length
|
|
422
|
-
list. If a single string is given, it will be repeated to a list
|
|
423
|
-
with the same length as `subsetting`
|
|
424
|
-
subsetting_comparison (type=json): What kind of comparisons are we
|
|
425
|
-
doing to compare cells from different subsets.
|
|
426
|
-
It should be dict with keys as the names of the comparisons and
|
|
427
|
-
values as the 2 comparison groups from the `subsetting` column.
|
|
428
|
-
For example, if we have `pre` and `post` in the `subsetting` column,
|
|
429
|
-
we could have
|
|
430
|
-
`subsetting_comparison = {"pre_vs_post": ["post", "pre"]}`
|
|
431
|
-
The second group will be the control group in the comparison.
|
|
432
|
-
If we also have `1`, `2` and `3` in the `grouping` column,
|
|
433
|
-
by default, the comparisons are done within each subset for
|
|
434
|
-
each group. For example, for group `1`, groups `2` and `3`
|
|
435
|
-
will be used as control, and for group `2`, groups `1` and `3`
|
|
436
|
-
will be used as control, and for group `3`, groups `1` and `2`
|
|
437
|
-
will be used as control. It is similar to `Seurat::FindMarkers`
|
|
438
|
-
procedure. With this option, the comparisons are also done to
|
|
439
|
-
compare cells from different subsets within each group. With the
|
|
440
|
-
example above, we will have `pre_vs_post` comparisons within
|
|
441
|
-
each group.
|
|
442
|
-
If `subsetting` is a list, this must be a list of dicts with the
|
|
443
|
-
same length.
|
|
345
|
+
the Seurat object.
|
|
346
|
+
You can also provide a URL to the GMT file.
|
|
347
|
+
For example, from
|
|
348
|
+
<https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/>.
|
|
349
|
+
subset_by (pgarg;readonly): Subset the data by the given column in the
|
|
350
|
+
metadata. For example, `Response`.
|
|
351
|
+
`NA` values will be removed in this column.
|
|
352
|
+
If None, the data will not be subsetted.
|
|
353
|
+
group_by (pgarg;readonly): Group the data by the given column in the
|
|
354
|
+
metadata. For example, `cluster`.
|
|
444
355
|
mutaters (type=json): Add new columns to the metadata for
|
|
445
356
|
grouping/subsetting.
|
|
446
357
|
They are passed to `sobj@meta.data |> mutate(...)`. For example,
|
|
@@ -450,65 +361,25 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
450
361
|
ncores (type=int): Number of cores to use for parallelization for
|
|
451
362
|
each process
|
|
452
363
|
"""
|
|
364
|
+
|
|
453
365
|
DEFAULTS = Diot(
|
|
454
366
|
metafile=None,
|
|
455
367
|
is_seurat=None,
|
|
456
368
|
gmtfile=None,
|
|
457
|
-
grouping=None,
|
|
458
|
-
grouping_prefix="",
|
|
459
|
-
subsetting=None,
|
|
460
|
-
subsetting_prefix=None,
|
|
461
|
-
subsetting_comparison={},
|
|
462
369
|
mutaters=None,
|
|
463
|
-
noimpute=
|
|
370
|
+
noimpute=True,
|
|
464
371
|
ncores=config.misc.ncores,
|
|
372
|
+
subset_by=None,
|
|
373
|
+
group_by=None,
|
|
465
374
|
)
|
|
466
375
|
|
|
467
376
|
def post_init(self):
|
|
468
377
|
"""Load runtime processes"""
|
|
469
378
|
if self.opts.metafile:
|
|
470
379
|
suffix = Path(self.opts.metafile).suffix
|
|
471
|
-
self.opts.is_seurat = suffix in (".rds", ".RDS")
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
if self.opts.subsetting and not isinstance(self.opts.subsetting, list):
|
|
475
|
-
self.opts.subsetting = [self.opts.subsetting]
|
|
476
|
-
|
|
477
|
-
# Make sure the grouping is a list with the same length as subsetting
|
|
478
|
-
if (
|
|
479
|
-
self.opts.subsetting
|
|
480
|
-
and not isinstance(self.opts.subsetting_prefix, list)
|
|
481
|
-
):
|
|
482
|
-
self.opts.subsetting_prefix = [
|
|
483
|
-
self.opts.subsetting_prefix
|
|
484
|
-
] * len(self.opts.subsetting)
|
|
485
|
-
|
|
486
|
-
# Make sure the lengths of subsetting and subsetting_comparison the same
|
|
487
|
-
if self.opts.subsetting:
|
|
488
|
-
if len(self.opts.subsetting) == 1 and isinstance(
|
|
489
|
-
self.opts.subsetting_comparison, dict
|
|
490
|
-
):
|
|
491
|
-
self.opts.subsetting_comparison = [
|
|
492
|
-
self.opts.subsetting_comparison
|
|
493
|
-
]
|
|
494
|
-
|
|
495
|
-
if len(self.opts.subsetting) > 1 and not isinstance(
|
|
496
|
-
self.opts.subsetting_comparison, list
|
|
497
|
-
):
|
|
498
|
-
raise ValueError(
|
|
499
|
-
"The length of `subsetting` is larger than 1, "
|
|
500
|
-
"but `subsetting_comparison` is not a list of dicts."
|
|
501
|
-
)
|
|
502
|
-
|
|
503
|
-
if len(self.opts.subsetting) != len(
|
|
504
|
-
self.opts.subsetting_comparison
|
|
505
|
-
):
|
|
506
|
-
raise ValueError(
|
|
507
|
-
"The length of `subsetting` and `subsetting_comparison` "
|
|
508
|
-
"are not the same"
|
|
509
|
-
)
|
|
510
|
-
|
|
511
|
-
@ProcGroup.add_proc
|
|
380
|
+
self.opts.is_seurat = suffix in (".rds", ".RDS", ".qs", ".qs2")
|
|
381
|
+
|
|
382
|
+
@ProcGroup.add_proc # type: ignore
|
|
512
383
|
def p_input(self) -> Type[Proc]:
|
|
513
384
|
"""Build MetabolicInputs process"""
|
|
514
385
|
from .misc import File2Proc
|
|
@@ -526,8 +397,8 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
526
397
|
|
|
527
398
|
return MetabolicInput
|
|
528
399
|
|
|
529
|
-
@ProcGroup.add_proc
|
|
530
|
-
def p_preparing(self) -> Type[Proc]:
|
|
400
|
+
@ProcGroup.add_proc # type: ignore
|
|
401
|
+
def p_preparing(self) -> Type[Proc] | None:
|
|
531
402
|
"""Build SeuratPreparing process"""
|
|
532
403
|
if self.opts.is_seurat:
|
|
533
404
|
return None
|
|
@@ -539,11 +410,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
539
410
|
|
|
540
411
|
return MetabolicSeuratPreparing
|
|
541
412
|
|
|
542
|
-
@ProcGroup.add_proc
|
|
413
|
+
@ProcGroup.add_proc # type: ignore
|
|
543
414
|
def p_clustering(self) -> Type[Proc]:
|
|
544
415
|
"""Build SeuratClustering process"""
|
|
545
416
|
if self.opts.is_seurat:
|
|
546
|
-
return self.p_input
|
|
417
|
+
return self.p_input # type: ignore
|
|
547
418
|
|
|
548
419
|
from .scrna import SeuratClustering
|
|
549
420
|
|
|
@@ -552,11 +423,11 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
552
423
|
|
|
553
424
|
return MetabolicSeuratClustering
|
|
554
425
|
|
|
555
|
-
@ProcGroup.add_proc
|
|
426
|
+
@ProcGroup.add_proc # type: ignore
|
|
556
427
|
def p_mutater(self) -> Type[Proc]:
|
|
557
428
|
"""Build SeuratMetadataMutater process"""
|
|
558
429
|
if not self.opts.mutaters:
|
|
559
|
-
return self.p_clustering
|
|
430
|
+
return self.p_clustering # type: ignore
|
|
560
431
|
|
|
561
432
|
from .scrna import SeuratMetadataMutater
|
|
562
433
|
|
|
@@ -565,102 +436,77 @@ class ScrnaMetabolicLandscape(ProcGroup):
|
|
|
565
436
|
input_data = lambda ch: tibble(
|
|
566
437
|
srtobj=ch.iloc[:, 0],
|
|
567
438
|
metafile=[None],
|
|
568
|
-
mutaters=[self.opts.mutaters],
|
|
569
439
|
)
|
|
440
|
+
envs = {"mutaters": self.opts.mutaters}
|
|
570
441
|
|
|
571
442
|
return MetabolicSeuratMetadataMutater
|
|
572
443
|
|
|
573
|
-
@ProcGroup.add_proc
|
|
444
|
+
@ProcGroup.add_proc # type: ignore
|
|
574
445
|
def p_expr_impute(self) -> Type[Proc]:
|
|
575
446
|
"""Build process"""
|
|
576
447
|
if self.opts.noimpute:
|
|
577
|
-
return self.p_mutater
|
|
448
|
+
return self.p_mutater # type: ignore
|
|
578
449
|
|
|
579
|
-
from .scrna import
|
|
450
|
+
from .scrna import ExprImputation
|
|
580
451
|
|
|
581
|
-
@annotate.format_doc(indent=3)
|
|
582
|
-
class
|
|
452
|
+
@annotate.format_doc(indent=3) # type: ignore
|
|
453
|
+
class MetabolicExprImputation(ExprImputation):
|
|
583
454
|
"""{{Summary}}
|
|
584
455
|
|
|
585
456
|
You can turn off the imputation by setting the `noimpute` option
|
|
586
457
|
of the process group to `True`.
|
|
587
458
|
"""
|
|
459
|
+
|
|
588
460
|
requires = self.p_mutater
|
|
589
461
|
|
|
590
|
-
return
|
|
462
|
+
return MetabolicExprImputation
|
|
591
463
|
|
|
592
|
-
@ProcGroup.add_proc
|
|
464
|
+
@ProcGroup.add_proc # type: ignore
|
|
593
465
|
def p_pathway_activity(self) -> Type[Proc]:
|
|
594
466
|
"""Build MetabolicPathwayActivity process"""
|
|
595
|
-
return Proc.from_proc(
|
|
596
|
-
|
|
467
|
+
return Proc.from_proc( # type: ignore
|
|
468
|
+
MetabolicPathwayActivity,
|
|
597
469
|
"MetabolicPathwayActivity",
|
|
598
|
-
requires=self.p_expr_impute,
|
|
470
|
+
requires=self.p_expr_impute, # type: ignore
|
|
599
471
|
order=-1,
|
|
472
|
+
envs_depth=5,
|
|
600
473
|
envs={
|
|
601
474
|
"ncores": self.opts.ncores,
|
|
602
475
|
"gmtfile": self.opts.gmtfile,
|
|
603
|
-
"
|
|
604
|
-
"
|
|
605
|
-
"subsetting": self.opts.subsetting,
|
|
606
|
-
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
476
|
+
"group_by": self.opts.group_by,
|
|
477
|
+
"subset_by": self.opts.subset_by,
|
|
607
478
|
},
|
|
608
479
|
)
|
|
609
480
|
|
|
610
|
-
@ProcGroup.add_proc
|
|
481
|
+
@ProcGroup.add_proc # type: ignore
|
|
611
482
|
def p_pathway_heterogeneity(self) -> Type[Proc]:
|
|
612
483
|
"""Build MetabolicPathwayHeterogeneity process"""
|
|
613
|
-
return Proc.from_proc(
|
|
614
|
-
|
|
484
|
+
return Proc.from_proc( # type: ignore
|
|
485
|
+
MetabolicPathwayHeterogeneity,
|
|
615
486
|
"MetabolicPathwayHeterogeneity",
|
|
616
|
-
requires=self.
|
|
487
|
+
requires=self.p_mutater, # type: ignore
|
|
488
|
+
envs_depth=5,
|
|
617
489
|
envs={
|
|
618
490
|
"ncores": self.opts.ncores,
|
|
619
491
|
"gmtfile": self.opts.gmtfile,
|
|
620
|
-
"
|
|
621
|
-
"
|
|
622
|
-
"subsetting": self.opts.subsetting,
|
|
623
|
-
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
492
|
+
"group_by": self.opts.group_by,
|
|
493
|
+
"subset_by": self.opts.subset_by,
|
|
624
494
|
},
|
|
625
495
|
)
|
|
626
496
|
|
|
627
|
-
@ProcGroup.add_proc
|
|
497
|
+
@ProcGroup.add_proc # type: ignore
|
|
628
498
|
def p_features(self) -> Type[Proc]:
|
|
629
499
|
"""Build MetabolicFeatures process"""
|
|
630
|
-
return Proc.from_proc(
|
|
631
|
-
|
|
500
|
+
return Proc.from_proc( # type: ignore
|
|
501
|
+
MetabolicFeatures,
|
|
632
502
|
"MetabolicFeatures",
|
|
633
|
-
requires=self.p_expr_impute,
|
|
634
|
-
|
|
635
|
-
"ncores": self.opts.ncores,
|
|
636
|
-
"gmtfile": self.opts.gmtfile,
|
|
637
|
-
"grouping": self.opts.grouping,
|
|
638
|
-
"grouping_prefix": self.opts.grouping_prefix,
|
|
639
|
-
"subsetting": self.opts.subsetting,
|
|
640
|
-
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
641
|
-
},
|
|
642
|
-
)
|
|
643
|
-
|
|
644
|
-
@ProcGroup.add_proc
|
|
645
|
-
def p_features_intra_subset(self) -> Type[Proc]:
|
|
646
|
-
"""Build MetabolicFeaturesIntraSubset process"""
|
|
647
|
-
if self.opts.subsetting_comparison and not self.opts.subsetting:
|
|
648
|
-
raise ValueError(
|
|
649
|
-
"Cannot use `subsetting_comparison` without `subsetting`."
|
|
650
|
-
)
|
|
651
|
-
|
|
652
|
-
return Proc.from_proc(
|
|
653
|
-
_MetabolicFeaturesIntraSubset,
|
|
654
|
-
"MetabolicFeaturesIntraSubset",
|
|
655
|
-
requires=self.p_expr_impute,
|
|
503
|
+
requires=self.p_expr_impute, # type: ignore
|
|
504
|
+
envs_depth=5,
|
|
656
505
|
envs={
|
|
657
506
|
"ncores": self.opts.ncores,
|
|
658
507
|
"gmtfile": self.opts.gmtfile,
|
|
659
|
-
"
|
|
660
|
-
"
|
|
661
|
-
"subsetting": self.opts.subsetting,
|
|
662
|
-
"subsetting_prefix": self.opts.subsetting_prefix,
|
|
663
|
-
"subsetting_comparison": self.opts.subsetting_comparison,
|
|
508
|
+
"group_by": self.opts.group_by,
|
|
509
|
+
"subset_by": self.opts.subset_by,
|
|
664
510
|
},
|
|
665
511
|
)
|
|
666
512
|
|