biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +290 -288
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +348 -217
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +157 -75
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +5 -4
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
library(ggplot2)
|
|
2
|
-
library(ggprism)
|
|
3
1
|
library(dplyr)
|
|
4
2
|
library(tidyr)
|
|
5
3
|
library(tibble)
|
|
6
|
-
library(
|
|
4
|
+
library(plotthis)
|
|
7
5
|
|
|
8
6
|
tmadfiles = {{in.tmadfiles | r}}
|
|
9
7
|
metafile = {{in.metafile | r}}
|
|
@@ -47,7 +45,7 @@ if (!is.null(group_cols)) {
|
|
|
47
45
|
}
|
|
48
46
|
|
|
49
47
|
data = data.frame(Sample = sams, tMAD = tmads)
|
|
50
|
-
if (file.exists(metafile) && length(meta_cols) > 0) {
|
|
48
|
+
if (is.character(metafile) && file.exists(metafile) && length(meta_cols) > 0) {
|
|
51
49
|
metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
|
|
52
50
|
if (!is.null(metadf$Sample)) {
|
|
53
51
|
metadf$Sample = as.character(metadf$Sample)
|
|
@@ -63,20 +61,12 @@ if (file.exists(metafile) && length(meta_cols) > 0) {
|
|
|
63
61
|
write.table(data, file=file.path(outdir, "tMAD.txt"), sep="\t", quote=F, row.names=F)
|
|
64
62
|
|
|
65
63
|
# bar plot for all samples without grouping
|
|
66
|
-
p
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
axis.title.y = element_text(size=12),
|
|
73
|
-
axis.text.y = element_text(size=12),
|
|
74
|
-
legend.position = "none",
|
|
75
|
-
) +
|
|
76
|
-
labs(
|
|
77
|
-
x = NULL,
|
|
78
|
-
y = "tMAD",
|
|
79
|
-
)
|
|
64
|
+
p <- BarPlot(
|
|
65
|
+
data = data,
|
|
66
|
+
x = "Sample",
|
|
67
|
+
y = "tMAD",
|
|
68
|
+
x_text_angle = 90
|
|
69
|
+
)
|
|
80
70
|
|
|
81
71
|
png(file.path(outdir, "tMAD.png"), width=400 + length(sams) * 12, height=800, res=100)
|
|
82
72
|
print(p)
|
|
@@ -88,41 +78,30 @@ if (!is.null(group_cols)) {
|
|
|
88
78
|
if (!grepl(",", group_col, fixed = TRUE)) {
|
|
89
79
|
# Bar plot with this group_col, but with different fill colors
|
|
90
80
|
# for each group, and samples from the same group are next to each other
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
axis.title.x = element_blank(),
|
|
100
|
-
axis.title.y = element_text(size=12),
|
|
101
|
-
axis.text.y = element_text(size=12),
|
|
102
|
-
) +
|
|
103
|
-
labs(
|
|
104
|
-
x = NULL,
|
|
105
|
-
y = "tMAD",
|
|
106
|
-
)
|
|
81
|
+
gdata <- data %>% arrange(!!sym(group_col)) %>% mutate(Sample=factor(Sample, levels=unique(Sample)))
|
|
82
|
+
p <- BarPlot(
|
|
83
|
+
data = gdata,
|
|
84
|
+
x = "Sample",
|
|
85
|
+
y = "tMAD",
|
|
86
|
+
fill = group_col,
|
|
87
|
+
x_text_angle = 90
|
|
88
|
+
)
|
|
107
89
|
|
|
108
90
|
png(file.path(outdir, paste0("tMAD_", group_col, "_bar.png")), width=400 + length(sams) * 12, height=600, res=100)
|
|
109
91
|
print(p)
|
|
110
92
|
dev.off()
|
|
111
93
|
|
|
112
94
|
# Box plot overlays with violin plot with this group_col
|
|
113
|
-
p
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
x = group_col,
|
|
124
|
-
y = "tMAD",
|
|
125
|
-
)
|
|
95
|
+
p <- ViolinPlot(
|
|
96
|
+
data = gdata,
|
|
97
|
+
x = group_col,
|
|
98
|
+
y = "tMAD",
|
|
99
|
+
x_text_angle = 90,
|
|
100
|
+
add_box = TRUE,
|
|
101
|
+
add_point = TRUE,
|
|
102
|
+
comparisons = TRUE,
|
|
103
|
+
sig_label = "p.format"
|
|
104
|
+
)
|
|
126
105
|
|
|
127
106
|
png(file.path(outdir, paste0("tMAD_", group_col, "_box_violin.png")), width=1000, height=600, res=100)
|
|
128
107
|
print(p)
|
|
@@ -137,25 +116,17 @@ if (!is.null(group_cols)) {
|
|
|
137
116
|
# concatenate them together using patchwork, with ncol=2
|
|
138
117
|
# calculate the height and width of the plot based on the number of
|
|
139
118
|
# groups
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
axis.title.x = element_blank(),
|
|
152
|
-
axis.title.y = element_text(size=12),
|
|
153
|
-
axis.text.y = element_text(size=12),
|
|
154
|
-
) +
|
|
155
|
-
labs(x = NULL, y = "tMAD") +
|
|
156
|
-
ggtitle(.y[[group_col1]][1])
|
|
157
|
-
p
|
|
158
|
-
})
|
|
119
|
+
gdata <- data %>% arrange(!!sym(group_col1), !!sym(group_col2)) %>%
|
|
120
|
+
mutate(Sample=factor(Sample, levels=unique(Sample)))
|
|
121
|
+
p <- BarPlot(
|
|
122
|
+
data = gdata,
|
|
123
|
+
x = "Sample",
|
|
124
|
+
y = "tMAD",
|
|
125
|
+
split_by = group_col1,
|
|
126
|
+
fill = group_col2,
|
|
127
|
+
x_text_angle = 90,
|
|
128
|
+
ncol = 2
|
|
129
|
+
)
|
|
159
130
|
|
|
160
131
|
png(
|
|
161
132
|
file.path(outdir, paste0("tMAD_", group_col, "_bar.png")),
|
|
@@ -163,26 +134,22 @@ if (!is.null(group_cols)) {
|
|
|
163
134
|
height=length(unique(data[[group_col1]])) * 200,
|
|
164
135
|
res=100
|
|
165
136
|
)
|
|
166
|
-
print(
|
|
137
|
+
print(p)
|
|
167
138
|
dev.off()
|
|
168
139
|
|
|
169
140
|
# Do the same for Violin + boxplot
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
labs(x = group_col2, y = "tMAD") +
|
|
183
|
-
ggtitle(.y[[group_col1]][1])
|
|
184
|
-
p
|
|
185
|
-
})
|
|
141
|
+
p <- ViolinPlot(
|
|
142
|
+
data = gdata,
|
|
143
|
+
x = group_col2,
|
|
144
|
+
y = "tMAD",
|
|
145
|
+
split_by = group_col1,
|
|
146
|
+
x_text_angle = 90,
|
|
147
|
+
add_box = TRUE,
|
|
148
|
+
add_point = TRUE,
|
|
149
|
+
comparisons = TRUE,
|
|
150
|
+
sig_label = "p.format",
|
|
151
|
+
ncol = 2
|
|
152
|
+
)
|
|
186
153
|
|
|
187
154
|
png(
|
|
188
155
|
file.path(outdir, paste0("tMAD_", group_col, "_box_violin.png")),
|
|
@@ -190,7 +157,7 @@ if (!is.null(group_cols)) {
|
|
|
190
157
|
height=length(unique(data[[group_col1]])) * 200,
|
|
191
158
|
res=100
|
|
192
159
|
)
|
|
193
|
-
print(
|
|
160
|
+
print(p)
|
|
194
161
|
dev.off()
|
|
195
162
|
}
|
|
196
163
|
}
|
|
@@ -5,10 +5,10 @@ from pathlib import Path, PosixPath # for as_path
|
|
|
5
5
|
|
|
6
6
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
7
7
|
|
|
8
|
-
bamfiles = {{in.bamfiles | repr}} # pyright: ignore # noqa
|
|
9
|
-
atfile = {{in.atfile |
|
|
8
|
+
bamfiles = {{in.bamfiles | each: str | repr}} # pyright: ignore # noqa
|
|
9
|
+
atfile = {{in.atfile | quote}} # pyright: ignore
|
|
10
10
|
|
|
11
|
-
targetfile = {{out.targetfile |
|
|
11
|
+
targetfile = {{out.targetfile | quote}} # pyright: ignore
|
|
12
12
|
covfile = {{out.targetfile | as_path | attr: "with_suffix" | call: ".cnn" | repr}} # pyright: ignore
|
|
13
13
|
|
|
14
14
|
cnvkit: str = {{envs.cnvkit | repr}} # pyright: ignore
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
-
from diot import Diot
|
|
3
|
+
from diot import Diot # type: ignore[import]
|
|
4
4
|
|
|
5
5
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
6
6
|
|
|
7
|
-
segfiles = {{in.segfiles | repr}} # pyright: ignore # noqa # noqa
|
|
7
|
+
segfiles = {{in.segfiles | default: [] | each: str | repr}} # pyright: ignore # noqa # noqa
|
|
8
8
|
sample_sex = {{in.sample_sex | repr}} # pyright: ignore
|
|
9
|
-
outdir: str = {{out.outdir |
|
|
9
|
+
outdir: str = {{out.outdir | quote}} # pyright: ignore
|
|
10
10
|
cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
|
|
11
11
|
convert = {{envs.convert | quote}} # pyright: ignore
|
|
12
12
|
convert_args = {{envs.convert_args | repr}} # pyright: ignore
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
3
3
|
|
|
4
|
-
covfiles = {{in.covfiles | repr}} # pyright: ignore # noqa
|
|
5
|
-
target_file = {{in.target_file |
|
|
6
|
-
antitarget_file = {{in.antitarget_file |
|
|
4
|
+
covfiles = {{in.covfiles | default: [] | each: str | repr}} # pyright: ignore # noqa
|
|
5
|
+
target_file = {{in.target_file | quote: quote_none=False}} # pyright: ignore
|
|
6
|
+
antitarget_file = {{in.antitarget_file | quote: quote_none=False}} # pyright: ignore
|
|
7
7
|
sample_sex = {{in.sample_sex | repr}} # pyright: ignore
|
|
8
8
|
outfile = {{out.outfile | quote}} # pyright: ignore
|
|
9
9
|
reffile: str = {{envs.ref | quote}} # pyright: ignore
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
library(rlang)
|
|
2
2
|
library(dplyr)
|
|
3
|
+
library(gglogger)
|
|
3
4
|
library(biopipen.utils)
|
|
4
5
|
library(plotthis)
|
|
5
6
|
|
|
@@ -132,7 +133,7 @@ if (length(stats) > 0) {
|
|
|
132
133
|
case$data <- mutdata
|
|
133
134
|
}
|
|
134
135
|
|
|
135
|
-
p <- do_call(
|
|
136
|
+
p <- do_call(plot_fn, case)
|
|
136
137
|
save_plot(p, info$prefix, devpars, formats = more_formats)
|
|
137
138
|
if (save_code) {
|
|
138
139
|
save_plotcode(
|
|
@@ -155,3 +156,5 @@ if (length(stats) > 0) {
|
|
|
155
156
|
)
|
|
156
157
|
}
|
|
157
158
|
}
|
|
159
|
+
|
|
160
|
+
reporter$save(joboutdir)
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
{{ biopipen_dir | joinpaths: "utils", "gene.R" | source_r }}
|
|
1
|
+
library(biopipen.utils)
|
|
3
2
|
|
|
4
|
-
infile <- {{in.infile |
|
|
5
|
-
outfile <- {{out.outfile |
|
|
3
|
+
infile <- {{in.infile | r}}
|
|
4
|
+
outfile <- {{out.outfile | r}}
|
|
6
5
|
notfound <- {{envs.notfound | r}}
|
|
7
6
|
genecol <- {{envs.genecol | r}}
|
|
8
7
|
output <- {{envs.output | r}}
|
|
@@ -11,6 +10,8 @@ infmt <- {{envs.infmt | r}}
|
|
|
11
10
|
outfmt <- {{envs.outfmt | r}}
|
|
12
11
|
species <- {{envs.species | r}}
|
|
13
12
|
|
|
13
|
+
log <- get_logger()
|
|
14
|
+
|
|
14
15
|
if (is.na(notfound)) {
|
|
15
16
|
notfound = "na"
|
|
16
17
|
}
|
|
@@ -18,7 +19,7 @@ if (is.na(notfound)) {
|
|
|
18
19
|
df <- read.table(infile, header=TRUE, sep="\t", check.names=FALSE)
|
|
19
20
|
|
|
20
21
|
if (genecol == 0) {
|
|
21
|
-
|
|
22
|
+
log$warn("envs.genecol should be 1-based, but 0 was given. Using 1 instead.")
|
|
22
23
|
genecol <- 1
|
|
23
24
|
}
|
|
24
25
|
|
|
@@ -27,12 +28,13 @@ if (dup == "combine") { dup <- ";" }
|
|
|
27
28
|
|
|
28
29
|
genes <- df[[genecol]]
|
|
29
30
|
converted <- gene_name_conversion(
|
|
30
|
-
genes=genes,
|
|
31
|
-
species=species,
|
|
32
|
-
infmt=infmt,
|
|
33
|
-
outfmt=outfmt,
|
|
34
|
-
notfound=notfound,
|
|
35
|
-
dup=dup
|
|
31
|
+
genes = genes,
|
|
32
|
+
species = species,
|
|
33
|
+
infmt = infmt,
|
|
34
|
+
outfmt = outfmt,
|
|
35
|
+
notfound = notfound,
|
|
36
|
+
dup = dup,
|
|
37
|
+
suppress_messages = FALSE
|
|
36
38
|
)
|
|
37
39
|
# <genecol> <outfmt>
|
|
38
40
|
# 1 1255_g_at GUCA1A
|
|
@@ -50,7 +52,7 @@ if (notfound == "skip" || notfound == "ignore") {
|
|
|
50
52
|
|
|
51
53
|
if (output == "append") {
|
|
52
54
|
if (outfmt %in% colnames(df)) {
|
|
53
|
-
|
|
55
|
+
log$warn("The output column name already exists in the input dataframe. Appending with a suffix `_1`.")
|
|
54
56
|
outcol <- paste(outfmt, "_1", sep="")
|
|
55
57
|
}
|
|
56
58
|
df[[outcol]] <- converted[[outfmt]]
|
biopipen/scripts/gsea/Enrichr.R
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
{{ biopipen_dir | joinpaths: "utils", "gene.R" | source_r }}
|
|
3
3
|
{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
|
|
4
4
|
|
|
5
|
-
infile = {{in.infile |
|
|
6
|
-
outdir = {{out.outdir |
|
|
5
|
+
infile = {{in.infile | r}}
|
|
6
|
+
outdir = {{out.outdir | r}}
|
|
7
7
|
genecol = {{envs.genecol | r}}
|
|
8
8
|
genename = {{envs.genename | r}}
|
|
9
9
|
dbs = {{envs.dbs | r}}
|
biopipen/scripts/gsea/FGSEA.R
CHANGED
|
@@ -1,58 +1,192 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
{{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
|
|
4
|
-
{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
|
|
5
|
-
|
|
6
|
-
infile = {{in.infile | quote}}
|
|
7
|
-
metafile = {{in.metafile | quote}}
|
|
8
|
-
gmtfile = {{in.gmtfile | quote}}
|
|
9
|
-
{% if in.configfile %}
|
|
10
|
-
config = {{in.config | read | toml_loads | r}}
|
|
11
|
-
{% else %}
|
|
12
|
-
config = list()
|
|
13
|
-
{% endif %}
|
|
14
|
-
outdir = {{out.outdir | quote}}
|
|
15
|
-
envs = {{envs | r}}
|
|
16
|
-
clscol <- if (is.null(config$clscol)) envs$clscol else config$clscol
|
|
17
|
-
classes <- if (is.null(config$classes)) envs$classes else config$classes
|
|
18
|
-
|
|
19
|
-
if (is.null(clscol)) {
|
|
20
|
-
stop("No `clscol` specified.")
|
|
21
|
-
}
|
|
1
|
+
library(rlang)
|
|
2
|
+
library(biopipen.utils)
|
|
22
3
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
}
|
|
4
|
+
# input & output
|
|
5
|
+
infile = {{in.infile | r}}
|
|
6
|
+
metafile = {{in.metafile | r}}
|
|
7
|
+
outdir = {{out.outdir | r}}
|
|
8
|
+
joboutdir = {{job.outdir | r}}
|
|
9
|
+
|
|
10
|
+
# envs
|
|
11
|
+
ncores = {{envs.ncores | r}}
|
|
12
|
+
case = {{envs.case | r}}
|
|
13
|
+
control = {{envs.control | r}}
|
|
14
|
+
gmtfile = {{envs.gmtfile | r}}
|
|
15
|
+
method = {{envs.method | r}}
|
|
16
|
+
clscol = {{envs.clscol | r}}
|
|
17
|
+
top = {{envs.top | r}}
|
|
18
|
+
eps = {{envs.eps | r}}
|
|
19
|
+
minsize = {{envs.minSize | default: envs.minsize | r}}
|
|
20
|
+
maxsize = {{envs.maxSize | default: envs.maxsize | r}}
|
|
21
|
+
rest = {{envs.rest | r}}
|
|
22
|
+
cases = {{envs.cases | r}}
|
|
23
|
+
|
|
24
|
+
log <- get_logger()
|
|
25
|
+
reporter <- get_reporter()
|
|
26
|
+
|
|
27
|
+
defaults <- list(
|
|
28
|
+
case = case,
|
|
29
|
+
control = control,
|
|
30
|
+
gmtfile = gmtfile,
|
|
31
|
+
method = method,
|
|
32
|
+
clscol = clscol,
|
|
33
|
+
top = top,
|
|
34
|
+
eps = eps,
|
|
35
|
+
minsize = minsize,
|
|
36
|
+
maxsize = maxsize,
|
|
37
|
+
rest = rest
|
|
38
|
+
)
|
|
39
|
+
cases <- expand_cases(cases, defaults, default_case = "GSEA")
|
|
40
|
+
|
|
41
|
+
log$info("Reading input file ...")
|
|
42
|
+
indata <- read.table(infile, header=TRUE, stringsAsFactors=FALSE, row.names=1, sep="\t", quote="", check.names=FALSE)
|
|
26
43
|
|
|
27
|
-
if (is.
|
|
28
|
-
|
|
44
|
+
if (!is.null(metafile)) {
|
|
45
|
+
log$info("Reading metadata file ...")
|
|
46
|
+
metadata <- read.table(metafile, header=TRUE, stringsAsFactors=FALSE, row.names=NULL, sep="\t", quote="", check.names=FALSE)
|
|
29
47
|
} else {
|
|
30
|
-
|
|
48
|
+
metadata <- NULL
|
|
31
49
|
}
|
|
32
50
|
|
|
33
|
-
|
|
34
|
-
|
|
51
|
+
do_case <- function(name) {
|
|
52
|
+
log$info("Processing case: {name} ...")
|
|
53
|
+
case <- cases[[name]]
|
|
54
|
+
info <- case_info(name, outdir, create = TRUE)
|
|
35
55
|
|
|
36
|
-
|
|
56
|
+
if (is.null(case$case) && is.null(case$control)) {
|
|
57
|
+
stop("Either `case` or `control` must be specified in the case.")
|
|
58
|
+
}
|
|
59
|
+
if (is.null(case$gmtfile)) {
|
|
60
|
+
stop("`gmtfile` must be specified in the case.")
|
|
61
|
+
}
|
|
62
|
+
if (is.null(case$clscol)) {
|
|
63
|
+
stop("`clscol` must be specified in the case.")
|
|
64
|
+
}
|
|
65
|
+
if (!is.null(metadata) && length(case$clscol) > 1) {
|
|
66
|
+
stop("When `in.metafile` is specified, `envs.clscol` must be a single column name.")
|
|
67
|
+
}
|
|
68
|
+
if (!is.null(metadata)) {
|
|
69
|
+
samples <- colnames(indata)
|
|
70
|
+
if (!"Sample" %in% colnames(metadata)) {
|
|
71
|
+
colnames(metadata)[1] <- "Sample"
|
|
72
|
+
}
|
|
73
|
+
metadata <- metadata[match(samples, metadata$Sample), , drop=FALSE]
|
|
74
|
+
case$clscol <- as.character(metadata[[case$clscol]])
|
|
75
|
+
}
|
|
76
|
+
if (length(unique(case$clscol)) < 2) {
|
|
77
|
+
stop("The `clscol` must have at least two unique values.")
|
|
78
|
+
}
|
|
79
|
+
if (length(unique(case$clscol)) == 2) {
|
|
80
|
+
case$case <- case$case %||% setdiff(unique(case$clscol), case$control)
|
|
81
|
+
case$control <- case$control %||% setdiff(unique(case$clscol), case$case)
|
|
82
|
+
} else {
|
|
83
|
+
if (is.null(case$case) || is.null(case$control)) {
|
|
84
|
+
stop("When `clscol` has more than two unique values, both `case` and `control` must be specified.")
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
log$info("- Running pre-ranking ...")
|
|
88
|
+
ranks <- RunGSEAPreRank(
|
|
89
|
+
indata,
|
|
90
|
+
classes = case$clscol,
|
|
91
|
+
case = case$case,
|
|
92
|
+
control = case$control,
|
|
93
|
+
method = case$method
|
|
94
|
+
)
|
|
95
|
+
if (all(is.na(ranks))) {
|
|
96
|
+
if (length(case$clscol) < 10) {
|
|
97
|
+
log$warn(" Ignoring this case because all gene ranks are NA and there are <10 samples.")
|
|
98
|
+
reporter$add2(
|
|
99
|
+
list(
|
|
100
|
+
kind = "error",
|
|
101
|
+
content = paste0("Not enough samples (n = ", length(case$clscol), ") to run fgsea.")
|
|
102
|
+
),
|
|
103
|
+
hs = c(info$section, info$name)
|
|
104
|
+
)
|
|
105
|
+
return(NULL)
|
|
106
|
+
} else {
|
|
107
|
+
stop(paste0(
|
|
108
|
+
"All gene ranks are NA (# samples = ",
|
|
109
|
+
length(case$clscol),
|
|
110
|
+
"). ",
|
|
111
|
+
"It's probably due to high missing rate in the data. ",
|
|
112
|
+
"You may want to try a different `envs$method` for pre-ranking."
|
|
113
|
+
))
|
|
114
|
+
}
|
|
115
|
+
}
|
|
37
116
|
|
|
38
|
-
|
|
39
|
-
ranks
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
117
|
+
log$info("- Running GSEA ...")
|
|
118
|
+
case$rest$ranks <- ranks
|
|
119
|
+
case$rest$genesets <- ParseGMT(case$gmtfile)
|
|
120
|
+
case$rest$minSize <- case$rest$minSize %||% case$rest$minsize %||% case$minsize
|
|
121
|
+
case$rest$maxSize <- case$rest$maxSize %||% case$rest$maxsize %||% case$maxsize
|
|
122
|
+
case$rest$eps <- case$eps
|
|
123
|
+
case$rest$nproc <- case$ncores
|
|
124
|
+
case$rest$minsize <- NULL
|
|
125
|
+
case$rest$maxsize <- NULL
|
|
126
|
+
result <- do_call(RunGSEA, case$rest)
|
|
127
|
+
write.table(
|
|
128
|
+
result,
|
|
129
|
+
file.path(info$prefix, "fgsea.tsv"),
|
|
130
|
+
row.names = FALSE,
|
|
131
|
+
col.names = TRUE,
|
|
132
|
+
sep = "\t",
|
|
133
|
+
quote = FALSE
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
p_summary <- VizGSEA(
|
|
137
|
+
result,
|
|
138
|
+
plot_type = "summary",
|
|
139
|
+
top_term = case$top
|
|
140
|
+
)
|
|
141
|
+
save_plot(
|
|
142
|
+
p_summary,
|
|
143
|
+
file.path(info$prefix, "summary"),
|
|
144
|
+
devpars = list(res = 100, height = attr(p_summary, "height") * 100, width = attr(p_summary, "width") * 100),
|
|
145
|
+
formats = "png"
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
p_gsea <- VizGSEA(
|
|
149
|
+
result,
|
|
150
|
+
plot_type = "gsea",
|
|
151
|
+
gs = result$pathway[1:min(case$top, nrow(result))]
|
|
152
|
+
)
|
|
153
|
+
save_plot(
|
|
154
|
+
p_gsea,
|
|
155
|
+
file.path(info$prefix, "pathways"),
|
|
156
|
+
devpars = list(res = 100, height = attr(p_gsea, "height") * 100, width = attr(p_gsea, "width") * 100),
|
|
157
|
+
formats = "png"
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
reporter$add2(
|
|
162
|
+
list(
|
|
163
|
+
name = "Table",
|
|
164
|
+
contents = list(
|
|
165
|
+
list(kind = "descr", content = paste0(
|
|
166
|
+
"Showing top 50 pathways by padj in descending order. ",
|
|
167
|
+
"Use 'Download the entire data' button to download all pathways."
|
|
168
|
+
)),
|
|
169
|
+
list(kind = "table", src = file.path(info$prefix, "fgsea"), data = list(nrows = 50))
|
|
170
|
+
)
|
|
171
|
+
),
|
|
172
|
+
list(
|
|
173
|
+
name = "Summary Plot",
|
|
174
|
+
contents = list(
|
|
175
|
+
list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
|
|
176
|
+
list(kind = "image", src = file.path(info$prefix, "summary.png"))
|
|
177
|
+
)
|
|
178
|
+
),
|
|
179
|
+
list(
|
|
180
|
+
name = "GSEA Plots",
|
|
181
|
+
contents = list(
|
|
182
|
+
list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
|
|
183
|
+
list(kind = "image", src = file.path(info$prefix, "pathways.png"))
|
|
184
|
+
)
|
|
185
|
+
),
|
|
186
|
+
hs = c(info$section, info$name),
|
|
187
|
+
ui = "tabs"
|
|
188
|
+
)
|
|
189
|
+
}
|
|
46
190
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
envs$inopts = NULL
|
|
50
|
-
envs$metaopts = NULL
|
|
51
|
-
envs$method = NULL
|
|
52
|
-
envs$clscol = NULL
|
|
53
|
-
envs$classes = NULL
|
|
54
|
-
envs$ncores = NULL
|
|
55
|
-
envs$top = NULL
|
|
56
|
-
# the rest are the arguments for `fgsea()`
|
|
57
|
-
|
|
58
|
-
runFGSEA(ranks, gmtfile, top, outdir, envs)
|
|
191
|
+
sapply(names(cases), do_case)
|
|
192
|
+
reporter$save(joboutdir)
|
biopipen/scripts/gsea/PreRank.R
CHANGED
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
{{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
|
|
4
4
|
{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
|
|
5
5
|
|
|
6
|
-
infile = {{in.infile |
|
|
7
|
-
metafile = {{in.metafile |
|
|
6
|
+
infile = {{in.infile | r}}
|
|
7
|
+
metafile = {{in.metafile | r}}
|
|
8
8
|
{% if in.configfile %}
|
|
9
9
|
config = {{in.config | read | toml_loads | r}}
|
|
10
10
|
{% else %}
|
|
11
11
|
config = list()
|
|
12
12
|
{% endif %}
|
|
13
|
-
outfile = {{out.outfile |
|
|
13
|
+
outfile = {{out.outfile | r}}
|
|
14
14
|
envs = {{envs | r}}
|
|
15
15
|
clscol <- if (is.null(config$clscol)) envs$clscol else config$clscol
|
|
16
16
|
classes <- if (is.null(config$classes)) envs$classes else config$classes
|