biopipen-0.21.2-py3-none-any.whl → biopipen-0.22.1-py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +142 -0
- biopipen/ns/scrna.py +19 -1
- biopipen/ns/tcr.py +30 -10
- biopipen/reports/delim/SampleInfo.svelte +2 -22
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna/ScFGSEA.svelte +4 -23
- biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
- biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
- biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -168
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/scripts/delim/SampleInfo.R +41 -7
- biopipen/scripts/scrna/CellsDistribution.R +127 -16
- biopipen/scripts/scrna/MarkersFinder.R +245 -100
- biopipen/scripts/scrna/MetaMarkers.R +163 -82
- biopipen/scripts/scrna/RadarPlots.R +163 -110
- biopipen/scripts/scrna/ScFGSEA.R +51 -11
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
- biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
- biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
- biopipen/scripts/scrna/SeuratClustering.R +73 -26
- biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
- biopipen/scripts/scrna/SeuratPreparing.R +93 -19
- biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
- biopipen/scripts/tcr/Attach2Seurat.R +2 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
- biopipen/scripts/tcr/CloneResidency.R +114 -34
- biopipen/scripts/tcr/Immunarch-basic.R +18 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
- biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
- biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
- biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
- biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
- biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
- biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
- biopipen/scripts/tcr/Immunarch.R +7 -0
- biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
- biopipen/scripts/tcr/TCRClusterStats.R +124 -11
- biopipen/scripts/tcr/TCRClustering.R +8 -9
- biopipen/scripts/tcr/TESSA.R +66 -41
- biopipen/utils/misc.R +96 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
|
@@ -5,6 +5,7 @@ library(future)
|
|
|
5
5
|
library(bracer)
|
|
6
6
|
library(ggplot2)
|
|
7
7
|
library(tidyseurat)
|
|
8
|
+
library(slugify)
|
|
8
9
|
|
|
9
10
|
metafile = {{in.metafile | quote}}
|
|
10
11
|
rdsfile = {{out.rdsfile | quote}}
|
|
@@ -15,6 +16,18 @@ set.seed(8525)
|
|
|
15
16
|
options(future.globals.maxSize = 80000 * 1024^2)
|
|
16
17
|
plan(strategy = "multicore", workers = envs$ncores)
|
|
17
18
|
|
|
19
|
+
add_report(
|
|
20
|
+
list(
|
|
21
|
+
kind = "descr",
|
|
22
|
+
name = "Filters applied",
|
|
23
|
+
content = paste0(
|
|
24
|
+
"<p>Cell filters: ", html_escape(envs$cell_qc), "</p>",
|
|
25
|
+
"<p>Gene filters: ", html_escape(envs$gene_qc), "</p>"
|
|
26
|
+
)
|
|
27
|
+
),
|
|
28
|
+
h1 = "Filters and QC"
|
|
29
|
+
)
|
|
30
|
+
|
|
18
31
|
metadata = read.table(
|
|
19
32
|
metafile,
|
|
20
33
|
header = TRUE,
|
|
@@ -57,7 +70,7 @@ rename_files = function(e, sample, path) {
|
|
|
57
70
|
}
|
|
58
71
|
|
|
59
72
|
load_sample = function(sample) {
|
|
60
|
-
|
|
73
|
+
log_info("- Loading sample: {sample} ...")
|
|
61
74
|
mdata = as.data.frame(metadata)[metadata$Sample == sample, , drop=TRUE]
|
|
62
75
|
path = as.character(mdata$RNAData)
|
|
63
76
|
if (is.na(path) || !is.character(path) || nchar(path) == 0) {
|
|
@@ -105,10 +118,10 @@ load_sample = function(sample) {
|
|
|
105
118
|
# Load data
|
|
106
119
|
samples = as.character(metadata$Sample)
|
|
107
120
|
|
|
108
|
-
|
|
121
|
+
log_info("Reading samples individually ...")
|
|
109
122
|
obj_list = lapply(samples, load_sample)
|
|
110
123
|
|
|
111
|
-
|
|
124
|
+
log_info("Merging samples ...")
|
|
112
125
|
if (length(obj_list) >= 2) {
|
|
113
126
|
y = c()
|
|
114
127
|
for (i in 2:length(obj_list)) y = c(y, obj_list[[i]])
|
|
@@ -117,7 +130,7 @@ if (length(obj_list) >= 2) {
|
|
|
117
130
|
sobj = obj_list[[1]]
|
|
118
131
|
}
|
|
119
132
|
|
|
120
|
-
|
|
133
|
+
log_info("Adding metadata for QC ...")
|
|
121
134
|
sobj$percent.mt = PercentageFeatureSet(sobj, pattern = "^MT-")
|
|
122
135
|
sobj$percent.ribo = PercentageFeatureSet(sobj, pattern = "^RP[SL]")
|
|
123
136
|
sobj$percent.hb = PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
|
|
@@ -126,7 +139,7 @@ sobj$percent.plat = PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
|
|
|
126
139
|
dim_df = data.frame(When = "Before_QC", nCells = ncol(sobj), nGenes = nrow(sobj))
|
|
127
140
|
|
|
128
141
|
if (is.null(envs$cell_qc) || length(envs$cell_qc) == 0) {
|
|
129
|
-
|
|
142
|
+
log_warn("No cell QC criteria is provided. All cells will be kept.")
|
|
130
143
|
envs$cell_qc = "TRUE"
|
|
131
144
|
}
|
|
132
145
|
|
|
@@ -136,9 +149,21 @@ plotsdir = file.path(joboutdir, "plots")
|
|
|
136
149
|
dir.create(plotsdir, showWarnings = FALSE)
|
|
137
150
|
|
|
138
151
|
# Violin plots
|
|
139
|
-
|
|
152
|
+
log_info("Plotting violin plots ...")
|
|
153
|
+
add_report(
|
|
154
|
+
list(
|
|
155
|
+
kind = "descr",
|
|
156
|
+
content = paste(
|
|
157
|
+
"The violin plots for each feature. The cells are grouped by sample.",
|
|
158
|
+
"The cells that fail the QC criteria are colored in red, and",
|
|
159
|
+
"the cells that pass the QC criteria are colored in black.",
|
|
160
|
+
"The cells that fail the QC criteria are filtered out in the returned Seurat object."
|
|
161
|
+
)
|
|
162
|
+
),
|
|
163
|
+
h1 = "Violin Plots"
|
|
164
|
+
)
|
|
140
165
|
for (feat in feats) {
|
|
141
|
-
|
|
166
|
+
log_info("- For feature: {feat}")
|
|
142
167
|
vln_p = VlnPlot(
|
|
143
168
|
sobj,
|
|
144
169
|
cols = rep("white", length(samples)),
|
|
@@ -150,20 +175,43 @@ for (feat in feats) {
|
|
|
150
175
|
aes(color = .QC),
|
|
151
176
|
data = vln_p$data,
|
|
152
177
|
position = position_jitterdodge(jitter.width = 0.4, dodge.width = 0.9)
|
|
153
|
-
) + scale_color_manual(values = c("
|
|
178
|
+
) + scale_color_manual(values = c("#181818", pal_biopipen()(1)), breaks = c(TRUE, FALSE))
|
|
154
179
|
|
|
180
|
+
vlnplot = file.path(plotsdir, paste0(slugify(feat, tolower = FALSE), ".vln.png"))
|
|
155
181
|
png(
|
|
156
|
-
|
|
182
|
+
vlnplot,
|
|
157
183
|
width = 800 + length(samples) * 15, height = 600, res = 100
|
|
158
184
|
)
|
|
159
185
|
print(vln_p)
|
|
160
186
|
dev.off()
|
|
187
|
+
|
|
188
|
+
add_report(
|
|
189
|
+
list(
|
|
190
|
+
src = vlnplot,
|
|
191
|
+
name = feat,
|
|
192
|
+
descr = paste0("Distribution of ", feat, " for each sample.")
|
|
193
|
+
),
|
|
194
|
+
h1 = "Violin Plots",
|
|
195
|
+
ui = "table_of_images"
|
|
196
|
+
)
|
|
161
197
|
}
|
|
162
198
|
|
|
163
199
|
# Scatter plots against nCount_RNA
|
|
164
|
-
|
|
200
|
+
log_info("Plotting scatter plots ...")
|
|
201
|
+
add_report(
|
|
202
|
+
list(
|
|
203
|
+
kind = "descr",
|
|
204
|
+
content = paste(
|
|
205
|
+
"The scatter plots for each feature against nCount_RNA. ",
|
|
206
|
+
"The cells that fail the QC criteria are colored in red, and",
|
|
207
|
+
"the cells that pass the QC criteria are colored in black.",
|
|
208
|
+
"The cells that fail the QC criteria are filtered out in the returned Seurat object."
|
|
209
|
+
)
|
|
210
|
+
),
|
|
211
|
+
h1 = "Scatter Plots"
|
|
212
|
+
)
|
|
165
213
|
for (feat in setdiff(feats, "nCount_RNA")) {
|
|
166
|
-
|
|
214
|
+
log_info("- For feature: {feat}, against nCount_RNA")
|
|
167
215
|
scat_p = FeatureScatter(
|
|
168
216
|
sobj,
|
|
169
217
|
feature1 = "nCount_RNA",
|
|
@@ -171,22 +219,30 @@ for (feat in setdiff(feats, "nCount_RNA")) {
|
|
|
171
219
|
group.by = ".QC"
|
|
172
220
|
) +
|
|
173
221
|
NoLegend() +
|
|
174
|
-
scale_color_manual(values = c("
|
|
222
|
+
scale_color_manual(values = c("#181818", pal_biopipen()(1)), breaks = c(TRUE, FALSE))
|
|
175
223
|
|
|
176
|
-
png
|
|
177
|
-
|
|
178
|
-
width = 800, height = 600, res = 100
|
|
179
|
-
)
|
|
224
|
+
scatfile = file.path(plotsdir, paste0(slugify(feat, tolower = FALSE), "-nCount_RNA.scatter.png"))
|
|
225
|
+
png(scatfile, width = 800, height = 600, res = 100)
|
|
180
226
|
print(scat_p)
|
|
181
227
|
dev.off()
|
|
228
|
+
|
|
229
|
+
add_report(
|
|
230
|
+
list(
|
|
231
|
+
src = scatfile,
|
|
232
|
+
name = paste0(feat, " vs nCount_RNA"),
|
|
233
|
+
descr = paste0("Scatter plot for ", feat, " against nCount_RNA")
|
|
234
|
+
),
|
|
235
|
+
h1 = "Scatter Plots",
|
|
236
|
+
ui = "table_of_images"
|
|
237
|
+
)
|
|
182
238
|
}
|
|
183
239
|
|
|
184
240
|
# Do the filtering
|
|
185
|
-
|
|
241
|
+
log_info("Filtering cells using QC criteria ...")
|
|
186
242
|
sobj = sobj %>% filter(.QC)
|
|
187
243
|
sobj$.QC = NULL
|
|
188
244
|
|
|
189
|
-
|
|
245
|
+
log_info("Filtering genes ...")
|
|
190
246
|
if (is.list(envs$gene_qc)) {
|
|
191
247
|
if ("min_cells" %in% names(envs$gene_qc)) {
|
|
192
248
|
genes = rownames(sobj)[Matrix::rowSums(sobj) >= envs$gene_qc$min_cells]
|
|
@@ -202,8 +258,26 @@ dim_df = rbind(
|
|
|
202
258
|
)
|
|
203
259
|
)
|
|
204
260
|
|
|
261
|
+
log_info("Saving dimension table ...")
|
|
205
262
|
write.table(dim_df, file = file.path(plotsdir, "dim.txt"),
|
|
206
263
|
row.names = FALSE, quote = FALSE, sep = "\t")
|
|
207
264
|
|
|
208
|
-
|
|
265
|
+
add_report(
|
|
266
|
+
list(
|
|
267
|
+
kind = "descr",
|
|
268
|
+
content = paste(
|
|
269
|
+
"The dimension table for the Seurat object. The table contains the number of cells and genes before and after QC."
|
|
270
|
+
)
|
|
271
|
+
),
|
|
272
|
+
list(
|
|
273
|
+
kind = "table",
|
|
274
|
+
data = list(path = file.path(plotsdir, "dim.txt"))
|
|
275
|
+
),
|
|
276
|
+
h1 = "Filters and QC"
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
log_info("Saving filtered seurat object ...")
|
|
209
281
|
saveRDS(sobj, rdsfile)
|
|
282
|
+
|
|
283
|
+
save_report(joboutdir)
|
|
@@ -5,11 +5,14 @@ library(tibble)
|
|
|
5
5
|
library(enrichR)
|
|
6
6
|
library(rlang)
|
|
7
7
|
library(dplyr)
|
|
8
|
+
library(slugify)
|
|
9
|
+
library(ggprism)
|
|
8
10
|
|
|
9
11
|
setEnrichrSite("Enrichr")
|
|
10
12
|
|
|
11
13
|
srtfile <- {{in.srtobj | r}}
|
|
12
14
|
outdir <- {{out.outdir | r}}
|
|
15
|
+
joboutdir <- {{job.outdir | r}}
|
|
13
16
|
mutaters <- {{ envs.mutaters | r }}
|
|
14
17
|
ident <- {{ envs.ident | r }}
|
|
15
18
|
group.by <- {{ envs["group-by"] | r }} # nolint
|
|
@@ -22,16 +25,16 @@ cases <- {{ envs.cases | r: todot = "-" }} # nolint
|
|
|
22
25
|
|
|
23
26
|
set.seed(8525)
|
|
24
27
|
|
|
25
|
-
|
|
28
|
+
log_info("Loading Seurat object ...")
|
|
26
29
|
srtobj <- readRDS(srtfile)
|
|
27
30
|
|
|
28
|
-
|
|
31
|
+
log_info("Mutate meta data if needed ...")
|
|
29
32
|
if (!is.null(mutaters) && length(mutaters)) {
|
|
30
33
|
srtobj@meta.data <- srtobj@meta.data %>%
|
|
31
34
|
mutate(!!!lapply(mutaters, parse_expr))
|
|
32
35
|
}
|
|
33
36
|
|
|
34
|
-
|
|
37
|
+
log_info("Expanding cases ...")
|
|
35
38
|
if (is.null(cases) || length(cases) == 0) {
|
|
36
39
|
cases <- list(
|
|
37
40
|
DEFAULT = list(
|
|
@@ -61,11 +64,14 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
61
64
|
|
|
62
65
|
# Expand each and ident
|
|
63
66
|
newcases <- list()
|
|
67
|
+
sections <- c()
|
|
64
68
|
for (name in names(cases)) { # nolint
|
|
65
69
|
case <- cases[[name]]
|
|
66
70
|
if (is.null(case$each) && !is.null(case$ident)) {
|
|
71
|
+
sections <- c(sections, case$section)
|
|
67
72
|
newcases[[paste0(case$section, ":", name)]] <- case
|
|
68
73
|
} else if (is.null(case$each)) {
|
|
74
|
+
sections <- c(sections, name)
|
|
69
75
|
idents <- srtobj@meta.data %>%
|
|
70
76
|
pull(case$group.by) %>%
|
|
71
77
|
unique() %>%
|
|
@@ -93,15 +99,21 @@ for (name in names(cases)) { # nolint
|
|
|
93
99
|
na.omit()
|
|
94
100
|
for (ident in idents) {
|
|
95
101
|
kname <- if (name == "DEFAULT") "" else paste0("-", name)
|
|
102
|
+
sections <- c(sections, paste0(each, kname))
|
|
96
103
|
key <- paste0(each, kname, ":", ident)
|
|
97
104
|
if (case$prefix_each) {
|
|
98
|
-
key <- paste0(
|
|
105
|
+
key <- paste0(
|
|
106
|
+
ifelse(case$each == "seurat_clusters", "Cluster", case$each),
|
|
107
|
+
" - ",
|
|
108
|
+
key
|
|
109
|
+
)
|
|
99
110
|
}
|
|
100
111
|
newcases[[key]] <- case
|
|
101
112
|
newcases[[key]]$ident <- ident
|
|
102
113
|
newcases[[key]]$group.by <- by # nolint
|
|
103
114
|
}
|
|
104
115
|
} else {
|
|
116
|
+
sections <- c(sections, case$each)
|
|
105
117
|
key <- paste0(case$each, ":", each)
|
|
106
118
|
if (name != "DEFAULT") {
|
|
107
119
|
key <- paste0(key, " - ", name)
|
|
@@ -112,11 +124,33 @@ for (name in names(cases)) { # nolint
|
|
|
112
124
|
}
|
|
113
125
|
}
|
|
114
126
|
cases <- newcases
|
|
127
|
+
single_section <- length(unique(sections)) == 1
|
|
128
|
+
|
|
129
|
+
casename_info <- function(casename, create = FALSE) {
|
|
130
|
+
sec_case_names <- strsplit(casename, ":")[[1]]
|
|
131
|
+
cname <- paste(sec_case_names[-1], collapse = ":")
|
|
132
|
+
|
|
133
|
+
out <- list(
|
|
134
|
+
casename = casename,
|
|
135
|
+
section = sec_case_names[1],
|
|
136
|
+
case = cname,
|
|
137
|
+
section_slug = slugify(sec_case_names[1], tolower = FALSE),
|
|
138
|
+
case_slug = slugify(cname, tolower = FALSE)
|
|
139
|
+
)
|
|
140
|
+
out$casedir <- file.path(outdir, out$section_slug, out$case_slug)
|
|
141
|
+
if (create) {
|
|
142
|
+
dir.create(out$casedir, showWarnings = FALSE, recursive = TRUE)
|
|
143
|
+
}
|
|
144
|
+
out
|
|
145
|
+
}
|
|
115
146
|
|
|
116
147
|
do_enrich <- function(expr, odir) {
|
|
117
|
-
|
|
148
|
+
log_info(" Saving expressions ...")
|
|
149
|
+
expr <- expr %>% as.data.frame()
|
|
150
|
+
colnames(expr) <- c("Expression")
|
|
151
|
+
expr <- expr %>% rownames_to_column("Gene") %>% select(Gene, Expression)
|
|
118
152
|
write.table(
|
|
119
|
-
expr
|
|
153
|
+
expr,
|
|
120
154
|
file.path(odir, "expr.txt"),
|
|
121
155
|
sep = "\t",
|
|
122
156
|
row.names = TRUE,
|
|
@@ -124,7 +158,7 @@ do_enrich <- function(expr, odir) {
|
|
|
124
158
|
quote = FALSE
|
|
125
159
|
)
|
|
126
160
|
write.table(
|
|
127
|
-
expr %>%
|
|
161
|
+
expr %>% head(n),
|
|
128
162
|
file.path(odir, "exprn.txt"),
|
|
129
163
|
sep = "\t",
|
|
130
164
|
row.names = TRUE,
|
|
@@ -132,8 +166,8 @@ do_enrich <- function(expr, odir) {
|
|
|
132
166
|
quote = FALSE
|
|
133
167
|
)
|
|
134
168
|
|
|
135
|
-
|
|
136
|
-
enriched <- enrichr(
|
|
169
|
+
log_info(" Running enrichment ...")
|
|
170
|
+
enriched <- enrichr(head(expr$Gene, n), dbs) # nolint
|
|
137
171
|
for (db in dbs) {
|
|
138
172
|
write.table(
|
|
139
173
|
enriched[[db]],
|
|
@@ -147,29 +181,77 @@ do_enrich <- function(expr, odir) {
|
|
|
147
181
|
file.path(odir, paste0("Enrichr-", db, ".png")),
|
|
148
182
|
res = 100, height = 1000, width = 1000
|
|
149
183
|
)
|
|
150
|
-
print(
|
|
184
|
+
print(
|
|
185
|
+
plotEnrich(enriched[[db]], showTerms = 20, title = db) +
|
|
186
|
+
theme_prism()
|
|
187
|
+
)
|
|
151
188
|
dev.off()
|
|
152
189
|
}
|
|
153
190
|
}
|
|
154
191
|
|
|
155
192
|
do_case <- function(casename) {
|
|
156
|
-
|
|
193
|
+
log_info("- Running for case: {casename} ...")
|
|
157
194
|
case <- cases[[casename]]
|
|
158
|
-
|
|
159
|
-
section <- parts[1]
|
|
160
|
-
casename <- paste(parts[-1], collapse = ":")
|
|
195
|
+
info <- casename_info(casename, create = TRUE)
|
|
161
196
|
|
|
162
|
-
|
|
197
|
+
log_info(" Calculating average expression ...")
|
|
163
198
|
avgexpr <- AverageExpression(
|
|
164
199
|
srtobj,
|
|
165
200
|
group.by = case$group.by
|
|
166
201
|
)$RNA[, case$ident, drop = FALSE]
|
|
167
202
|
avgexpr <- avgexpr[order(-avgexpr), , drop = FALSE]
|
|
168
203
|
|
|
169
|
-
|
|
170
|
-
|
|
204
|
+
do_enrich(avgexpr, info$casedir)
|
|
205
|
+
|
|
206
|
+
add_case_report(info)
|
|
207
|
+
}
|
|
171
208
|
|
|
172
|
-
|
|
209
|
+
add_case_report <- function(info) {
|
|
210
|
+
log_info(" Adding case report ...")
|
|
211
|
+
h1 = ifelse(
|
|
212
|
+
info$section == "DEFAULT",
|
|
213
|
+
info$case,
|
|
214
|
+
ifelse(
|
|
215
|
+
single_section,
|
|
216
|
+
paste0(
|
|
217
|
+
ifelse(info$section == "seurat_clusters", "Cluster", info$section),
|
|
218
|
+
" - ",
|
|
219
|
+
info$case
|
|
220
|
+
),
|
|
221
|
+
info$section
|
|
222
|
+
)
|
|
223
|
+
)
|
|
224
|
+
h2 = ifelse(
|
|
225
|
+
info$section == "DEFAULT",
|
|
226
|
+
"#",
|
|
227
|
+
ifelse(single_section, "#", info$case)
|
|
228
|
+
)
|
|
229
|
+
|
|
230
|
+
add_report(
|
|
231
|
+
list(
|
|
232
|
+
kind = "descr",
|
|
233
|
+
content = paste0("Top ", n, " expressing genes")
|
|
234
|
+
),
|
|
235
|
+
list(
|
|
236
|
+
kind = "table",
|
|
237
|
+
src = file.path(info$casedir, "exprn.txt")
|
|
238
|
+
),
|
|
239
|
+
h1 = h1,
|
|
240
|
+
h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
|
|
241
|
+
h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
add_report(
|
|
245
|
+
list(
|
|
246
|
+
kind = "descr",
|
|
247
|
+
content = paste0("Enrichment analysis for the top ", n, " expressing genes")
|
|
248
|
+
),
|
|
249
|
+
list(kind = "enrichr", dir = info$casedir),
|
|
250
|
+
h1 = h1,
|
|
251
|
+
h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
|
|
252
|
+
h3 = ifelse(h2 == "#", "#", "Enrichment Analysis")
|
|
253
|
+
)
|
|
173
254
|
}
|
|
174
255
|
|
|
175
256
|
sapply(sort(names(cases)), do_case)
|
|
257
|
+
save_report(joboutdir)
|
|
@@ -1,10 +1,13 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
1
2
|
source("{{biopipen_dir}}/utils/gsea.R")
|
|
2
3
|
|
|
3
4
|
library(parallel)
|
|
4
5
|
library(Seurat)
|
|
6
|
+
library(slugify)
|
|
5
7
|
|
|
6
8
|
sobjfile <- {{ in.sobjfile | r }}
|
|
7
9
|
outdir <- {{ out.outdir | r }}
|
|
10
|
+
joboutdir <- {{ job.outdir | r }}
|
|
8
11
|
gmtfile <- {{ envs.gmtfile | r }}
|
|
9
12
|
ncores <- {{ envs.ncores | r }}
|
|
10
13
|
fgsea <- {{ envs.fgsea | r }}
|
|
@@ -37,10 +40,10 @@ pathways <- gmt_pathways(gmtfile)
|
|
|
37
40
|
metabolics <- unique(as.vector(unname(unlist(pathways))))
|
|
38
41
|
sobj <- readRDS(sobjfile)
|
|
39
42
|
|
|
40
|
-
do_one_group <- function(obj, group, outputdir) {
|
|
41
|
-
|
|
43
|
+
do_one_group <- function(obj, group, outputdir, h1) {
|
|
44
|
+
log_info(paste("- Processing group", grouping, ":", group))
|
|
42
45
|
groupname = paste0(grouping_prefix, group)
|
|
43
|
-
odir = file.path(outputdir, groupname)
|
|
46
|
+
odir = file.path(outputdir, slugify(groupname, tolower = FALSE))
|
|
44
47
|
dir.create(odir, showWarnings = FALSE)
|
|
45
48
|
|
|
46
49
|
classes = as.character(obj@meta.data[[grouping]])
|
|
@@ -65,19 +68,24 @@ do_one_group <- function(obj, group, outputdir) {
|
|
|
65
68
|
}
|
|
66
69
|
}, error=function(e) {
|
|
67
70
|
unlink(odir, recursive = T, force = T)
|
|
68
|
-
|
|
69
|
-
|
|
71
|
+
log_warn(paste("Unable to run for", group))
|
|
72
|
+
log_warn(e)
|
|
70
73
|
})
|
|
71
74
|
|
|
75
|
+
add_report(
|
|
76
|
+
list(kind = "fgsea", dir = odir),
|
|
77
|
+
h1 = ifelse(is.null(h1), groupname, h1),
|
|
78
|
+
h2 = ifelse(is.null(h1), "#", groupname)
|
|
79
|
+
)
|
|
72
80
|
}
|
|
73
81
|
|
|
74
82
|
do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
75
|
-
|
|
83
|
+
log_info(paste("Processing subset", subset_col, ":", s))
|
|
76
84
|
if (is.null(s)) {
|
|
77
85
|
outputdir <- file.path(outdir, "ALL")
|
|
78
86
|
subset_obj <- sobj
|
|
79
87
|
} else {
|
|
80
|
-
outputdir <- file.path(outdir, paste0(subset_prefix, s))
|
|
88
|
+
outputdir <- file.path(outdir, slugify(paste0(subset_prefix, s), tolower = FALSE))
|
|
81
89
|
subset_code <- paste0("subset(sobj, subset = ", subset_col, "=='", s, "')")
|
|
82
90
|
subset_obj <- eval(parse(text = subset_code))
|
|
83
91
|
}
|
|
@@ -85,9 +93,13 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
|
85
93
|
|
|
86
94
|
subset_obj <- subset(subset_obj, features = intersect(rownames(subset_obj), metabolics))
|
|
87
95
|
|
|
96
|
+
h1 <- NULL
|
|
97
|
+
if (!is.null(s)) {
|
|
98
|
+
h1 <- paste0(subset_prefix, s)
|
|
99
|
+
}
|
|
88
100
|
groups = subset_obj@meta.data[[grouping]]
|
|
89
101
|
x = mclapply(as.character(unique(groups)), function(group) {
|
|
90
|
-
do_one_group(subset_obj, group, outputdir)
|
|
102
|
+
do_one_group(subset_obj, group, outputdir, h1)
|
|
91
103
|
}, mc.cores = ncores)
|
|
92
104
|
if (any(unlist(lapply(x, class)) == "try-error")) {
|
|
93
105
|
stop("mclapply error")
|
|
@@ -110,3 +122,4 @@ if (is.null(subsetting_cols)) {
|
|
|
110
122
|
}
|
|
111
123
|
}
|
|
112
124
|
|
|
125
|
+
save_report(joboutdir)
|
|
@@ -4,9 +4,11 @@ source("{{biopipen_dir}}/utils/gsea.R")
|
|
|
4
4
|
library(parallel)
|
|
5
5
|
library(scater)
|
|
6
6
|
library(Seurat)
|
|
7
|
+
library(slugify)
|
|
7
8
|
|
|
8
9
|
sobjfile <- {{ in.sobjfile | r }}
|
|
9
10
|
outdir <- {{ out.outdir | r }}
|
|
11
|
+
joboutdir <- {{ job.outdir | r }}
|
|
10
12
|
gmtfile <- {{ envs.gmtfile | r }}
|
|
11
13
|
ncores <- {{ envs.ncores | r }}
|
|
12
14
|
fgsea <- {{ envs.fgsea | r }}
|
|
@@ -47,7 +49,8 @@ do_one_comparison <- function(
|
|
|
47
49
|
control,
|
|
48
50
|
groupdir,
|
|
49
51
|
subset_col,
|
|
50
|
-
subset_prefix
|
|
52
|
+
subset_prefix,
|
|
53
|
+
groupname
|
|
51
54
|
) {
|
|
52
55
|
print(paste(" Design:", compname, "(", case, ",", control, ")"))
|
|
53
56
|
case_code = paste0("subset(obj, subset = ", subset_col, " == '", case, "')")
|
|
@@ -68,6 +71,11 @@ do_one_comparison <- function(
|
|
|
68
71
|
})
|
|
69
72
|
if (is.null(control_obj)) {
|
|
70
73
|
print(" Skip (not enough cells in control)")
|
|
74
|
+
add_report(
|
|
75
|
+
list(kind = "error", content = "Not enough cells in control"),
|
|
76
|
+
h1 = groupname,
|
|
77
|
+
h2 = compname
|
|
78
|
+
)
|
|
71
79
|
return (NULL)
|
|
72
80
|
}
|
|
73
81
|
exprs_case = GetAssayData(case_obj)
|
|
@@ -77,6 +85,11 @@ do_one_comparison <- function(
|
|
|
77
85
|
dir.create(odir, showWarnings = FALSE)
|
|
78
86
|
if (ncol(exprs_case) < 3 || ncol(exprs_control) < 3) {
|
|
79
87
|
print(" Skip (not enough cells)")
|
|
88
|
+
add_report(
|
|
89
|
+
list(kind = "error", content = "Not enough cells"),
|
|
90
|
+
h1 = groupname,
|
|
91
|
+
h2 = compname
|
|
92
|
+
)
|
|
80
93
|
return (NULL)
|
|
81
94
|
}
|
|
82
95
|
if (fgsea) {
|
|
@@ -95,6 +108,12 @@ do_one_comparison <- function(
|
|
|
95
108
|
outdir = odir,
|
|
96
109
|
envs = list(nproc = 1)
|
|
97
110
|
)
|
|
111
|
+
|
|
112
|
+
add_report(
|
|
113
|
+
list(kind = "fgsea", dir = odir),
|
|
114
|
+
h1 = groupname,
|
|
115
|
+
h2 = compname
|
|
116
|
+
)
|
|
98
117
|
} else {
|
|
99
118
|
runGSEA(
|
|
100
119
|
cbind(exprs_case, exprs_control),
|
|
@@ -114,7 +133,7 @@ do_one_group <- function(group) {
|
|
|
114
133
|
)
|
|
115
134
|
obj = eval(parse(text = group_code))
|
|
116
135
|
groupname = paste0(grouping_prefix, group)
|
|
117
|
-
groupdir = file.path(outdir, groupname)
|
|
136
|
+
groupdir = file.path(outdir, slugify(groupname, tolower = FALSE))
|
|
118
137
|
dir.create(groupdir, showWarnings = FALSE)
|
|
119
138
|
|
|
120
139
|
for (i in seq_along(subsetting_comparison)) {
|
|
@@ -132,7 +151,8 @@ do_one_group <- function(group) {
|
|
|
132
151
|
sci[[compname]][2],
|
|
133
152
|
groupdir,
|
|
134
153
|
subsetting_cols[i],
|
|
135
|
-
subsetting_prefix[i]
|
|
154
|
+
subsetting_prefix[i],
|
|
155
|
+
groupname
|
|
136
156
|
)
|
|
137
157
|
}
|
|
138
158
|
)
|
|
@@ -148,3 +168,5 @@ if (ncores == 1) {
|
|
|
148
168
|
stop("mclapply error")
|
|
149
169
|
}
|
|
150
170
|
}
|
|
171
|
+
|
|
172
|
+
save_report(joboutdir)
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
1
2
|
source("{{biopipen_dir}}/utils/gsea.R")
|
|
2
3
|
source("{{biopipen_dir}}/utils/plot.R")
|
|
3
4
|
|
|
@@ -7,9 +8,11 @@ library(ggprism)
|
|
|
7
8
|
library(Matrix)
|
|
8
9
|
library(sparseMatrixStats)
|
|
9
10
|
library(Seurat)
|
|
11
|
+
library(slugify)
|
|
10
12
|
|
|
11
13
|
sobjfile <- {{ in.sobjfile | r }}
|
|
12
14
|
outdir <- {{ out.outdir | r }}
|
|
15
|
+
joboutdir <- {{ job.outdir | r }}
|
|
13
16
|
gmtfile <- {{ envs.gmtfile | r }}
|
|
14
17
|
select_pcs <- {{ envs.select_pcs | r }}
|
|
15
18
|
ncores <- {{ envs.ncores | r }}
|
|
@@ -43,12 +46,12 @@ metabolics <- unique(as.vector(unname(unlist(pathways))))
|
|
|
43
46
|
sobj <- readRDS(sobjfile)
|
|
44
47
|
|
|
45
48
|
do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
46
|
-
|
|
49
|
+
log_info(paste0(" Handling subset value: ", s, " ..."))
|
|
47
50
|
if (is.null(s)) {
|
|
48
51
|
subset_dir = file.path(outdir, "ALL")
|
|
49
52
|
subset_obj = sobj
|
|
50
53
|
} else {
|
|
51
|
-
subset_dir = file.path(outdir, paste0(subset_prefix, s))
|
|
54
|
+
subset_dir = file.path(outdir, slugify(paste0(subset_prefix, s), tolower = FALSE))
|
|
52
55
|
subset_code = paste0("subset(sobj, subset = ", subset_col, " == '", s, "')")
|
|
53
56
|
subset_obj = eval(parse(text = subset_code))
|
|
54
57
|
}
|
|
@@ -214,10 +217,16 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
|
214
217
|
)
|
|
215
218
|
|
|
216
219
|
ggsave(file.path(subset_dir, "PC_variance_plot.pdf"), p, device = "pdf", useDingbats = FALSE)
|
|
220
|
+
|
|
221
|
+
add_report(
|
|
222
|
+
list(kind = "descr", content = "Metabolic pathways enriched in genes with highest contribution to the metabolic heterogeneities"),
|
|
223
|
+
list(kind = "image", src = bubblefile),
|
|
224
|
+
h1 = ifelse(is.null(s), "Metabolic pathway heterogeneity", paste0(subset_prefix, s))
|
|
225
|
+
)
|
|
217
226
|
}
|
|
218
227
|
|
|
219
228
|
do_one_subset_col <- function(subset_col, subset_prefix) {
|
|
220
|
-
|
|
229
|
+
log_info(paste0("- Handling subset column: ", subset_col, " ..."))
|
|
221
230
|
if (is.null(subset_col)) {
|
|
222
231
|
do_one_subset(NULL, subset_col = NULL, subset_prefix = NULL)
|
|
223
232
|
}
|
|
@@ -240,3 +249,5 @@ if (is.null(subsetting_cols)) {
|
|
|
240
249
|
do_one_subset_col(subsetting_cols[i], subsetting_prefix[i])
|
|
241
250
|
}
|
|
242
251
|
}
|
|
252
|
+
|
|
253
|
+
save_report(joboutdir)
|
|
@@ -11,6 +11,7 @@ immfile = {{in.immfile | r}}
|
|
|
11
11
|
sobjfile = {{in.sobjfile | r}}
|
|
12
12
|
outfile = {{out.outfile | r}}
|
|
13
13
|
metacols = {{envs.metacols | r}}
|
|
14
|
+
prefix = {{envs.prefix | r}}
|
|
14
15
|
|
|
15
16
|
immdata = readRDS(immfile)
|
|
16
17
|
sobj = readRDS(sobjfile)
|
|
@@ -31,7 +32,7 @@ metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
|
|
|
31
32
|
|
|
32
33
|
cldata %>%
|
|
33
34
|
separate_rows(Barcode, sep=";") %>%
|
|
34
|
-
mutate(Barcode = glue("{
|
|
35
|
+
mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
|
|
35
36
|
|
|
36
37
|
}))
|
|
37
38
|
|