biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +142 -0
- biopipen/ns/scrna.py +19 -1
- biopipen/ns/tcr.py +30 -10
- biopipen/reports/delim/SampleInfo.svelte +2 -22
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna/ScFGSEA.svelte +4 -23
- biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
- biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
- biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -168
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/scripts/delim/SampleInfo.R +41 -7
- biopipen/scripts/scrna/CellsDistribution.R +127 -16
- biopipen/scripts/scrna/MarkersFinder.R +245 -100
- biopipen/scripts/scrna/MetaMarkers.R +163 -82
- biopipen/scripts/scrna/RadarPlots.R +163 -110
- biopipen/scripts/scrna/ScFGSEA.R +51 -11
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
- biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
- biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
- biopipen/scripts/scrna/SeuratClustering.R +73 -26
- biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
- biopipen/scripts/scrna/SeuratPreparing.R +93 -19
- biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
- biopipen/scripts/tcr/Attach2Seurat.R +2 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
- biopipen/scripts/tcr/CloneResidency.R +114 -34
- biopipen/scripts/tcr/Immunarch-basic.R +18 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
- biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
- biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
- biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
- biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
- biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
- biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
- biopipen/scripts/tcr/Immunarch.R +7 -0
- biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
- biopipen/scripts/tcr/TCRClusterStats.R +124 -11
- biopipen/scripts/tcr/TCRClustering.R +8 -9
- biopipen/scripts/tcr/TESSA.R +66 -41
- biopipen/utils/misc.R +96 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
|
@@ -12,16 +12,19 @@ library(ggplot2)
|
|
|
12
12
|
library(ggprism)
|
|
13
13
|
library(parallel)
|
|
14
14
|
library(tidyseurat)
|
|
15
|
+
library(slugify)
|
|
15
16
|
|
|
16
17
|
setEnrichrSite("Enrichr")
|
|
17
18
|
|
|
18
19
|
srtfile <- {{ in.srtobj | quote }}
|
|
19
20
|
outdir <- {{ out.outdir | quote }}
|
|
21
|
+
joboutdir <- {{ job.outdir | quote }}
|
|
20
22
|
ncores <- {{ envs.ncores | int }}
|
|
21
23
|
mutaters <- {{ envs.mutaters | r }}
|
|
22
24
|
idents <- {{ envs.idents | r }}
|
|
23
25
|
group_by <- {{ envs["group-by"] | r }}
|
|
24
26
|
each <- {{ envs.each | r }}
|
|
27
|
+
subset <- {{ envs.subset | r }}
|
|
25
28
|
prefix_each <- {{ envs.prefix_each | r }}
|
|
26
29
|
p_adjust <- {{ envs.p_adjust | r }}
|
|
27
30
|
section <- {{ envs.section | r }}
|
|
@@ -32,15 +35,15 @@ cases <- {{ envs.cases | r: todot = "-" }}
|
|
|
32
35
|
|
|
33
36
|
set.seed(8525)
|
|
34
37
|
|
|
35
|
-
|
|
38
|
+
log_info("- Reading Seurat object ...")
|
|
36
39
|
srtobj <- readRDS(srtfile)
|
|
37
40
|
|
|
38
|
-
|
|
41
|
+
log_info("- Mutate meta data if needed ...")
|
|
39
42
|
if (!is.null(mutaters) && length(mutaters)) {
|
|
40
43
|
srtobj@meta.data <- srtobj@meta.data %>% mutate(!!!lapply(mutaters, parse_expr))
|
|
41
44
|
}
|
|
42
45
|
|
|
43
|
-
|
|
46
|
+
log_info("- Expanding cases ...")
|
|
44
47
|
if (is.null(cases) || length(cases) == 0) {
|
|
45
48
|
cases <- list(
|
|
46
49
|
DEFAULT = list(
|
|
@@ -49,6 +52,7 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
49
52
|
each = each,
|
|
50
53
|
prefix_each = prefix_each,
|
|
51
54
|
p_adjust = p_adjust,
|
|
55
|
+
subset = subset,
|
|
52
56
|
section = section,
|
|
53
57
|
dbs = dbs,
|
|
54
58
|
sigmarkers = sigmarkers,
|
|
@@ -65,6 +69,7 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
65
69
|
prefix_each = prefix_each,
|
|
66
70
|
p_adjust = p_adjust,
|
|
67
71
|
section = section,
|
|
72
|
+
subset = subset,
|
|
68
73
|
dbs = dbs,
|
|
69
74
|
sigmarkers = sigmarkers,
|
|
70
75
|
method = method
|
|
@@ -74,12 +79,19 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
74
79
|
}
|
|
75
80
|
|
|
76
81
|
newcases <- list()
|
|
82
|
+
sections <- c()
|
|
77
83
|
for (name in names(cases)) {
|
|
78
84
|
case <- cases[[name]]
|
|
79
85
|
if (is.null(case$each)) {
|
|
86
|
+
sections <- c(sections, case$section)
|
|
80
87
|
newcases[[paste0(case$section, ":", name)]] <- case
|
|
81
88
|
} else {
|
|
82
|
-
|
|
89
|
+
if (is.null(case$subset)) {
|
|
90
|
+
eachs <- srtobj@meta.data %>% pull(case$each) %>% unique() %>% na.omit()
|
|
91
|
+
} else {
|
|
92
|
+
eachs <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>% pull(case$each) %>% unique() %>% na.omit()
|
|
93
|
+
}
|
|
94
|
+
sections <- c(sections, case$each)
|
|
83
95
|
for (each in eachs) {
|
|
84
96
|
by = make.names(paste0(".", name, "_", case$each, "_", each))
|
|
85
97
|
idents <- case$idents
|
|
@@ -111,98 +123,98 @@ for (name in names(cases)) {
|
|
|
111
123
|
}
|
|
112
124
|
}
|
|
113
125
|
cases <- newcases
|
|
126
|
+
single_section <- length(unique(sections)) == 1
|
|
127
|
+
|
|
128
|
+
# Break a "<section>:<case>" name into its components
# Args:
#   casename: full case name; the text before the first ":" is the section,
#       everything after it (further ":" included) is the case
#   create: whether to create the case directory on disk
# Returns:
#   A list with casename, section, case, section_slug, case_slug and casedir,
#   where casedir is <outdir>/<section_slug>/<case_slug>
casename_info <- function(casename, create = FALSE) {
    parts <- strsplit(casename, ":")[[1]]
    section_name <- parts[1]
    # Re-join the remainder so that case names containing ":" stay intact
    case_name <- paste(parts[-1], collapse = ":")

    # Slugs are what is used for the path components under the global `outdir`
    section_slug <- slugify(section_name, tolower = FALSE)
    case_slug <- slugify(case_name, tolower = FALSE)
    casedir <- file.path(outdir, section_slug, case_slug)

    if (create) {
        dir.create(casedir, showWarnings = FALSE, recursive = TRUE)
    }

    list(
        casename = casename,
        section = section_name,
        case = case_name,
        section_slug = section_slug,
        case_slug = case_slug,
        casedir = casedir
    )
}
|
|
115
145
|
|
|
116
146
|
# Do enrichment analysis for a case using Enrichr
# Args:
#   info: case info list; `info$casename` is used in messages and
#       `info$casedir` is where all outputs are written
#       (do_case passes the result of casename_info())
#   markers: markers dataframe
#   sig: The expression to filter significant markers
# Returns:
#   NULL (invisibly) when enrichment was run, otherwise a message string
#   explaining why it was skipped (no markers, no significant markers,
#   or fewer than 5 significant markers)
do_enrich <- function(info, markers, sig) {
    log_info(" Running enrichment for case: {info$casename}")
    if (nrow(markers) == 0) {
        msg <- paste0("No markers found for case: ", info$casename)
        log_warn(" {msg}")
        return(msg)
    }
    markers_sig <- markers %>% filter(!!parse_expr(sig))
    if (nrow(markers_sig) == 0) {
        msg <- paste0("No significant markers found for case: ", info$casename)
        log_warn(" {msg}")
        return(msg)
    }
    # The significant markers are saved even when there are too few of them
    # to run the enrichment below
    write.table(
        markers_sig,
        file.path(info$casedir, "markers.txt"),
        sep = "\t",
        row.names = FALSE,
        col.names = TRUE,
        quote = FALSE
    )

    if (nrow(markers_sig) < 5) {
        msg <- paste0("Too few significant markers found for case: ", info$casename)
        # Pass the message as a value, not as the format string, so that braces
        # in a case name are not interpolated (consistent with the branches above)
        log_warn(" {msg}")
        return(msg)
    }

    # `dbs` is read from the enclosing (script-level) scope
    enriched <- enrichr(markers_sig$gene, dbs)
    for (db in dbs) {
        write.table(
            enriched[[db]],
            file.path(info$casedir, paste0("Enrichr-", db, ".txt")),
            sep = "\t",
            row.names = FALSE,
            col.names = TRUE,
            quote = FALSE
        )
        png(
            file.path(info$casedir, paste0("Enrichr-", db, ".png")),
            res = 100, height = 600, width = 800
        )
        # Always close the device, even if plotting fails; the error itself
        # still propagates to the caller
        tryCatch(
            print(
                plotEnrich(enriched[[db]], showTerms = 20, title = db) +
                    theme_prism()
            ),
            finally = dev.off()
        )
    }

    # Explicit success value: the caller checks is.null() on the result
    invisible(NULL)
}
|
|
193
200
|
|
|
194
201
|
|
|
195
202
|
do_case <- function(casename) {
|
|
196
|
-
|
|
203
|
+
log_info("- Dealing with case: {casename} ...")
|
|
204
|
+
info <- casename_info(casename, create = TRUE)
|
|
197
205
|
case <- cases[[casename]]
|
|
206
|
+
|
|
198
207
|
sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)))
|
|
208
|
+
if (!is.null(case$subset)) {
|
|
209
|
+
sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)), !!parse_expr(case$subset))
|
|
210
|
+
}
|
|
199
211
|
df <- GetAssayData(sobj, slot = "data", assay = "RNA")
|
|
200
212
|
genes <- rownames(df)
|
|
201
213
|
# rows: cells, cols: genes
|
|
202
214
|
df <- cbind(as.data.frame(scale(Matrix::t(df))), sobj@meta.data[, case$group_by])
|
|
203
215
|
colnames(df)[ncol(df)] <- "GROUP"
|
|
204
216
|
|
|
205
|
-
|
|
217
|
+
log_info(" Running tests for case...")
|
|
206
218
|
test_result <- mclapply(genes, function(gene) {
|
|
207
219
|
fm <- as.formula(paste(bQuote(gene), "~ GROUP"))
|
|
208
220
|
res <- tryCatch({
|
|
@@ -230,28 +242,97 @@ do_case <- function(casename) {
|
|
|
230
242
|
markers <- do_call(rbind, test_result)
|
|
231
243
|
markers$p_adjust <- p.adjust(markers$p.value, method = case$p_adjust)
|
|
232
244
|
markers <- markers %>% arrange(p_adjust)
|
|
233
|
-
do_enrich(casename, markers, case$sigmarkers)
|
|
234
245
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)
|
|
246
|
+
msg <- do_enrich(info, markers, case$sigmarkers)
|
|
247
|
+
if (is.null(msg)) {
|
|
248
|
+
log_info(" Plotting top 10 genes ...")
|
|
249
|
+
markers <- markers %>% head(10)
|
|
250
|
+
plotdir <- file.path(info$casedir, "expr_plots")
|
|
251
|
+
dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)
|
|
242
252
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
253
|
+
# Plot the top 10 genes in each group with violin plots
|
|
254
|
+
geneplots = list()
|
|
255
|
+
for (gene in markers$gene) {
|
|
256
|
+
outfile = file.path(plotdir, paste0(slugify(gene, tolower = FALSE), ".png"))
|
|
257
|
+
p = ggplot(df, aes_string(x="GROUP", y=bQuote(gene), fill="GROUP")) +
|
|
258
|
+
geom_violin(alpha = .8) +
|
|
259
|
+
geom_boxplot(width=0.1, fill="white") +
|
|
260
|
+
theme_prism() +
|
|
261
|
+
ylab(paste0("Expression of ", gene))
|
|
262
|
+
png(outfile, res = 100, height = 600, width = 800)
|
|
263
|
+
print(p)
|
|
264
|
+
dev.off()
|
|
265
|
+
|
|
266
|
+
geneplots[[length(geneplots) + 1]] <- list(
|
|
267
|
+
kind = "table_image",
|
|
268
|
+
src = outfile,
|
|
269
|
+
name = gene
|
|
270
|
+
)
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
add_report(
|
|
274
|
+
list(
|
|
275
|
+
kind = "descr",
|
|
276
|
+
content = paste0(
|
|
277
|
+
"Top 100 genes selected by ",
|
|
278
|
+
"<code>", case$method, "</code> across ",
|
|
279
|
+
"<code>", case$group_by, "</code> and filtered by ",
|
|
280
|
+
"<code>", html_escape(case$sigmarkers), "</code>"
|
|
281
|
+
)
|
|
282
|
+
),
|
|
283
|
+
h1 = ifelse(
|
|
284
|
+
info$section == "DEFAULT",
|
|
285
|
+
info$case,
|
|
286
|
+
ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
|
|
287
|
+
),
|
|
288
|
+
h2 = ifelse(single_section, "Meta-Markers", info$case),
|
|
289
|
+
h3 = ifelse(single_section, "#", "Meta-Markers")
|
|
290
|
+
)
|
|
291
|
+
add_report(
|
|
292
|
+
list(
|
|
293
|
+
name = "Meta-Markers",
|
|
294
|
+
contents = list(list(
|
|
295
|
+
kind = "table",
|
|
296
|
+
src = file.path(info$casedir, "markers.txt"),
|
|
297
|
+
data = list(nrows = 100)
|
|
298
|
+
))
|
|
299
|
+
),
|
|
300
|
+
list(
|
|
301
|
+
name = "Volin Plots (Top 10)",
|
|
302
|
+
ui = "table_of_images:4",
|
|
303
|
+
contents = geneplots
|
|
304
|
+
),
|
|
305
|
+
h1 = ifelse(
|
|
306
|
+
info$section == "DEFAULT",
|
|
307
|
+
info$case,
|
|
308
|
+
ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
|
|
309
|
+
),
|
|
310
|
+
h2 = ifelse(single_section, "Meta-Markers", info$case),
|
|
311
|
+
h3 = ifelse(single_section, "#", "Meta-Markers"),
|
|
312
|
+
ui = "tabs"
|
|
313
|
+
)
|
|
314
|
+
add_report(
|
|
315
|
+
list(kind = "enrichr", dir = info$casedir),
|
|
316
|
+
h1 = ifelse(
|
|
317
|
+
info$section == "DEFAULT",
|
|
318
|
+
info$case,
|
|
319
|
+
ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
|
|
320
|
+
),
|
|
321
|
+
h2 = ifelse(single_section, "Enrichment Analysis", info$case),
|
|
322
|
+
h3 = ifelse(single_section, "#", "Enrichment Analysis")
|
|
323
|
+
)
|
|
324
|
+
} else {
|
|
325
|
+
add_report(
|
|
326
|
+
list(kind = "error", content = msg),
|
|
327
|
+
h1 = ifelse(
|
|
328
|
+
info$section == "DEFAULT",
|
|
329
|
+
info$case,
|
|
330
|
+
ifelse(single_section, paste0(info$section, " - ", info$case), info$section)
|
|
331
|
+
),
|
|
332
|
+
h2 = ifelse(single_section, "#", info$case)
|
|
333
|
+
)
|
|
254
334
|
}
|
|
255
335
|
}
|
|
256
336
|
|
|
257
337
|
sapply(sort(names(cases)), do_case)
|
|
338
|
+
save_report(joboutdir)
|