biopipen 0.23.8__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/proc.py +7 -0
- biopipen/ns/cellranger.py +2 -2
- biopipen/ns/scrna.py +13 -20
- biopipen/ns/tcr.py +8 -6
- biopipen/scripts/scrna/SeuratClustering.R +102 -85
- biopipen/scripts/scrna/SeuratPreparing.R +13 -6
- biopipen/scripts/scrna/SeuratSubClustering.R +81 -97
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +5 -5
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- biopipen/scripts/tcr/Immunarch-basic.R +2 -3
- biopipen/scripts/tcr/Immunarch-clonality.R +2 -3
- biopipen/scripts/tcr/Immunarch-diversity.R +70 -49
- biopipen/scripts/tcr/Immunarch-geneusage.R +2 -3
- biopipen/scripts/tcr/Immunarch-kmer.R +2 -3
- biopipen/scripts/tcr/Immunarch-overlap.R +2 -3
- biopipen/scripts/tcr/Immunarch-spectratyping.R +2 -3
- biopipen/scripts/tcr/Immunarch-tracking.R +2 -3
- biopipen/scripts/tcr/Immunarch-vjjunc.R +2 -3
- biopipen/scripts/tcr/Immunarch.R +1 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +2 -0
- biopipen/scripts/tcr/TCRClustering.R +6 -2
- biopipen/scripts/tcr/TESSA.R +3 -1
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/utils/caching.R +44 -0
- {biopipen-0.23.8.dist-info → biopipen-0.24.0.dist-info}/METADATA +8 -7
- {biopipen-0.23.8.dist-info → biopipen-0.24.0.dist-info}/RECORD +29 -27
- {biopipen-0.23.8.dist-info → biopipen-0.24.0.dist-info}/WHEEL +0 -0
- {biopipen-0.23.8.dist-info → biopipen-0.24.0.dist-info}/entry_points.txt +0 -0
|
@@ -54,7 +54,7 @@ do_one_comparison <- function(
|
|
|
54
54
|
subset_prefix,
|
|
55
55
|
groupname
|
|
56
56
|
) {
|
|
57
|
-
|
|
57
|
+
log_info(paste(" Design: {compname} ({case}, {control})"))
|
|
58
58
|
case_code = paste0("subset(obj, subset = ", subset_col, " == '", case, "')")
|
|
59
59
|
case_obj = tryCatch({
|
|
60
60
|
eval(parse(text = case_code))
|
|
@@ -62,7 +62,7 @@ do_one_comparison <- function(
|
|
|
62
62
|
NULL
|
|
63
63
|
})
|
|
64
64
|
if (is.null(case_obj)) {
|
|
65
|
-
|
|
65
|
+
log_warn(" Skip (not enough cells in case)")
|
|
66
66
|
return (NULL)
|
|
67
67
|
}
|
|
68
68
|
control_code = paste0("subset(obj, subset = ", subset_col, " == '", control, "')")
|
|
@@ -72,7 +72,7 @@ do_one_comparison <- function(
|
|
|
72
72
|
NULL
|
|
73
73
|
})
|
|
74
74
|
if (is.null(control_obj)) {
|
|
75
|
-
|
|
75
|
+
log_warn(" Skip (not enough cells in control)")
|
|
76
76
|
add_report(
|
|
77
77
|
list(kind = "error", content = "Not enough cells in control"),
|
|
78
78
|
h1 = groupname,
|
|
@@ -86,7 +86,7 @@ do_one_comparison <- function(
|
|
|
86
86
|
odir = file.path(groupdir, paste0(subset_prefix, compname))
|
|
87
87
|
dir.create(odir, showWarnings = FALSE)
|
|
88
88
|
if (ncol(exprs_case) < 3 || ncol(exprs_control) < 3) {
|
|
89
|
-
|
|
89
|
+
log_warn(" Skip (not enough cells)")
|
|
90
90
|
add_report(
|
|
91
91
|
list(kind = "error", content = "Not enough cells"),
|
|
92
92
|
h1 = groupname,
|
|
@@ -131,7 +131,7 @@ do_one_comparison <- function(
|
|
|
131
131
|
}
|
|
132
132
|
|
|
133
133
|
do_one_group <- function(group) {
|
|
134
|
-
|
|
134
|
+
log_info("- Group: {group} ...")
|
|
135
135
|
|
|
136
136
|
genes = intersect(metabolics, rownames(sobj))
|
|
137
137
|
group_code = paste0(
|
|
@@ -71,7 +71,7 @@ num_of_pathways <- function(gmtfile, overlapgenes) {
|
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
74
|
-
|
|
74
|
+
log_info(" Processing subset: {s} ...")
|
|
75
75
|
if (is.null(s)) {
|
|
76
76
|
subset_dir <- file.path(outdir, "ALL")
|
|
77
77
|
dir.create(subset_dir, showWarnings = FALSE)
|
|
@@ -118,7 +118,7 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
|
118
118
|
|
|
119
119
|
for (pi in seq_along(pathway_names)) {
|
|
120
120
|
p <- pathway_names[pi]
|
|
121
|
-
|
|
121
|
+
log_info(" * Pathway ({pi}): {p} ...")
|
|
122
122
|
genes <- pathways[[p]]
|
|
123
123
|
genes_comm <- intersect(genes, rownames(subset_obj))
|
|
124
124
|
genes_expressed <- names(rowSums(subset_obj)[rowSums(subset_obj) > 0])
|
|
@@ -312,7 +312,7 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
|
312
312
|
}
|
|
313
313
|
|
|
314
314
|
do_one_subset_col <- function(subset_col, subset_prefix) {
|
|
315
|
-
|
|
315
|
+
log_info("- Handling subset column: {subset_col} ...")
|
|
316
316
|
if (is.null(subset_col)) {
|
|
317
317
|
do_one_subset(NULL, subset_col = NULL, subset_prefix = NULL)
|
|
318
318
|
} else {
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Basic analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
volumes = {{envs.volumes | r}}
|
|
10
9
|
lens = {{envs.lens | r}}
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Clonality analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
top_clones = {{envs.top_clones | r}}
|
|
10
9
|
rare_clones = {{envs.rare_clones | r}}
|
|
@@ -1,30 +1,34 @@
|
|
|
1
1
|
# Diversity estimation
|
|
2
|
+
source("{{biopipen_dir}}/scripts/tcr/immunarch-patched.R")
|
|
2
3
|
# https://immunarch.com/articles/web_only/v6_diversity.html
|
|
3
4
|
|
|
4
5
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
6
|
+
log_info("# Diversity estimation")
|
|
7
|
+
log_info("-----------------------------------")
|
|
8
8
|
|
|
9
9
|
# Other variables are loaded in the parent template
|
|
10
10
|
# immdata is already loaded, meta is mutated
|
|
11
|
-
div_method = {{envs.divs.method | r}}
|
|
12
|
-
div_by = {{envs.divs.by | r}}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
11
|
+
div_method = {{envs.divs.method | default: "gini" | r}}
|
|
12
|
+
div_by = {{envs.divs.by | default: None | r}}
|
|
13
|
+
div_plot_type = {{envs.divs.plot_type | default: "bar" | r}}
|
|
14
|
+
div_order = {{envs.divs.order | default: [] | r}}
|
|
15
|
+
div_args = {{envs.divs.args | default: {} | r: todot="-"}}
|
|
16
|
+
div_test = {{envs.divs.test | default: None | r}}
|
|
17
|
+
div_cases = {{envs.divs.cases | default: {} | r: todot="-"}}
|
|
18
|
+
div_devpars = {{envs.divs.devpars | default: None | r}}
|
|
19
|
+
div_separate_by = {{envs.divs.separate_by | default: None | r}}
|
|
20
|
+
div_split_by = {{envs.divs.split_by | default: None | r}}
|
|
21
|
+
div_split_order = {{envs.divs.split_order | default: None | r}}
|
|
22
|
+
div_align_x = {{envs.divs.align_x | default: False | r}}
|
|
23
|
+
div_align_y = {{envs.divs.align_y | default: False | r}}
|
|
24
|
+
div_subset = {{envs.divs.subset | default: None | r}}
|
|
25
|
+
div_log = {{envs.divs.log | default: False | r}}
|
|
26
|
+
div_ncol = {{envs.divs.ncol | default: 2 | r}}
|
|
27
|
+
div_ymin = {{envs.divs.ymin | default: None | r}}
|
|
28
|
+
div_ymax = {{envs.divs.ymax | default: None | r}}
|
|
29
|
+
|
|
30
|
+
div_test = div_test %||% list(method = "none", padjust = "none")
|
|
31
|
+
div_devpars = div_devpars %||% list(res = 100, width = 800, height = 800)
|
|
28
32
|
|
|
29
33
|
div_dir = file.path(outdir, "diversity")
|
|
30
34
|
dir.create(div_dir, showWarnings = FALSE)
|
|
@@ -38,6 +42,7 @@ update_case = function(case, name) {
|
|
|
38
42
|
if (!is.null(case$by) && nchar(case$by) > 0) {
|
|
39
43
|
case$by = unlist(strsplit(case$by, ",")) %>% trimws()
|
|
40
44
|
}
|
|
45
|
+
case$plot_type <- case$plot_type %||% div_plot_type
|
|
41
46
|
case$order <- case$order %||% div_order
|
|
42
47
|
case$args <- case$args %||% div_args
|
|
43
48
|
for (name in names(case$args)) {
|
|
@@ -85,23 +90,6 @@ update_case = function(case, name) {
|
|
|
85
90
|
return (case)
|
|
86
91
|
}
|
|
87
92
|
|
|
88
|
-
# See https://github.com/immunomind/immunarch/pull/341
|
|
89
|
-
vis.immunr_gini <- function(.data, .by = NA, .meta = NA,
|
|
90
|
-
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
|
|
91
|
-
.points = TRUE, .test = TRUE, .signif.label.size = 3.5, ...) {
|
|
92
|
-
# repDiversity(..., .method = "gini") generates a matrix
|
|
93
|
-
.data = data.frame(Sample = rownames(.data), Value = .data[, 1])
|
|
94
|
-
vis_bar(
|
|
95
|
-
.data = .data, .by = .by, .meta = .meta,
|
|
96
|
-
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
|
|
97
|
-
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
|
|
98
|
-
.defgroupby = "Sample", .grouping.var = "Group",
|
|
99
|
-
.labs = c(NA, "Gini coefficient"),
|
|
100
|
-
.title = "Gini coefficient", .subtitle = "Sample diversity estimation using the Gini coefficient",
|
|
101
|
-
.legend = NA, .leg.title = NA
|
|
102
|
-
)
|
|
103
|
-
}
|
|
104
|
-
|
|
105
93
|
if (is.null(div_cases) || length(div_cases) == 0) {
|
|
106
94
|
if (is.null(div_method) || length(div_method) == 0 || nchar(div_method) == 0) {
|
|
107
95
|
stop("No method is specified for diversity estimation")
|
|
@@ -176,6 +164,15 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
176
164
|
col.names = TRUE
|
|
177
165
|
)
|
|
178
166
|
|
|
167
|
+
# Collapse one or more metadata columns into a single vector of group labels.
#
# meta: a data frame (or list) of metadata columns.
# cols: character vector of column names to combine.
#
# With exactly one column the column is returned untouched; otherwise the
# columns are pasted together element-wise, separated by "; ".
.meta_vals <- function(meta, cols) {
    single_column <- length(cols) == 1
    if (!single_column) {
        picked <- lapply(cols, function(col) meta[[col]])
        return(do.call(paste, c(picked, sep = "; ")))
    }
    meta[[cols]]
}
|
|
175
|
+
|
|
179
176
|
# plot
|
|
180
177
|
# by, order, separate_by, align_y
|
|
181
178
|
n_seps = 1
|
|
@@ -189,11 +186,19 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
189
186
|
metas = metas[intersect(case$split_order, names(metas))]
|
|
190
187
|
}
|
|
191
188
|
ps = lapply(metas, function(meta) {
|
|
192
|
-
.test = length(unique(meta
|
|
193
|
-
p = vis(
|
|
189
|
+
.test = length(unique(.meta_vals(meta, case$by))) > 1
|
|
190
|
+
p = vis(
|
|
191
|
+
filter_div(div, meta$Sample),
|
|
192
|
+
.by = case$by,
|
|
193
|
+
.meta = meta,
|
|
194
|
+
.test = .test,
|
|
195
|
+
.plot.type = case$plot_type
|
|
196
|
+
)
|
|
194
197
|
p = p + xlab(paste0(case$separate_by, ": ", meta[[case$separate_by]][1], ")"))
|
|
195
198
|
if (!is.null(case$order) && length(case$order) > 0) {
|
|
196
|
-
p = p + scale_x_discrete(
|
|
199
|
+
p = p + scale_x_discrete(
|
|
200
|
+
limits = intersect(case$order, unique(.meta_vals(meta, case$by)))
|
|
201
|
+
)
|
|
197
202
|
}
|
|
198
203
|
if (!is.null(case$ymin) && !is.null(case$ymax)) {
|
|
199
204
|
p = p + ylim(c(case$ymin, case$ymax))
|
|
@@ -217,10 +222,18 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
217
222
|
}
|
|
218
223
|
.i = 0
|
|
219
224
|
ps = lapply(metas, function(meta) {
|
|
220
|
-
nby = length(unique(meta
|
|
221
|
-
p = vis(
|
|
225
|
+
nby = length(unique(.meta_vals(meta, case$by)))
|
|
226
|
+
p = vis(
|
|
227
|
+
filter_div(div, meta$Sample),
|
|
228
|
+
.by = case$by,
|
|
229
|
+
.meta = meta,
|
|
230
|
+
.test = nby > 1,
|
|
231
|
+
.plot.type = case$plot_type
|
|
232
|
+
)
|
|
222
233
|
if (!is.null(case$order) && length(case$order) > 0) {
|
|
223
|
-
p = p + scale_x_discrete(
|
|
234
|
+
p = p + scale_x_discrete(
|
|
235
|
+
limits = intersect(case$order, unique(.meta_vals(meta, case$by)))
|
|
236
|
+
)
|
|
224
237
|
}
|
|
225
238
|
p = p + xlab(meta[[case$split_by]][1]) + theme(
|
|
226
239
|
axis.text.x = element_blank(),
|
|
@@ -253,10 +266,10 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
253
266
|
plots = lapply(ps, function(x) x$p + ylim(c(ymin, ymax)))
|
|
254
267
|
p = wrap_plots(plots, widths = widths, guides = "collect")
|
|
255
268
|
} else {
|
|
256
|
-
.test = length(unique(d$meta
|
|
257
|
-
p = vis(div, .by = case$by, .meta = d$meta, .test = .test)
|
|
269
|
+
.test = length(unique(.meta_vals(d$meta, case$by))) > 1
|
|
270
|
+
p = vis(div, .by = case$by, .meta = d$meta, .test = .test, .plot.type = case$plot_type)
|
|
258
271
|
if (!is.null(case$order) && length(case$order) > 0) {
|
|
259
|
-
p = p + scale_x_discrete(limits = intersect(case$order, unique(d$meta
|
|
272
|
+
p = p + scale_x_discrete(limits = intersect(case$order, unique(.meta_vals(d$meta, case$by))))
|
|
260
273
|
}
|
|
261
274
|
}
|
|
262
275
|
} else if (!is.null(case$separate_by)) {
|
|
@@ -333,7 +346,9 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
333
346
|
} else {
|
|
334
347
|
p = vis(div)
|
|
335
348
|
if (!is.null(case$order) && length(case$order) > 0) {
|
|
336
|
-
p = p + scale_x_discrete(
|
|
349
|
+
p = p + scale_x_discrete(
|
|
350
|
+
limits = intersect(case$order, unique(.meta_vals(d$meta, case$by)))
|
|
351
|
+
)
|
|
337
352
|
}
|
|
338
353
|
}
|
|
339
354
|
|
|
@@ -351,7 +366,7 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
351
366
|
}
|
|
352
367
|
if (is.null(width)) {
|
|
353
368
|
if (!is.null(case$by) && length(case$by) > 0) {
|
|
354
|
-
width = 200 * length(unique(d$meta
|
|
369
|
+
width = 200 * length(unique(.meta_vals(d$meta, case$by))) + 120
|
|
355
370
|
} else {
|
|
356
371
|
width = 100 * length(unique(d$meta$Sample)) + 120
|
|
357
372
|
}
|
|
@@ -400,7 +415,11 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
400
415
|
"where all values are the same (for example, where everyone has ",
|
|
401
416
|
"the same income). A Gini coefficient of one (or 100 percents ) ",
|
|
402
417
|
"expresses maximal inequality among values (for example where only ",
|
|
403
|
-
"one person has all the income).")
|
|
418
|
+
"one person has all the income)."),
|
|
419
|
+
d50 = paste0(
|
|
420
|
+
"the D50 index. ",
|
|
421
|
+
"It is the number of types that are needed to cover 50% of the total
|
|
422
|
+
abundance.")
|
|
404
423
|
)
|
|
405
424
|
)
|
|
406
425
|
),
|
|
@@ -705,6 +724,8 @@ run_div_case = function(casename) {
|
|
|
705
724
|
run_general(casename, d, case, ddir)
|
|
706
725
|
} else if (case$method == "gini") {
|
|
707
726
|
run_general(casename, d, case, ddir, "V1")
|
|
727
|
+
} else if (case$method == "d50") {
|
|
728
|
+
run_general(casename, d, case, ddir, "Clones")
|
|
708
729
|
} else {
|
|
709
730
|
stop(paste0("Unknown diversity method: ", case$method))
|
|
710
731
|
}
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Gene usage analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
gene_usages = {{ envs.gene_usages | r: todot="-" }}
|
|
10
9
|
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# K-mer analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
kmers = {{ envs.kmers | r: todot="-" }}
|
|
10
9
|
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Overlap analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
overlaps = {{ envs.overlaps | r: todot="-" }}
|
|
10
9
|
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Spectratyping analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
spects = {{ envs.spects | r }}
|
|
10
9
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
log_info("")
|
|
2
|
-
log_info("
|
|
3
|
-
log_info("
|
|
4
|
-
log_info("#####################################")
|
|
2
|
+
log_info("# Clonotype tracking")
|
|
3
|
+
log_info("-----------------------------------")
|
|
5
4
|
|
|
6
5
|
trackings = {{ envs.trackings | r }}
|
|
7
6
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
log_info("")
|
|
2
|
-
log_info("
|
|
3
|
-
log_info("
|
|
4
|
-
log_info("#####################################")
|
|
2
|
+
log_info("# VJ Junction Circos Plots")
|
|
3
|
+
log_info("-----------------------------------")
|
|
5
4
|
|
|
6
5
|
# Already required by immunarch
|
|
7
6
|
library(circlize)
|
biopipen/scripts/tcr/Immunarch.R
CHANGED
|
@@ -34,7 +34,7 @@ log_info("Expanding immdata ...")
|
|
|
34
34
|
exdata = expand_immdata(immdata)
|
|
35
35
|
|
|
36
36
|
log_info("Loading metadata if provided ...")
|
|
37
|
-
if (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS")) {
|
|
37
|
+
if (!is.null(metafile) && (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS"))) {
|
|
38
38
|
meta = readRDS(metafile)@meta.data
|
|
39
39
|
} else if (!is.null(metafile) && nchar(metafile) > 0) {
|
|
40
40
|
meta = read.table(metafile, sep = "\t", header = TRUE, row.names = 1)
|
|
@@ -144,6 +144,7 @@ for (i in seq_len(nrow(metadata))) {
|
|
|
144
144
|
# file.symlink(normalizePath(annofile), file.path(datadir, paste0(sample, ext)))
|
|
145
145
|
}
|
|
146
146
|
|
|
147
|
+
log_info("Loading TCR data ...")
|
|
147
148
|
immdata = repLoad(datadir, .mode=mode)
|
|
148
149
|
if (mode == "single") {
|
|
149
150
|
data = immdata$data
|
|
@@ -178,6 +179,7 @@ immdata$prefix = prefix
|
|
|
178
179
|
|
|
179
180
|
saveRDS(immdata, file=rdsfile)
|
|
180
181
|
|
|
182
|
+
log_info("Saving cell-level data ...")
|
|
181
183
|
exdata <- expand_immdata(immdata, cell_id = "Barcode") %>%
|
|
182
184
|
distinct(Sample, Barcode, .keep_all = TRUE) %>%
|
|
183
185
|
mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
# python = Sys.which({{envs.python | r}})
|
|
4
4
|
# Sys.setenv(RETICULATE_PYTHON = python)
|
|
5
5
|
# library(reticulate)
|
|
6
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
6
7
|
source("{{biopipen_dir}}/utils/single_cell.R")
|
|
7
8
|
|
|
8
9
|
library(immunarch)
|
|
@@ -97,7 +98,7 @@ clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
|
|
|
97
98
|
}
|
|
98
99
|
|
|
99
100
|
run_clustcr = function() {
|
|
100
|
-
|
|
101
|
+
log_info("Running ClusTCR ...")
|
|
101
102
|
clustcr_dir = file.path(outdir, "ClusTCR_Output")
|
|
102
103
|
dir.create(clustcr_dir, showWarnings = FALSE)
|
|
103
104
|
clustcr_file = prepare_clustcr(clustcr_dir)
|
|
@@ -110,6 +111,7 @@ run_clustcr = function() {
|
|
|
110
111
|
)
|
|
111
112
|
print("Running:")
|
|
112
113
|
print(clustcr_cmd)
|
|
114
|
+
log_debug("- Running command: {clustcr_cmd}")
|
|
113
115
|
rc = system(clustcr_cmd)
|
|
114
116
|
if (rc != 0) {
|
|
115
117
|
quit(status=rc)
|
|
@@ -196,7 +198,7 @@ clean_giana_output = function(giana_outfile, giana_infile) {
|
|
|
196
198
|
}
|
|
197
199
|
|
|
198
200
|
run_giana = function() {
|
|
199
|
-
|
|
201
|
+
log_info("Running GIANA ...")
|
|
200
202
|
giana_srcdir = prepare_giana()
|
|
201
203
|
giana_input = prepare_input()
|
|
202
204
|
giana_outdir = file.path(outdir, "GIANA_Output")
|
|
@@ -228,6 +230,7 @@ run_giana = function() {
|
|
|
228
230
|
)
|
|
229
231
|
print("Running:")
|
|
230
232
|
print(giana_cmd)
|
|
233
|
+
log_debug("- Running command: {giana_cmd}")
|
|
231
234
|
rc = system(giana_cmd)
|
|
232
235
|
if (rc != 0) {
|
|
233
236
|
quit(status=rc)
|
|
@@ -276,4 +279,5 @@ if (tolower(tool) == "clustcr") {
|
|
|
276
279
|
stop(paste("Unknown tool:", tool))
|
|
277
280
|
}
|
|
278
281
|
|
|
282
|
+
log_info("Saving results ...")
|
|
279
283
|
attach_to_immdata(out)
|
biopipen/scripts/tcr/TESSA.R
CHANGED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
library(immunarch)
|
|
2
|
+
|
|
3
|
+
# Plot Gini coefficients as a bar plot (default) or a box plot.
#
# .data is indexed as a matrix: row names become the `Sample` column and the
# first column becomes `Value`. `.plot.type = "bar"` draws with vis_bar();
# any other value draws with vis_box(). The error-bar arguments are only
# used by the bar plot. `...` is accepted but not forwarded.
vis.immunr_gini <- function(.data, .by = NA, .meta = NA,
                            .errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
                            .points = TRUE, .test = TRUE, .signif.label.size = 3.5,
                            .legend = NA, .plot.type = "bar", ...) {
    gini_df <- data.frame(Sample = rownames(.data), Value = .data[, 1])
    axis_labs <- c(NA, "Gini coefficient")
    main_title <- "Gini coefficient"
    sub_title <- "Sample diversity estimation using the Gini coefficient"

    if (.plot.type != "bar") {
        return(vis_box(
            .data = gini_df, .by = .by, .meta = .meta,
            .melt = FALSE, .points = .points, .test = .test,
            .signif.label.size = .signif.label.size,
            .defgroupby = "Sample", .grouping.var = "Group",
            .labs = axis_labs, .title = main_title, .subtitle = sub_title,
            .legend = .legend, .leg.title = NA
        ))
    }

    vis_bar(
        .data = gini_df, .by = .by, .meta = .meta,
        .errorbars = .errorbars, .errorbars.off = .errorbars.off,
        .stack = FALSE, .points = .points, .test = .test,
        .signif.label.size = .signif.label.size,
        .defgroupby = "Sample", .grouping.var = "Group",
        .labs = axis_labs, .title = main_title, .subtitle = sub_title,
        .legend = .legend, .leg.title = NA
    )
}
|
|
30
|
+
|
|
31
|
+
# Plot true-diversity estimates as a bar plot (default) or a box plot.
#
# `.plot.type = "bar"` delegates to immunarch's own vis.immunr_div();
# any other value draws a box plot with vis_box(). `.data` is handed to
# either plotter unchanged (with `.melt = FALSE` for the box plot), so it is
# presumably already a Sample/Value table -- TODO confirm against
# repDiversity(.method = "div").
#
# The error-bar arguments only apply to the bar plot. `...` is accepted but
# not forwarded.
vis.immunr_div <- function(.data, .by = NA, .meta = NA,
                           .errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
                           .points = TRUE, .test = TRUE, .signif.label.size = 3.5,
                           .legend = NA, .plot.type = "bar", ...) {
    if (.plot.type == "bar") {
        immunarch:::vis.immunr_div(
            .data = .data, .by = .by, .meta = .meta,
            .errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
            .points = .points, .test = .test, .signif.label.size = .signif.label.size,
            .legend = .legend
        )
    } else {
        vis_box(
            .data = .data, .by = .by, .meta = .meta, .test = .test,
            .points = .points, .signif.label.size = .signif.label.size,
            .defgroupby = "Sample", .grouping.var = "Group",
            .labs = c(NA, "Effective number of clonotypes"),
            .title = "True diversity",
            .subtitle = "Sample diversity estimation using the true diversity index",
            # Honour the caller's legend setting, as the sibling methods do
            # (it used to be hard-coded to NA here).
            .legend = .legend, .leg.title = NA, .melt = FALSE
        )
    }
}
|
|
52
|
+
|
|
53
|
+
# Plot Chao1 estimates as a bar plot (default) or a box plot.
#
# `.plot.type = "bar"` delegates to immunarch's own vis.immunr_chao1() with
# `.data` unchanged. Any other value draws a box plot with vis_box(): `.data`
# is indexed as a matrix, its row names become `Sample` and its first column
# becomes `Value` (presumably the Chao1 estimate -- TODO confirm against
# repDiversity(.method = "chao1")).
#
# The error-bar arguments only apply to the bar plot. `...` is accepted but
# not forwarded.
vis.immunr_chao1 <- function(.data, .by = NA, .meta = NA,
                             .errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
                             .points = TRUE, .test = TRUE, .signif.label.size = 3.5,
                             .legend = NA, .plot.type = "bar", ...) {
    if (.plot.type == "bar") {
        immunarch:::vis.immunr_chao1(
            .data = .data, .by = .by, .meta = .meta,
            .errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
            .points = .points, .test = .test, .signif.label.size = .signif.label.size,
            .legend = .legend
        )
    } else {
        .data <- data.frame(Sample = row.names(.data), Value = .data[, 1])
        vis_box(
            .data = .data, .by = .by, .meta = .meta, .test = .test,
            .points = .points, .signif.label.size = .signif.label.size,
            .defgroupby = "Sample", .grouping.var = "Group",
            .labs = c(NA, "Chao1"),
            .title = "Chao1", .subtitle = "Sample diversity estimation using Chao1",
            # Honour the caller's legend setting, as the sibling methods do
            # (it used to be hard-coded to NA here).
            .legend = .legend, .leg.title = NA, .melt = FALSE
        )
    }
}
|
|
75
|
+
|
|
76
|
+
# Plot Gini-Simpson indices as a bar plot (default) or a box plot.
#
# `.plot.type = "bar"` delegates to immunarch's own vis.immunr_ginisimp();
# any other value draws a box plot with vis_box(). `.data` is passed through
# unchanged in both cases. The error-bar arguments only apply to the bar
# plot. `...` is accepted but not forwarded.
vis.immunr_ginisimp <- function(.data, .by = NA, .meta = NA,
                                .errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
                                .points = TRUE, .test = TRUE, .signif.label.size = 3.5,
                                .legend = NA, .plot.type = "bar", ...) {
    if (.plot.type != "bar") {
        index_name <- "Gini-Simpson index"
        return(vis_box(
            .data = .data, .by = .by, .meta = .meta,
            .melt = FALSE, .points = .points, .test = .test,
            .signif.label.size = .signif.label.size,
            .defgroupby = "Sample", .grouping.var = "Group",
            .labs = c(NA, index_name),
            .title = index_name,
            .subtitle = "Sample diversity estimation using the Gini-Simpson index",
            .legend = .legend, .leg.title = NA
        ))
    }

    immunarch:::vis.immunr_ginisimp(
        .data = .data, .by = .by, .meta = .meta,
        .errorbars = .errorbars, .errorbars.off = .errorbars.off,
        .stack = FALSE, .points = .points, .test = .test,
        .signif.label.size = .signif.label.size, .legend = .legend
    )
}
|
|
97
|
+
|
|
98
|
+
# Plot inverse Simpson indices as a bar plot (default) or a box plot.
#
# `.plot.type = "bar"` delegates to immunarch's own vis.immunr_invsimp();
# any other value draws a box plot with vis_box(). `.data` is passed through
# unchanged in both cases. The error-bar arguments only apply to the bar
# plot. `...` is accepted but not forwarded.
vis.immunr_invsimp <- function(.data, .by = NA, .meta = NA,
                               .errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
                               .points = TRUE, .test = TRUE, .signif.label.size = 3.5,
                               .legend = NA, .plot.type = "bar", ...) {
    if (.plot.type != "bar") {
        index_name <- "Inverse Simpson index"
        return(vis_box(
            .data = .data, .by = .by, .meta = .meta,
            .melt = FALSE, .points = .points, .test = .test,
            .signif.label.size = .signif.label.size,
            .defgroupby = "Sample", .grouping.var = "Group",
            .labs = c(NA, index_name),
            .title = index_name,
            .subtitle = "Sample diversity estimation using the inverse Simpson index",
            .legend = .legend, .leg.title = NA
        ))
    }

    immunarch:::vis.immunr_invsimp(
        .data = .data, .by = .by, .meta = .meta,
        .errorbars = .errorbars, .errorbars.off = .errorbars.off,
        .stack = FALSE, .points = .points, .test = .test,
        .signif.label.size = .signif.label.size, .legend = .legend
    )
}
|
|
119
|
+
|
|
120
|
+
# Plot Dxx diversity indices (e.g. D50) as a bar plot (default) or a box plot.
#
# `.plot.type = "bar"` delegates to immunarch's own vis.immunr_dxx() with
# `.data` unchanged. Any other value draws a box plot with vis_box(): `.data`
# is indexed as a matrix, its row names become `Sample`, its first column
# becomes `Value`, and the rounded value in row 1 / column 2 is used as the
# "xx" percentage in the axis label, title and subtitle.
#
# The error-bar arguments only apply to the bar plot. `...` is accepted but
# not forwarded.
vis.immunr_dxx <- function(.data, .by = NA, .meta = NA,
                           .errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
                           .points = TRUE, .test = TRUE, .signif.label.size = 3.5,
                           .legend = NA, .plot.type = "bar", ...) {
    if (.plot.type != "bar") {
        pct <- round(.data[1, 2][1])
        dxx_df <- data.frame(Sample = row.names(.data), Value = .data[, 1])
        index_label <- paste0("D", pct)
        return(vis_box(
            .data = dxx_df, .by = .by, .meta = .meta,
            .melt = FALSE, .points = .points, .test = .test,
            .signif.label.size = .signif.label.size,
            .defgroupby = "Sample", .grouping.var = "Group",
            .labs = c(NA, index_label),
            .title = paste0(index_label, " diversity index"),
            .subtitle = paste0(
                "Number of clonotypes occupying the ", pct, "% of repertoires"
            ),
            .legend = .legend, .leg.title = NA
        ))
    }

    immunarch:::vis.immunr_dxx(
        .data = .data, .by = .by, .meta = .meta,
        .errorbars = .errorbars, .errorbars.off = .errorbars.off,
        .stack = FALSE, .points = .points, .test = .test,
        .signif.label.size = .signif.label.size, .legend = .legend
    )
}
|
biopipen/utils/caching.R
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
library(digest)
|
|
2
|
+
|
|
3
|
+
#' Look up a cached result for an object
#'
#' The signature is the printed `str()` of `x`; its MD5 digest, truncated to
#' the first 8 characters, names the cache file `<dig>.<kind>.RDS` inside
#' `cache_dir`.
#'
#' @param x An object to derive the signature from
#' @param kind A string naming the kind of object; part of the cache filename
#' @param cache_dir Directory holding cached files. `NULL` or `FALSE`
#'  disables caching.
#'
#' @return A list with `sig` (the signature), `dig` (the truncated digest) and
#'  `data` (the cached object, or `NULL` when no cache file exists). All three
#'  are `NULL` when caching is disabled.
get_cached <- function(x, kind, cache_dir) {
    caching_off <- is.null(cache_dir) || isFALSE(cache_dir)
    if (caching_off) {
        return(list(sig = NULL, dig = NULL, data = NULL))
    }

    sig <- capture.output(str(x))
    dig <- substr(digest::digest(sig, algo = "md5"), 1, 8)
    cached_file <- file.path(cache_dir, paste0(dig, ".", kind, ".RDS"))

    # A missing cache file is reported as data = NULL, not as an error.
    cached <- if (file.exists(cached_file)) readRDS(cached_file) else NULL
    list(sig = sig, dig = dig, data = cached)
}
|
|
26
|
+
|
|
27
|
+
#' Write an object and its signature to the cache
#'
#' Two files are written into `cache_dir`: `<dig>.<kind>.signature.txt`
#' (the current time, a blank line, then the signature lines) and
#' `<dig>.<kind>.RDS` (the data).
#'
#' @param to_cache A list to cache, with elements `sig` (signature),
#'  `dig` (digested signature) and `data`
#' @param kind A string naming the kind of object; part of the cache filenames
#' @param cache_dir Directory to store cached files in. `NULL` or `FALSE`
#'  disables caching, in which case nothing is written.
save_to_cache <- function(to_cache, kind, cache_dir) {
    if (is.null(cache_dir) || isFALSE(cache_dir)) {
        return()
    }

    # Both files share the "<dig>.<kind>" stem.
    file_stem <- paste0(to_cache$dig, ".", kind)
    writeLines(
        c(as.character(Sys.time()), "", to_cache$sig),
        file.path(cache_dir, paste0(file_stem, ".signature.txt"))
    )
    saveRDS(to_cache$data, file.path(cache_dir, paste0(file_stem, ".RDS")))
}
|