biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +142 -0
- biopipen/ns/scrna.py +19 -1
- biopipen/ns/tcr.py +30 -10
- biopipen/reports/delim/SampleInfo.svelte +2 -22
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna/ScFGSEA.svelte +4 -23
- biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
- biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
- biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -168
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/scripts/delim/SampleInfo.R +41 -7
- biopipen/scripts/scrna/CellsDistribution.R +127 -16
- biopipen/scripts/scrna/MarkersFinder.R +245 -100
- biopipen/scripts/scrna/MetaMarkers.R +163 -82
- biopipen/scripts/scrna/RadarPlots.R +163 -110
- biopipen/scripts/scrna/ScFGSEA.R +51 -11
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
- biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
- biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
- biopipen/scripts/scrna/SeuratClustering.R +73 -26
- biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
- biopipen/scripts/scrna/SeuratPreparing.R +93 -19
- biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
- biopipen/scripts/tcr/Attach2Seurat.R +2 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
- biopipen/scripts/tcr/CloneResidency.R +114 -34
- biopipen/scripts/tcr/Immunarch-basic.R +18 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
- biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
- biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
- biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
- biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
- biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
- biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
- biopipen/scripts/tcr/Immunarch.R +7 -0
- biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
- biopipen/scripts/tcr/TCRClusterStats.R +124 -11
- biopipen/scripts/tcr/TCRClustering.R +8 -9
- biopipen/scripts/tcr/TESSA.R +66 -41
- biopipen/utils/misc.R +96 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
|
@@ -165,7 +165,7 @@ filter_div = function(div, samples) {
|
|
|
165
165
|
# case: the case with argument to be run
|
|
166
166
|
# ddir: the directory to save the results
|
|
167
167
|
# value_col: the column name of the value
|
|
168
|
-
run_general = function(d, case, ddir, value_col = "Value") {
|
|
168
|
+
run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
169
169
|
args = case$args
|
|
170
170
|
args$.data = d$data
|
|
171
171
|
args$.method = case$method
|
|
@@ -282,6 +282,63 @@ run_general = function(d, case, ddir, value_col = "Value") {
|
|
|
282
282
|
print(p)
|
|
283
283
|
dev.off()
|
|
284
284
|
|
|
285
|
+
add_report(
|
|
286
|
+
list(
|
|
287
|
+
kind = "descr",
|
|
288
|
+
content = paste0(
|
|
289
|
+
"Diversity estimation using ",
|
|
290
|
+
"<code>",
|
|
291
|
+
case$method,
|
|
292
|
+
"</code>: ",
|
|
293
|
+
switch(case$method,
|
|
294
|
+
chao1 = paste0(
|
|
295
|
+
"a nonparameteric asymptotic estimator of species richness ",
|
|
296
|
+
"(number of species in a population)."),
|
|
297
|
+
hill = paste0(
|
|
298
|
+
"Hill numbers are a mathematically unified family of ",
|
|
299
|
+
"diversity indices (differing only by an exponent q)."),
|
|
300
|
+
div = paste0(
|
|
301
|
+
"true diversity, or the effective number of types, ",
|
|
302
|
+
"refers to the number of equally abundant types needed for ",
|
|
303
|
+
"the average proportional abundance of the types to equal that ",
|
|
304
|
+
"observed in the dataset of interest where all types may ",
|
|
305
|
+
"not be equally abundant."),
|
|
306
|
+
gini.simp = paste0(
|
|
307
|
+
"the Gini-Simpson index is the probability of interspecific ",
|
|
308
|
+
"encounter, i.e., probability that two entities represent different types."),
|
|
309
|
+
inv.simp = paste0(
|
|
310
|
+
"Inverse Simpson index is the effective number of types ",
|
|
311
|
+
"that is obtained when the weighted arithmetic mean is used ",
|
|
312
|
+
"to quantify average proportional abundance of types in ",
|
|
313
|
+
"the dataset of interest."),
|
|
314
|
+
gini = paste0(
|
|
315
|
+
"the Gini coefficient measures the inequality among ",
|
|
316
|
+
"values of a frequency distribution (for example levels of income). ",
|
|
317
|
+
"A Gini coefficient of zero expresses perfect equality, ",
|
|
318
|
+
"where all values are the same (for example, where everyone has ",
|
|
319
|
+
"the same income). A Gini coefficient of one (or 100 percents ) ",
|
|
320
|
+
"expresses maximal inequality among values (for example where only ",
|
|
321
|
+
"one person has all the income).")
|
|
322
|
+
)
|
|
323
|
+
)
|
|
324
|
+
),
|
|
325
|
+
h1 = "Diversity Estimation",
|
|
326
|
+
h2 = casename
|
|
327
|
+
)
|
|
328
|
+
add_report(
|
|
329
|
+
list(
|
|
330
|
+
name = "Diversity Plot",
|
|
331
|
+
contents = list(list(kind = "image", src = file.path(ddir, "diversity.png")))
|
|
332
|
+
),
|
|
333
|
+
list(
|
|
334
|
+
name = "Diversity Table",
|
|
335
|
+
contents = list(list(kind = "table", src = file.path(ddir, "diversity.txt")))
|
|
336
|
+
),
|
|
337
|
+
h1 = "Diversity Estimation",
|
|
338
|
+
h2 = casename,
|
|
339
|
+
ui = "tabs"
|
|
340
|
+
)
|
|
341
|
+
|
|
285
342
|
# Test
|
|
286
343
|
if (!is.null(case$test) && case$test$method != "none") {
|
|
287
344
|
# Use pairwise.t.test or pairwise.wilcox.test
|
|
@@ -344,6 +401,19 @@ run_general = function(d, case, ddir, value_col = "Value") {
|
|
|
344
401
|
row.names = FALSE,
|
|
345
402
|
col.names = TRUE
|
|
346
403
|
)
|
|
404
|
+
|
|
405
|
+
add_report(
|
|
406
|
+
list(
|
|
407
|
+
name = paste0("Test (", case$test$method, ")"),
|
|
408
|
+
contents = list(list(
|
|
409
|
+
kind = "table",
|
|
410
|
+
src = file.path(ddir, paste0("diversity.test.", case$test$method, ".txt"))
|
|
411
|
+
))
|
|
412
|
+
),
|
|
413
|
+
h1 = "Diversity Estimation",
|
|
414
|
+
h2 = casename,
|
|
415
|
+
ui = "tabs"
|
|
416
|
+
)
|
|
347
417
|
}
|
|
348
418
|
}
|
|
349
419
|
|
|
@@ -471,7 +541,12 @@ run_raref_multi = function(d, case, ddir) {
|
|
|
471
541
|
} else {
|
|
472
542
|
height = case$devpars$height
|
|
473
543
|
}
|
|
474
|
-
png(
|
|
544
|
+
png(
|
|
545
|
+
file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png")),
|
|
546
|
+
width = width,
|
|
547
|
+
height = height,
|
|
548
|
+
res = res
|
|
549
|
+
)
|
|
475
550
|
print(p)
|
|
476
551
|
dev.off()
|
|
477
552
|
}
|
|
@@ -481,9 +556,9 @@ run_div_case = function(casename) {
|
|
|
481
556
|
log_info("Processing case: {casename} ...")
|
|
482
557
|
case = div_cases[[casename]]
|
|
483
558
|
if (case$method == "raref") {
|
|
484
|
-
ddir = file.path(outdir, "rarefraction", casename)
|
|
559
|
+
ddir = file.path(outdir, "rarefraction", slugify(casename, tolower = FALSE))
|
|
485
560
|
} else {
|
|
486
|
-
ddir = file.path(div_dir, casename)
|
|
561
|
+
ddir = file.path(div_dir, slugify(casename, tolower = FALSE))
|
|
487
562
|
}
|
|
488
563
|
dir.create(ddir, recursive = TRUE, showWarnings = FALSE)
|
|
489
564
|
|
|
@@ -495,26 +570,56 @@ run_div_case = function(casename) {
|
|
|
495
570
|
}
|
|
496
571
|
|
|
497
572
|
# Run repDiversity
|
|
498
|
-
if (case$method == "
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
573
|
+
if (case$method == "raref") {
|
|
574
|
+
add_report(
|
|
575
|
+
list(
|
|
576
|
+
kind = "descr",
|
|
577
|
+
content = paste0(
|
|
578
|
+
"Rarefaction is a technique to assess species richness from the ",
|
|
579
|
+
"results of sampling through extrapolation. "
|
|
580
|
+
)
|
|
581
|
+
),
|
|
582
|
+
h1 = "Rarefraction",
|
|
583
|
+
h2 = casename
|
|
584
|
+
)
|
|
585
|
+
|
|
511
586
|
if (!is.null(case$separate_by)) {
|
|
512
587
|
run_raref_multi(d, case, ddir)
|
|
588
|
+
add_report(
|
|
589
|
+
list(
|
|
590
|
+
kind = "image",
|
|
591
|
+
src = file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png"))
|
|
592
|
+
),
|
|
593
|
+
h1 = "Rarefraction",
|
|
594
|
+
h2 = casename
|
|
595
|
+
)
|
|
513
596
|
} else {
|
|
514
597
|
run_raref_single(d, case, ddir)
|
|
598
|
+
add_report(
|
|
599
|
+
list(
|
|
600
|
+
kind = "image",
|
|
601
|
+
src = file.path(ddir, "raref.png")
|
|
602
|
+
),
|
|
603
|
+
h1 = "Rarefraction",
|
|
604
|
+
h2 = casename
|
|
605
|
+
)
|
|
515
606
|
}
|
|
516
607
|
} else {
|
|
517
|
-
|
|
608
|
+
if (case$method == "chao1") {
|
|
609
|
+
run_general(casename, d, case, ddir, "Estimator")
|
|
610
|
+
} else if (case$method == "hill") {
|
|
611
|
+
run_general(casename, d, case, ddir)
|
|
612
|
+
} else if (case$method == "div") {
|
|
613
|
+
run_general(casename, d, case, ddir)
|
|
614
|
+
} else if (case$method == "gini.simp") {
|
|
615
|
+
run_general(casename, d, case, ddir)
|
|
616
|
+
} else if (case$method == "inv.simp") {
|
|
617
|
+
run_general(casename, d, case, ddir)
|
|
618
|
+
} else if (case$method == "gini") {
|
|
619
|
+
run_general(casename, d, case, ddir, "V1")
|
|
620
|
+
} else {
|
|
621
|
+
stop(paste0("Unknown diversity method: ", case$method))
|
|
622
|
+
}
|
|
518
623
|
}
|
|
519
624
|
}
|
|
520
625
|
|
|
@@ -126,9 +126,23 @@ do_one_case_geneusage = function(name, case, gu_dir) {
|
|
|
126
126
|
|
|
127
127
|
ofig = file.path(odir, paste0(name, ".png"))
|
|
128
128
|
png(ofig, width = case$devpars$width, height = case$devpars$height, res = case$devpars$res)
|
|
129
|
-
print(p)
|
|
129
|
+
print(p + scale_fill_biopipen())
|
|
130
130
|
dev.off()
|
|
131
131
|
|
|
132
|
+
add_report(
|
|
133
|
+
list(
|
|
134
|
+
kind = "table_image",
|
|
135
|
+
src = ofig,
|
|
136
|
+
descr = paste0(
|
|
137
|
+
"Distribution of known gene segments following the ",
|
|
138
|
+
'<a href="http://www.imgt.org/IMGTrepertoire/LocusGenes/" target="_blank">IMGT</a> ',
|
|
139
|
+
"nomenclature."
|
|
140
|
+
)
|
|
141
|
+
),
|
|
142
|
+
h1 = "Gene Usage",
|
|
143
|
+
h2 = ifelse(name == "DEFAULT", "#", name)
|
|
144
|
+
)
|
|
145
|
+
|
|
132
146
|
if (!is.null(case$analyses$cases) && length(case$analyses$cases) > 0) {
|
|
133
147
|
for (aname in names(case$analyses$cases)) {
|
|
134
148
|
if (case$analyses$cases[[aname]]$method == "none") {
|
|
@@ -160,6 +174,14 @@ do_one_case_geneusage = function(name, case, gu_dir) {
|
|
|
160
174
|
png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
|
|
161
175
|
print(ap)
|
|
162
176
|
dev.off()
|
|
177
|
+
|
|
178
|
+
add_report(
|
|
179
|
+
list(src = aofig, name = aname),
|
|
180
|
+
h1 = "Gene Usage",
|
|
181
|
+
h2 = ifelse(name == "DEFAULT", "#", name),
|
|
182
|
+
h3 = "Gene Usage Analysis",
|
|
183
|
+
ui = "table_of_images"
|
|
184
|
+
)
|
|
163
185
|
}
|
|
164
186
|
}
|
|
165
187
|
}
|
|
@@ -88,7 +88,7 @@ for (name in names(cases)) {
|
|
|
88
88
|
do_one_case_kmer = function(name, case, kmer_dir) {
|
|
89
89
|
# print(paste0(" Case: ", name))
|
|
90
90
|
log_info("Processing case: {name} ...")
|
|
91
|
-
odir = file.path(kmer_dir, name)
|
|
91
|
+
odir = file.path(kmer_dir, slugify(name, tolower = FALSE))
|
|
92
92
|
dir.create(odir, showWarnings = FALSE)
|
|
93
93
|
|
|
94
94
|
if (!is.null(case$subset)) {
|
|
@@ -108,6 +108,29 @@ do_one_case_kmer = function(name, case, kmer_dir) {
|
|
|
108
108
|
print(p)
|
|
109
109
|
dev.off()
|
|
110
110
|
|
|
111
|
+
add_report(
|
|
112
|
+
list(
|
|
113
|
+
kind = "descr",
|
|
114
|
+
content = "K-mer sequence occurrences and motif analysis of CDR3 amino acid sequences"
|
|
115
|
+
),
|
|
116
|
+
h1 = "Kmer and sequence motif analysis",
|
|
117
|
+
h2 = ifelse(name == "DEFAULT", "#", name),
|
|
118
|
+
h3 = "Kmer sequence occurrences"
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
add_report(
|
|
122
|
+
list(kind = "image", src = ofig),
|
|
123
|
+
h1 = "Kmer and sequence motif analysis",
|
|
124
|
+
h2 = ifelse(name == "DEFAULT", "#", name),
|
|
125
|
+
h3 = "Kmer sequence occurrences"
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
add_report(
|
|
129
|
+
h1 = "Kmer and sequence motif analysis",
|
|
130
|
+
h2 = ifelse(name == "DEFAULT", "#", name),
|
|
131
|
+
h3 = "Motif analysis"
|
|
132
|
+
)
|
|
133
|
+
|
|
111
134
|
for (sample in names(d$data)) {
|
|
112
135
|
# print(paste0(" Sample: ", sample))
|
|
113
136
|
log_info("- Sample: {sample} ...")
|
|
@@ -122,18 +145,37 @@ do_one_case_kmer = function(name, case, kmer_dir) {
|
|
|
122
145
|
avis_args$.data = imm_kmera
|
|
123
146
|
ap = do_call(vis, avis_args)
|
|
124
147
|
if (aname == "DEFAULT") {
|
|
125
|
-
aofig = file.path(odir, paste0(sample, "-profile.png"))
|
|
148
|
+
aofig = file.path(odir, paste0(slugify(sample), "-profile.png"))
|
|
126
149
|
} else {
|
|
127
|
-
aofig = file.path(odir, paste0(sample, "-", aname, "-profile.png"))
|
|
150
|
+
aofig = file.path(odir, paste0(slugify(sample), "-", slugify(aname), "-profile.png"))
|
|
128
151
|
}
|
|
129
152
|
png(aofig, width = case$profiles$cases[[aname]]$devpars$width, height = case$profiles$cases[[aname]]$devpars$height, res = case$profiles$cases[[aname]]$devpars$res)
|
|
130
153
|
print(ap)
|
|
131
154
|
dev.off()
|
|
155
|
+
|
|
156
|
+
add_report(
|
|
157
|
+
list(
|
|
158
|
+
src = aofig,
|
|
159
|
+
name = paste0(sample, ifelse(aname == "DEFAULT", "", paste0(" - ", aname)))
|
|
160
|
+
),
|
|
161
|
+
h1 = "Kmer and sequence motif analysis",
|
|
162
|
+
h2 = ifelse(name == "DEFAULT", "#", name),
|
|
163
|
+
h3 = "Motif analysis",
|
|
164
|
+
ui = "table_of_images"
|
|
165
|
+
)
|
|
132
166
|
}
|
|
133
167
|
}
|
|
134
168
|
}
|
|
135
169
|
}
|
|
136
170
|
|
|
171
|
+
add_report(
|
|
172
|
+
list(
|
|
173
|
+
kind = "descr",
|
|
174
|
+
content = "Counting k-mer occurrences"
|
|
175
|
+
),
|
|
176
|
+
h1 = "Kmer and sequence motif analysis"
|
|
177
|
+
)
|
|
178
|
+
|
|
137
179
|
kmer_dir = file.path(outdir, "kmer")
|
|
138
180
|
dir.create(kmer_dir, showWarnings = FALSE)
|
|
139
181
|
|
|
@@ -80,6 +80,45 @@ for (name in names(cases)) {
|
|
|
80
80
|
cases[[name]]$analyses = analyses
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
# Get the human-readable description of a repertoire overlap method,
# used as part of the report text
# method: the name of the overlap method
# Returns the description string. The methods "public", "overlap", "jaccard",
# "tversky", "cosine" and "morisita" have their own descriptions; any other
# method name falls back to the description of incremental overlap.
get_method_descr <- function(method) {
    # Fallback text, used when `method` matches none of the names below
    incremental <- paste0(
        "incremental overlap, ",
        "overlaps of the N most abundant clonotypes with incrementally growing N"
    )

    # switch() without a default yields NULL when nothing matches
    specific <- switch(method,
        public = paste0(
            "number of public (shared) clonotypes, ",
            "a classic measure of overlap similarity"
        ),
        overlap = paste0(
            "overlap coefficient, a normalised measure of overlap similarity. ",
            "It is defined as the size of the intersection divided by the smaller of ",
            "the size of the two sets."
        ),
        jaccard = paste0(
            "Jaccard index, measures the similarity between finite sample sets, ",
            "and is defined as the size of the intersection divided by the size of ",
            "the union of the sample sets."
        ),
        tversky = paste0(
            "Tversky index, an asymmetric similarity measure on sets that compares ",
            "a variant to a prototype. ",
            "If using default arguments, it’s similar to Dice’s coefficient."
        ),
        cosine = "cosine similarity, a measure of similarity between two non-zero vectors",
        morisita = paste0(
            "Morisita's overlap index, a statistical measure of dispersion of ",
            "individuals in a population. ",
            "It is used to compare overlap among samples."
        )
    )

    if (is.null(specific)) {
        incremental
    } else {
        specific
    }
}
|
|
121
|
+
|
|
83
122
|
do_one_case_overlap = function(name, case, ov_dir) {
|
|
84
123
|
# print(paste0(" Case: ", name))
|
|
85
124
|
log_info("Processing case: {name} ...")
|
|
@@ -102,6 +141,20 @@ do_one_case_overlap = function(name, case, ov_dir) {
|
|
|
102
141
|
print(p)
|
|
103
142
|
dev.off()
|
|
104
143
|
|
|
144
|
+
add_report(
|
|
145
|
+
list(
|
|
146
|
+
kind = "table_image",
|
|
147
|
+
src = ofig,
|
|
148
|
+
descr = paste0(
|
|
149
|
+
"Repertoire overlap is the most common approach to measure repertoire similarity, ",
|
|
150
|
+
"using method <code>", case$method, "</code>, ",
|
|
151
|
+
get_method_descr(case$method)
|
|
152
|
+
)
|
|
153
|
+
),
|
|
154
|
+
h1 = "Repertoire Overlaps",
|
|
155
|
+
h2 = ifelse(name == "DEFAULT", "#", name)
|
|
156
|
+
)
|
|
157
|
+
|
|
105
158
|
if (!is.null(case$analyses$cases) && length(case$analyses$cases) > 0) {
|
|
106
159
|
for (aname in names(case$analyses$cases)) {
|
|
107
160
|
if (case$analyses$cases[[aname]]$method == "none") next
|
|
@@ -135,6 +188,15 @@ do_one_case_overlap = function(name, case, ov_dir) {
|
|
|
135
188
|
png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
|
|
136
189
|
print(ap)
|
|
137
190
|
dev.off()
|
|
191
|
+
|
|
192
|
+
add_report(
|
|
193
|
+
list(src = aofig, name = aname),
|
|
194
|
+
h1 = "Repertoire Overlaps",
|
|
195
|
+
h2 = ifelse(name == "DEFAULT", "#", name),
|
|
196
|
+
h3 = "Repertoire Overlap Analysis",
|
|
197
|
+
ui = "table_of_images"
|
|
198
|
+
)
|
|
199
|
+
|
|
138
200
|
}
|
|
139
201
|
}
|
|
140
202
|
}
|
|
@@ -45,7 +45,7 @@ if (is.null(spects$cases) || length(spects$cases) == 0) {
|
|
|
45
45
|
do_one_case_spectratyping = function(name, case, spect_dir) {
|
|
46
46
|
# print(paste0(" Case: ", name))
|
|
47
47
|
log_info("- Processing case: {name} ...")
|
|
48
|
-
odir = file.path(spect_dir, name)
|
|
48
|
+
odir = file.path(spect_dir, slugify(name, tolower = FALSE))
|
|
49
49
|
dir.create(odir, showWarnings = FALSE)
|
|
50
50
|
|
|
51
51
|
if (!is.null(case$subset)) {
|
|
@@ -62,17 +62,33 @@ do_one_case_spectratyping = function(name, case, spect_dir) {
|
|
|
62
62
|
.quant = case$quant,
|
|
63
63
|
.col = case$col
|
|
64
64
|
)
|
|
65
|
+
spectfile = file.path(odir, paste0(slugify(sample, tolower = FALSE), ".spect"))
|
|
65
66
|
png(
|
|
66
|
-
|
|
67
|
+
spectfile,
|
|
67
68
|
res = case$devpars$res,
|
|
68
69
|
width = case$devpars$width,
|
|
69
70
|
height = case$devpars$height
|
|
70
71
|
)
|
|
71
72
|
print(vis(spec_obj))
|
|
72
73
|
dev.off()
|
|
74
|
+
|
|
75
|
+
add_report(
|
|
76
|
+
list(src = spectfile, name = sample),
|
|
77
|
+
h1 = "Spectratyping",
|
|
78
|
+
h2 = name,
|
|
79
|
+
ui = "table_of_images"
|
|
80
|
+
)
|
|
73
81
|
}
|
|
74
82
|
}
|
|
75
83
|
|
|
84
|
+
add_report(
|
|
85
|
+
list(
|
|
86
|
+
kind = "descr",
|
|
87
|
+
content = "Spectratype is a useful way to represent distributions of genes per sequence length."
|
|
88
|
+
),
|
|
89
|
+
h1 = "Spectratyping"
|
|
90
|
+
)
|
|
91
|
+
|
|
76
92
|
spect_dir = file.path(outdir, "spectratyping")
|
|
77
93
|
dir.create(spect_dir, showWarnings = FALSE)
|
|
78
94
|
|
|
@@ -86,10 +86,33 @@ run_tracking_case = function(casename) {
|
|
|
86
86
|
imm_tracking = trackClonotypes(newdata, targets, .col = "aa")
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
-
tracking_png = file.path(tracking_dir, paste0(casename, ".png"))
|
|
89
|
+
tracking_png = file.path(tracking_dir, paste0(slugify(casename), ".png"))
|
|
90
90
|
png(tracking_png, res=100, height=1000, width=600 + 150 * length(subjects))
|
|
91
91
|
print(vis(imm_tracking))
|
|
92
92
|
dev.off()
|
|
93
|
+
|
|
94
|
+
add_report(
|
|
95
|
+
list(
|
|
96
|
+
kind = "descr",
|
|
97
|
+
content = paste0(
|
|
98
|
+
"Clonotype tracking is a popular approach to monitor changes in the frequency of ",
|
|
99
|
+
"clonotypes of interest in vaccination and cancer immunology. ",
|
|
100
|
+
"For example, a researcher can track a clonotype across different time points ",
|
|
101
|
+
"in pre- and post-vaccination repertoires, or analyse the growth of ",
|
|
102
|
+
"malignant clonotypes in a tumor sample."
|
|
103
|
+
)
|
|
104
|
+
),
|
|
105
|
+
h1 = "Tracking of clonotypes"
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
add_report(
|
|
109
|
+
list(
|
|
110
|
+
src = tracking_png,
|
|
111
|
+
name = if (casename == "DEFAULT") NULL else casename
|
|
112
|
+
),
|
|
113
|
+
h1 = "Tracking of clonotypes",
|
|
114
|
+
ui = "table_of_images"
|
|
115
|
+
)
|
|
93
116
|
}
|
|
94
117
|
}
|
|
95
118
|
|
|
@@ -48,7 +48,7 @@ dir.create(vjjunc_dir, showWarnings = FALSE)
|
|
|
48
48
|
|
|
49
49
|
do_one_case_vjjunc <- function(name, case) {
|
|
50
50
|
log_info("Processing case: {name} ...")
|
|
51
|
-
odir = file.path(vjjunc_dir, name)
|
|
51
|
+
odir = file.path(vjjunc_dir, slugify(name, tolower = FALSE))
|
|
52
52
|
dir.create(odir, showWarnings = FALSE)
|
|
53
53
|
|
|
54
54
|
if (!is.null(case$subset)) {
|
|
@@ -76,7 +76,7 @@ do_one_case_vjjunc <- function(name, case) {
|
|
|
76
76
|
filter(!is.na(V.name) & !is.na(J.name) & V.name != "None" & J.name != "None") %>%
|
|
77
77
|
arrange(V.name, J.name)
|
|
78
78
|
|
|
79
|
-
figfile <- file.path(odir, paste0(by_name, ".png"))
|
|
79
|
+
figfile <- file.path(odir, paste0(slugify(by_name), ".png"))
|
|
80
80
|
png(figfile, width = case$devpars$width, height = case$devpars$height, res = case$devpars$res)
|
|
81
81
|
chordDiagram(
|
|
82
82
|
gsd,
|
|
@@ -96,8 +96,23 @@ do_one_case_vjjunc <- function(name, case) {
|
|
|
96
96
|
}, bg.border = NA) # here set bg.border to NA is important
|
|
97
97
|
dev.off()
|
|
98
98
|
|
|
99
|
+
add_report(
|
|
100
|
+
list(src = figfile, name = by_name),
|
|
101
|
+
h1 = "V-J Junction Circos Plots",
|
|
102
|
+
h2 = ifelse(name == "DEFAULT", "#" , name),
|
|
103
|
+
ui = "table_of_images"
|
|
104
|
+
)
|
|
105
|
+
|
|
99
106
|
NULL
|
|
100
107
|
})
|
|
101
108
|
}
|
|
102
109
|
|
|
110
|
+
add_report(
|
|
111
|
+
list(
|
|
112
|
+
kind = "descr",
|
|
113
|
+
content = "V-J usage plot displaying the frequency of various V-J junctions."
|
|
114
|
+
),
|
|
115
|
+
h1 = "V-J Junction Circos Plots"
|
|
116
|
+
)
|
|
117
|
+
|
|
103
118
|
sapply(names(cases), function(name) do_one_case_vjjunc(name, cases[[name]]))
|
biopipen/scripts/tcr/Immunarch.R
CHANGED
|
@@ -12,6 +12,7 @@ library(glue)
|
|
|
12
12
|
library(tidyr)
|
|
13
13
|
library(tibble)
|
|
14
14
|
library(logger)
|
|
15
|
+
library(slugify)
|
|
15
16
|
|
|
16
17
|
log_info("Loading arguments ...")
|
|
17
18
|
theme_set(theme_prism())
|
|
@@ -19,12 +20,16 @@ theme_set(theme_prism())
|
|
|
19
20
|
immfile = {{ in.immdata | r }}
|
|
20
21
|
metafile = {{ in.metafile | r }}
|
|
21
22
|
outdir = {{ out.outdir | r }}
|
|
23
|
+
joboutdir = {{ job.outdir | r }}
|
|
22
24
|
mutaters = {{ envs.mutaters | r }}
|
|
23
25
|
prefix = {{ envs.prefix | r }}
|
|
24
26
|
|
|
25
27
|
log_info("Loading immdata ...")
|
|
26
28
|
immdata = readRDS(immfile)
|
|
27
29
|
|
|
30
|
+
if (is.null(prefix)) { prefix = immdata$prefix }
|
|
31
|
+
if (is.null(prefix)) { prefix = "" }
|
|
32
|
+
|
|
28
33
|
log_info("Expanding immdata ...")
|
|
29
34
|
exdata = expand_immdata(immdata)
|
|
30
35
|
|
|
@@ -101,3 +106,5 @@ n_samples = length(immdata$data)
|
|
|
101
106
|
# VJ junction #
|
|
102
107
|
######################
|
|
103
108
|
{% include biopipen_dir + "/scripts/tcr/Immunarch-vjjunc.R" %}
|
|
109
|
+
|
|
110
|
+
save_report(joboutdir)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/single_cell.R")
|
|
2
3
|
|
|
3
4
|
# Loading 10x data into immunarch
|
|
4
5
|
library(immunarch)
|
|
@@ -13,7 +14,8 @@ rdsfile = {{ out.rdsfile | quote }}
|
|
|
13
14
|
metatxt = {{ out.metatxt | quote }}
|
|
14
15
|
tmpdir = {{ envs.tmpdir | quote }}
|
|
15
16
|
mode = {{ envs.mode | quote }}
|
|
16
|
-
|
|
17
|
+
extracols = {{ envs.extracols | r}}
|
|
18
|
+
prefix = {{ envs.prefix | r }}
|
|
17
19
|
|
|
18
20
|
metadata = read.table(
|
|
19
21
|
metafile,
|
|
@@ -164,27 +166,24 @@ immdata$meta = left_join(
|
|
|
164
166
|
by = "Sample"
|
|
165
167
|
)
|
|
166
168
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
|
|
170
|
-
# Clones Proportion CDR3.aa Barcode
|
|
171
|
-
# 5 4 0.008583691 CAVRDTGNTPLVF;CASSEYSNQPQHF GTTCGGGCACTTACGA-1;TCTCTAAGTACCAGTT-1
|
|
172
|
-
# 6 4 0.008583691 CALTQAAGNKLTF;CASRPEDLRGQPQHF GCTTGAAGTCGGCACT-1;TACTCGCTCCTAAGTG-1
|
|
173
|
-
cldata = immdata$data[[i]][, unique(c(metacols, "Barcode"))]
|
|
174
|
-
# # A tibble: 4 × 5
|
|
175
|
-
# Sample Patient Timepoint Tissue
|
|
176
|
-
# <chr> <chr> <chr> <chr>
|
|
177
|
-
# 1 MC1685Pt011-Baseline-PB MC1685Pt011 Baseline PB
|
|
178
|
-
mdata = as.list(immdata$meta[i, , drop=FALSE])
|
|
179
|
-
for (mname in names(mdata)) {
|
|
180
|
-
assign(mname, mdata[[mname]])
|
|
181
|
-
}
|
|
169
|
+
immdata$prefix = prefix
|
|
182
170
|
|
|
183
|
-
|
|
184
|
-
separate_rows(Barcode, sep=";") %>%
|
|
185
|
-
distinct(Barcode, .keep_all = TRUE) %>%
|
|
186
|
-
mutate(Barcode = glue("{{envs.prefix}}{Barcode}")) %>%
|
|
187
|
-
column_to_rownames("Barcode")
|
|
171
|
+
saveRDS(immdata, file=rdsfile)
|
|
188
172
|
|
|
189
|
-
|
|
190
|
-
|
|
173
|
+
exdata <- expand_immdata(immdata, cell_id = "Barcode") %>%
|
|
174
|
+
distinct(Sample, Barcode, .keep_all = TRUE) %>%
|
|
175
|
+
mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
|
|
176
|
+
select(any_of(c(
|
|
177
|
+
colnames(immdata$meta),
|
|
178
|
+
"Barcode",
|
|
179
|
+
"CDR3.aa",
|
|
180
|
+
"Clones",
|
|
181
|
+
"Proportion",
|
|
182
|
+
"V.name",
|
|
183
|
+
"D.name",
|
|
184
|
+
"J.name",
|
|
185
|
+
extracols
|
|
186
|
+
))) %>%
|
|
187
|
+
column_to_rownames("Barcode")
|
|
188
|
+
|
|
189
|
+
write.table(exdata, metatxt, sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
|