biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (62) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +142 -0
  3. biopipen/ns/scrna.py +19 -1
  4. biopipen/ns/tcr.py +30 -10
  5. biopipen/reports/delim/SampleInfo.svelte +2 -22
  6. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  7. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  8. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  9. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  10. biopipen/reports/scrna/ScFGSEA.svelte +4 -23
  11. biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
  12. biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
  13. biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
  14. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
  15. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
  16. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
  17. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
  18. biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
  19. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  20. biopipen/reports/tcr/Immunarch.svelte +4 -168
  21. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  22. biopipen/reports/tcr/TESSA.svelte +11 -28
  23. biopipen/scripts/delim/SampleInfo.R +41 -7
  24. biopipen/scripts/scrna/CellsDistribution.R +127 -16
  25. biopipen/scripts/scrna/MarkersFinder.R +245 -100
  26. biopipen/scripts/scrna/MetaMarkers.R +163 -82
  27. biopipen/scripts/scrna/RadarPlots.R +163 -110
  28. biopipen/scripts/scrna/ScFGSEA.R +51 -11
  29. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
  30. biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
  31. biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
  32. biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
  33. biopipen/scripts/scrna/SeuratClustering.R +73 -26
  34. biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
  35. biopipen/scripts/scrna/SeuratPreparing.R +93 -19
  36. biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
  37. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
  38. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
  39. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
  40. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
  41. biopipen/scripts/tcr/Attach2Seurat.R +2 -1
  42. biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
  43. biopipen/scripts/tcr/CloneResidency.R +114 -34
  44. biopipen/scripts/tcr/Immunarch-basic.R +18 -4
  45. biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
  46. biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
  47. biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
  48. biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
  49. biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
  50. biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
  51. biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
  52. biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
  53. biopipen/scripts/tcr/Immunarch.R +7 -0
  54. biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
  55. biopipen/scripts/tcr/TCRClusterStats.R +124 -11
  56. biopipen/scripts/tcr/TCRClustering.R +8 -9
  57. biopipen/scripts/tcr/TESSA.R +66 -41
  58. biopipen/utils/misc.R +96 -1
  59. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
  60. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
  61. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
  62. {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
@@ -165,7 +165,7 @@ filter_div = function(div, samples) {
165
165
  # case: the case with argument to be run
166
166
  # ddir: the directory to save the results
167
167
  # value_col: the column name of the value
168
- run_general = function(d, case, ddir, value_col = "Value") {
168
+ run_general = function(casename, d, case, ddir, value_col = "Value") {
169
169
  args = case$args
170
170
  args$.data = d$data
171
171
  args$.method = case$method
@@ -282,6 +282,63 @@ run_general = function(d, case, ddir, value_col = "Value") {
282
282
  print(p)
283
283
  dev.off()
284
284
 
285
+ add_report(
286
+ list(
287
+ kind = "descr",
288
+ content = paste0(
289
+ "Diversity estimation using ",
290
+ "<code>",
291
+ case$method,
292
+ "</code>: ",
293
+ switch(case$method,
294
+ chao1 = paste0(
295
+ "a nonparameteric asymptotic estimator of species richness ",
296
+ "(number of species in a population)."),
297
+ hill = paste0(
298
+ "Hill numbers are a mathematically unified family of ",
299
+ "diversity indices (differing only by an exponent q)."),
300
+ div = paste0(
301
+ "true diversity, or the effective number of types, ",
302
+ "refers to the number of equally abundant types needed for ",
303
+ "the average proportional abundance of the types to equal that ",
304
+ "observed in the dataset of interest where all types may ",
305
+ "not be equally abundant."),
306
+ gini.simp = paste0(
307
+ "the Gini-Simpson index is the probability of interspecific ",
308
+ "encounter, i.e., probability that two entities represent different types."),
309
+ inv.simp = paste0(
310
+ "Inverse Simpson index is the effective number of types ",
311
+ "that is obtained when the weighted arithmetic mean is used ",
312
+ "to quantify average proportional abundance of types in ",
313
+ "the dataset of interest."),
314
+ gini = paste0(
315
+ "the Gini coefficient measures the inequality among ",
316
+ "values of a frequency distribution (for example levels of income). ",
317
+ "A Gini coefficient of zero expresses perfect equality, ",
318
+ "where all values are the same (for example, where everyone has ",
319
+ "the same income). A Gini coefficient of one (or 100 percents ) ",
320
+ "expresses maximal inequality among values (for example where only ",
321
+ "one person has all the income).")
322
+ )
323
+ )
324
+ ),
325
+ h1 = "Diversity Estimation",
326
+ h2 = casename
327
+ )
328
+ add_report(
329
+ list(
330
+ name = "Diversity Plot",
331
+ contents = list(list(kind = "image", src = file.path(ddir, "diversity.png")))
332
+ ),
333
+ list(
334
+ name = "Diversity Table",
335
+ contents = list(list(kind = "table", src = file.path(ddir, "diversity.txt")))
336
+ ),
337
+ h1 = "Diversity Estimation",
338
+ h2 = casename,
339
+ ui = "tabs"
340
+ )
341
+
285
342
  # Test
286
343
  if (!is.null(case$test) && case$test$method != "none") {
287
344
  # Use pairwise.t.test or pairwise.wilcox.test
@@ -344,6 +401,19 @@ run_general = function(d, case, ddir, value_col = "Value") {
344
401
  row.names = FALSE,
345
402
  col.names = TRUE
346
403
  )
404
+
405
+ add_report(
406
+ list(
407
+ name = paste0("Test (", case$test$method, ")"),
408
+ contents = list(list(
409
+ kind = "table",
410
+ src = file.path(ddir, paste0("diversity.test.", case$test$method, ".txt"))
411
+ ))
412
+ ),
413
+ h1 = "Diversity Estimation",
414
+ h2 = casename,
415
+ ui = "tabs"
416
+ )
347
417
  }
348
418
  }
349
419
 
@@ -471,7 +541,12 @@ run_raref_multi = function(d, case, ddir) {
471
541
  } else {
472
542
  height = case$devpars$height
473
543
  }
474
- png(file.path(ddir, paste0("raref-", case$separate_by, ".png")), width = width, height = height, res = res)
544
+ png(
545
+ file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png")),
546
+ width = width,
547
+ height = height,
548
+ res = res
549
+ )
475
550
  print(p)
476
551
  dev.off()
477
552
  }
@@ -481,9 +556,9 @@ run_div_case = function(casename) {
481
556
  log_info("Processing case: {casename} ...")
482
557
  case = div_cases[[casename]]
483
558
  if (case$method == "raref") {
484
- ddir = file.path(outdir, "rarefraction", casename)
559
+ ddir = file.path(outdir, "rarefraction", slugify(casename, tolower = FALSE))
485
560
  } else {
486
- ddir = file.path(div_dir, casename)
561
+ ddir = file.path(div_dir, slugify(casename, tolower = FALSE))
487
562
  }
488
563
  dir.create(ddir, recursive = TRUE, showWarnings = FALSE)
489
564
 
@@ -495,26 +570,56 @@ run_div_case = function(casename) {
495
570
  }
496
571
 
497
572
  # Run repDiversity
498
- if (case$method == "chao1") {
499
- run_general(d, case, ddir, "Estimator")
500
- } else if (case$method == "hill") {
501
- run_general(d, case, ddir)
502
- } else if (case$method == "div") {
503
- run_general(d, case, ddir)
504
- } else if (case$method == "gini.simp") {
505
- run_general(d, case, ddir)
506
- } else if (case$method == "inv.simp") {
507
- run_general(d, case, ddir)
508
- } else if (case$method == "gini") {
509
- run_general(d, case, ddir, "V1")
510
- } else if (case$method == "raref") {
573
+ if (case$method == "raref") {
574
+ add_report(
575
+ list(
576
+ kind = "descr",
577
+ content = paste0(
578
+ "Rarefaction is a technique to assess species richness from the ",
579
+ "results of sampling through extrapolation. "
580
+ )
581
+ ),
582
+ h1 = "Rarefraction",
583
+ h2 = casename
584
+ )
585
+
511
586
  if (!is.null(case$separate_by)) {
512
587
  run_raref_multi(d, case, ddir)
588
+ add_report(
589
+ list(
590
+ kind = "image",
591
+ src = file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png"))
592
+ ),
593
+ h1 = "Rarefraction",
594
+ h2 = casename
595
+ )
513
596
  } else {
514
597
  run_raref_single(d, case, ddir)
598
+ add_report(
599
+ list(
600
+ kind = "image",
601
+ src = file.path(ddir, "raref.png")
602
+ ),
603
+ h1 = "Rarefraction",
604
+ h2 = casename
605
+ )
515
606
  }
516
607
  } else {
517
- stop(paste0("Unknown diversity method: ", case$method))
608
+ if (case$method == "chao1") {
609
+ run_general(casename, d, case, ddir, "Estimator")
610
+ } else if (case$method == "hill") {
611
+ run_general(casename, d, case, ddir)
612
+ } else if (case$method == "div") {
613
+ run_general(casename, d, case, ddir)
614
+ } else if (case$method == "gini.simp") {
615
+ run_general(casename, d, case, ddir)
616
+ } else if (case$method == "inv.simp") {
617
+ run_general(casename, d, case, ddir)
618
+ } else if (case$method == "gini") {
619
+ run_general(casename, d, case, ddir, "V1")
620
+ } else {
621
+ stop(paste0("Unknown diversity method: ", case$method))
622
+ }
518
623
  }
519
624
  }
520
625
 
@@ -126,9 +126,23 @@ do_one_case_geneusage = function(name, case, gu_dir) {
126
126
 
127
127
  ofig = file.path(odir, paste0(name, ".png"))
128
128
  png(ofig, width = case$devpars$width, height = case$devpars$height, res = case$devpars$res)
129
- print(p)
129
+ print(p + scale_fill_biopipen())
130
130
  dev.off()
131
131
 
132
+ add_report(
133
+ list(
134
+ kind = "table_image",
135
+ src = ofig,
136
+ descr = paste0(
137
+ "Distribution of known gene segments following the ",
138
+ '<a href="http://www.imgt.org/IMGTrepertoire/LocusGenes/" target="_blank">IMGT</a> ',
139
+ "nomenclature."
140
+ )
141
+ ),
142
+ h1 = "Gene Usage",
143
+ h2 = ifelse(name == "DEFAULT", "#", name)
144
+ )
145
+
132
146
  if (!is.null(case$analyses$cases) && length(case$analyses$cases) > 0) {
133
147
  for (aname in names(case$analyses$cases)) {
134
148
  if (case$analyses$cases[[aname]]$method == "none") {
@@ -160,6 +174,14 @@ do_one_case_geneusage = function(name, case, gu_dir) {
160
174
  png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
161
175
  print(ap)
162
176
  dev.off()
177
+
178
+ add_report(
179
+ list(src = aofig, name = aname),
180
+ h1 = "Gene Usage",
181
+ h2 = ifelse(name == "DEFAULT", "#", name),
182
+ h3 = "Gene Usage Analysis",
183
+ ui = "table_of_images"
184
+ )
163
185
  }
164
186
  }
165
187
  }
@@ -88,7 +88,7 @@ for (name in names(cases)) {
88
88
  do_one_case_kmer = function(name, case, kmer_dir) {
89
89
  # print(paste0(" Case: ", name))
90
90
  log_info("Processing case: {name} ...")
91
- odir = file.path(kmer_dir, name)
91
+ odir = file.path(kmer_dir, slugify(name, tolower = FALSE))
92
92
  dir.create(odir, showWarnings = FALSE)
93
93
 
94
94
  if (!is.null(case$subset)) {
@@ -108,6 +108,29 @@ do_one_case_kmer = function(name, case, kmer_dir) {
108
108
  print(p)
109
109
  dev.off()
110
110
 
111
+ add_report(
112
+ list(
113
+ kind = "descr",
114
+ content = "K-mer sequence occurrences and motif analysis of CDR3 amino acid sequences"
115
+ ),
116
+ h1 = "Kmer and sequence motif analysis",
117
+ h2 = ifelse(name == "DEFAULT", "#", name),
118
+ h3 = "Kmer sequence occurrences"
119
+ )
120
+
121
+ add_report(
122
+ list(kind = "image", src = ofig),
123
+ h1 = "Kmer and sequence motif analysis",
124
+ h2 = ifelse(name == "DEFAULT", "#", name),
125
+ h3 = "Kmer sequence occurrences"
126
+ )
127
+
128
+ add_report(
129
+ h1 = "Kmer and sequence motif analysis",
130
+ h2 = ifelse(name == "DEFAULT", "#", name),
131
+ h3 = "Motif analysis"
132
+ )
133
+
111
134
  for (sample in names(d$data)) {
112
135
  # print(paste0(" Sample: ", sample))
113
136
  log_info("- Sample: {sample} ...")
@@ -122,18 +145,37 @@ do_one_case_kmer = function(name, case, kmer_dir) {
122
145
  avis_args$.data = imm_kmera
123
146
  ap = do_call(vis, avis_args)
124
147
  if (aname == "DEFAULT") {
125
- aofig = file.path(odir, paste0(sample, "-profile.png"))
148
+ aofig = file.path(odir, paste0(slugify(sample), "-profile.png"))
126
149
  } else {
127
- aofig = file.path(odir, paste0(sample, "-", aname, "-profile.png"))
150
+ aofig = file.path(odir, paste0(slugify(sample), "-", slugify(aname), "-profile.png"))
128
151
  }
129
152
  png(aofig, width = case$profiles$cases[[aname]]$devpars$width, height = case$profiles$cases[[aname]]$devpars$height, res = case$profiles$cases[[aname]]$devpars$res)
130
153
  print(ap)
131
154
  dev.off()
155
+
156
+ add_report(
157
+ list(
158
+ src = aofig,
159
+ name = paste0(sample, ifelse(aname == "DEFAULT", "", paste0(" - ", aname)))
160
+ ),
161
+ h1 = "Kmer and sequence motif analysis",
162
+ h2 = ifelse(name == "DEFAULT", "#", name),
163
+ h3 = "Motif analysis",
164
+ ui = "table_of_images"
165
+ )
132
166
  }
133
167
  }
134
168
  }
135
169
  }
136
170
 
171
+ add_report(
172
+ list(
173
+ kind = "descr",
174
+ content = "Counting k-mer occurrences"
175
+ ),
176
+ h1 = "Kmer and sequence motif analysis"
177
+ )
178
+
137
179
  kmer_dir = file.path(outdir, "kmer")
138
180
  dir.create(kmer_dir, showWarnings = FALSE)
139
181
 
@@ -80,6 +80,45 @@ for (name in names(cases)) {
80
80
  cases[[name]]$analyses = analyses
81
81
  }
82
82
 
83
# Build the human-readable description of a repertoire-overlap method that is
# embedded in the report next to the overlap plot.
#
# @param method Name of the overlap method (a single string). The names handled
#   explicitly are "public", "overlap", "jaccard", "tversky", "cosine" and
#   "morisita".
# @return A single character string. Any name other than the six above yields
#   the "incremental overlap" description.
get_method_descr <- function(method) {
    # Description used for every method name without a dedicated entry below.
    incremental_descr <- paste0(
        "incremental overlap, overlaps of the N most abundant clonotypes ",
        "with incrementally growing N"
    )

    known_descr <- switch(method,
        public = paste0(
            "number of public (shared) clonotypes, a classic measure of ",
            "overlap similarity"
        ),
        overlap = paste0(
            "overlap coefficient, a normalised measure of overlap similarity. ",
            "It is defined as the size of the intersection divided by the ",
            "smaller of the size of the two sets."
        ),
        jaccard = paste0(
            "Jaccard index, measures the similarity between finite sample ",
            "sets, and is defined as the size of the intersection divided by ",
            "the size of the union of the sample sets."
        ),
        tversky = paste0(
            "Tversky index, an asymmetric similarity measure on sets that ",
            "compares a variant to a prototype. If using default arguments, ",
            "it’s similar to Dice’s coefficient."
        ),
        cosine = paste0(
            "cosine similarity, a measure of similarity between two ",
            "non-zero vectors"
        ),
        morisita = paste0(
            "Morisita's overlap index, a statistical measure of dispersion ",
            "of individuals in a population. It is used to compare overlap ",
            "among samples."
        )
    )

    # switch() gives NULL when no named alternative matches `method`.
    if (is.null(known_descr)) {
        incremental_descr
    } else {
        known_descr
    }
}
121
+
83
122
  do_one_case_overlap = function(name, case, ov_dir) {
84
123
  # print(paste0(" Case: ", name))
85
124
  log_info("Processing case: {name} ...")
@@ -102,6 +141,20 @@ do_one_case_overlap = function(name, case, ov_dir) {
102
141
  print(p)
103
142
  dev.off()
104
143
 
144
+ add_report(
145
+ list(
146
+ kind = "table_image",
147
+ src = ofig,
148
+ descr = paste0(
149
+ "Repertoire overlap is the most common approach to measure repertoire similarity, ",
150
+ "using method <code>", case$method, "</code>, ",
151
+ get_method_descr(case$method)
152
+ )
153
+ ),
154
+ h1 = "Repertoire Overlaps",
155
+ h2 = ifelse(name == "DEFAULT", "#", name)
156
+ )
157
+
105
158
  if (!is.null(case$analyses$cases) && length(case$analyses$cases) > 0) {
106
159
  for (aname in names(case$analyses$cases)) {
107
160
  if (case$analyses$cases[[aname]]$method == "none") next
@@ -135,6 +188,15 @@ do_one_case_overlap = function(name, case, ov_dir) {
135
188
  png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
136
189
  print(ap)
137
190
  dev.off()
191
+
192
+ add_report(
193
+ list(src = aofig, name = aname),
194
+ h1 = "Repertoire Overlaps",
195
+ h2 = ifelse(name == "DEFAULT", "#", name),
196
+ h3 = "Repertoire Overlap Analysis",
197
+ ui = "table_of_images"
198
+ )
199
+
138
200
  }
139
201
  }
140
202
  }
@@ -45,7 +45,7 @@ if (is.null(spects$cases) || length(spects$cases) == 0) {
45
45
  do_one_case_spectratyping = function(name, case, spect_dir) {
46
46
  # print(paste0(" Case: ", name))
47
47
  log_info("- Processing case: {name} ...")
48
- odir = file.path(spect_dir, name)
48
+ odir = file.path(spect_dir, slugify(name, tolower = FALSE))
49
49
  dir.create(odir, showWarnings = FALSE)
50
50
 
51
51
  if (!is.null(case$subset)) {
@@ -62,17 +62,33 @@ do_one_case_spectratyping = function(name, case, spect_dir) {
62
62
  .quant = case$quant,
63
63
  .col = case$col
64
64
  )
65
+ spectfile = file.path(odir, paste0(slugify(sample, tolower = FALSE), ".spect"))
65
66
  png(
66
- file.path(odir, paste0(sample, ".png")),
67
+ spectfile,
67
68
  res = case$devpars$res,
68
69
  width = case$devpars$width,
69
70
  height = case$devpars$height
70
71
  )
71
72
  print(vis(spec_obj))
72
73
  dev.off()
74
+
75
+ add_report(
76
+ list(src = spectfile, name = sample),
77
+ h1 = "Spectratyping",
78
+ h2 = name,
79
+ ui = "table_of_images"
80
+ )
73
81
  }
74
82
  }
75
83
 
84
+ add_report(
85
+ list(
86
+ kind = "descr",
87
+ content = "Spectratype is a useful way to represent distributions of genes per sequence length."
88
+ ),
89
+ h1 = "Spectratyping"
90
+ )
91
+
76
92
  spect_dir = file.path(outdir, "spectratyping")
77
93
  dir.create(spect_dir, showWarnings = FALSE)
78
94
 
@@ -86,10 +86,33 @@ run_tracking_case = function(casename) {
86
86
  imm_tracking = trackClonotypes(newdata, targets, .col = "aa")
87
87
  }
88
88
 
89
- tracking_png = file.path(tracking_dir, paste0(casename, ".png"))
89
+ tracking_png = file.path(tracking_dir, paste0(slugify(casename), ".png"))
90
90
  png(tracking_png, res=100, height=1000, width=600 + 150 * length(subjects))
91
91
  print(vis(imm_tracking))
92
92
  dev.off()
93
+
94
+ add_report(
95
+ list(
96
+ kind = "descr",
97
+ content = paste0(
98
+ "Clonotype tracking is a popular approach to monitor changes in the frequency of ",
99
+ "clonotypes of interest in vaccination and cancer immunology. ",
100
+ "For example, a researcher can track a clonotype across different time points ",
101
+ "in pre- and post-vaccination repertoires, or analyse the growth of ",
102
+ "malignant clonotypes in a tumor sample."
103
+ )
104
+ ),
105
+ h1 = "Tracking of clonotypes"
106
+ )
107
+
108
+ add_report(
109
+ list(
110
+ src = tracking_png,
111
+ name = if (casename == "DEFAULT") NULL else casename
112
+ ),
113
+ h1 = "Tracking of clonotypes",
114
+ ui = "table_of_images"
115
+ )
93
116
  }
94
117
  }
95
118
 
@@ -48,7 +48,7 @@ dir.create(vjjunc_dir, showWarnings = FALSE)
48
48
 
49
49
  do_one_case_vjjunc <- function(name, case) {
50
50
  log_info("Processing case: {name} ...")
51
- odir = file.path(vjjunc_dir, name)
51
+ odir = file.path(vjjunc_dir, slugify(name, tolower = FALSE))
52
52
  dir.create(odir, showWarnings = FALSE)
53
53
 
54
54
  if (!is.null(case$subset)) {
@@ -76,7 +76,7 @@ do_one_case_vjjunc <- function(name, case) {
76
76
  filter(!is.na(V.name) & !is.na(J.name) & V.name != "None" & J.name != "None") %>%
77
77
  arrange(V.name, J.name)
78
78
 
79
- figfile <- file.path(odir, paste0(by_name, ".png"))
79
+ figfile <- file.path(odir, paste0(slugify(by_name), ".png"))
80
80
  png(figfile, width = case$devpars$width, height = case$devpars$height, res = case$devpars$res)
81
81
  chordDiagram(
82
82
  gsd,
@@ -96,8 +96,23 @@ do_one_case_vjjunc <- function(name, case) {
96
96
  }, bg.border = NA) # here set bg.border to NA is important
97
97
  dev.off()
98
98
 
99
+ add_report(
100
+ list(src = figfile, name = by_name),
101
+ h1 = "V-J Junction Circos Plots",
102
+ h2 = ifelse(name == "DEFAULT", "#" , name),
103
+ ui = "table_of_images"
104
+ )
105
+
99
106
  NULL
100
107
  })
101
108
  }
102
109
 
110
+ add_report(
111
+ list(
112
+ kind = "descr",
113
+ content = "V-J usage plot displaying the frequency of various V-J junctions."
114
+ ),
115
+ h1 = "V-J Junction Circos Plots"
116
+ )
117
+
103
118
  sapply(names(cases), function(name) do_one_case_vjjunc(name, cases[[name]]))
@@ -12,6 +12,7 @@ library(glue)
12
12
  library(tidyr)
13
13
  library(tibble)
14
14
  library(logger)
15
+ library(slugify)
15
16
 
16
17
  log_info("Loading arguments ...")
17
18
  theme_set(theme_prism())
@@ -19,12 +20,16 @@ theme_set(theme_prism())
19
20
  immfile = {{ in.immdata | r }}
20
21
  metafile = {{ in.metafile | r }}
21
22
  outdir = {{ out.outdir | r }}
23
+ joboutdir = {{ job.outdir | r }}
22
24
  mutaters = {{ envs.mutaters | r }}
23
25
  prefix = {{ envs.prefix | r }}
24
26
 
25
27
  log_info("Loading immdata ...")
26
28
  immdata = readRDS(immfile)
27
29
 
30
+ if (is.null(prefix)) { prefix = immdata$prefix }
31
+ if (is.null(prefix)) { prefix = "" }
32
+
28
33
  log_info("Expanding immdata ...")
29
34
  exdata = expand_immdata(immdata)
30
35
 
@@ -101,3 +106,5 @@ n_samples = length(immdata$data)
101
106
  # VJ junction #
102
107
  ######################
103
108
  {% include biopipen_dir + "/scripts/tcr/Immunarch-vjjunc.R" %}
109
+
110
+ save_report(joboutdir)
@@ -1,4 +1,5 @@
1
1
  source("{{biopipen_dir}}/utils/misc.R")
2
+ source("{{biopipen_dir}}/utils/single_cell.R")
2
3
 
3
4
  # Loading 10x data into immunarch
4
5
  library(immunarch)
@@ -13,7 +14,8 @@ rdsfile = {{ out.rdsfile | quote }}
13
14
  metatxt = {{ out.metatxt | quote }}
14
15
  tmpdir = {{ envs.tmpdir | quote }}
15
16
  mode = {{ envs.mode | quote }}
16
- metacols = {{ envs.metacols | r}}
17
+ extracols = {{ envs.extracols | r}}
18
+ prefix = {{ envs.prefix | r }}
17
19
 
18
20
  metadata = read.table(
19
21
  metafile,
@@ -164,27 +166,24 @@ immdata$meta = left_join(
164
166
  by = "Sample"
165
167
  )
166
168
 
167
- saveRDS(immdata, file=rdsfile)
168
-
169
- metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
170
- # Clones Proportion CDR3.aa Barcode
171
- # 5 4 0.008583691 CAVRDTGNTPLVF;CASSEYSNQPQHF GTTCGGGCACTTACGA-1;TCTCTAAGTACCAGTT-1
172
- # 6 4 0.008583691 CALTQAAGNKLTF;CASRPEDLRGQPQHF GCTTGAAGTCGGCACT-1;TACTCGCTCCTAAGTG-1
173
- cldata = immdata$data[[i]][, unique(c(metacols, "Barcode"))]
174
- # # A tibble: 4 × 5
175
- # Sample Patient Timepoint Tissue
176
- # <chr> <chr> <chr> <chr>
177
- # 1 MC1685Pt011-Baseline-PB MC1685Pt011 Baseline PB
178
- mdata = as.list(immdata$meta[i, , drop=FALSE])
179
- for (mname in names(mdata)) {
180
- assign(mname, mdata[[mname]])
181
- }
169
+ immdata$prefix = prefix
182
170
 
183
- cldata %>%
184
- separate_rows(Barcode, sep=";") %>%
185
- distinct(Barcode, .keep_all = TRUE) %>%
186
- mutate(Barcode = glue("{{envs.prefix}}{Barcode}")) %>%
187
- column_to_rownames("Barcode")
171
+ saveRDS(immdata, file=rdsfile)
188
172
 
189
- }))
190
- write.table(metadf, metatxt, sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
173
+ exdata <- expand_immdata(immdata, cell_id = "Barcode") %>%
174
+ distinct(Sample, Barcode, .keep_all = TRUE) %>%
175
+ mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
176
+ select(any_of(c(
177
+ colnames(immdata$meta),
178
+ "Barcode",
179
+ "CDR3.aa",
180
+ "Clones",
181
+ "Proportion",
182
+ "V.name",
183
+ "D.name",
184
+ "J.name",
185
+ extracols
186
+ ))) %>%
187
+ column_to_rownames("Barcode")
188
+
189
+ write.table(exdata, metatxt, sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)