biopipen-0.32.1-py3-none-any.whl → biopipen-0.33.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. See the package's registry page for more details.

Files changed (134):
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +6 -0
  3. biopipen/core/filters.py +77 -26
  4. biopipen/core/testing.py +6 -1
  5. biopipen/ns/bam.py +39 -0
  6. biopipen/ns/cellranger.py +5 -0
  7. biopipen/ns/cellranger_pipeline.py +2 -2
  8. biopipen/ns/cnvkit_pipeline.py +4 -1
  9. biopipen/ns/delim.py +33 -27
  10. biopipen/ns/protein.py +99 -0
  11. biopipen/ns/scrna.py +411 -250
  12. biopipen/ns/snp.py +16 -3
  13. biopipen/ns/tcr.py +125 -1
  14. biopipen/ns/vcf.py +34 -0
  15. biopipen/ns/web.py +5 -1
  16. biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
  17. biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
  18. biopipen/reports/tcr/ClonalStats.svelte +15 -0
  19. biopipen/reports/utils/misc.liq +22 -7
  20. biopipen/scripts/bam/BamMerge.py +2 -2
  21. biopipen/scripts/bam/BamSampling.py +4 -4
  22. biopipen/scripts/bam/BamSort.py +141 -0
  23. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  24. biopipen/scripts/bam/BamSubsetByBed.py +3 -3
  25. biopipen/scripts/bam/CNVpytor.py +10 -10
  26. biopipen/scripts/bam/ControlFREEC.py +11 -11
  27. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  28. biopipen/scripts/bed/BedConsensus.py +5 -5
  29. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  30. biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
  31. biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
  32. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  33. biopipen/scripts/cellranger/CellRangerCount.py +20 -9
  34. biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
  35. biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
  36. biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
  37. biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
  38. biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
  39. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
  41. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  42. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  43. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
  44. biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
  45. biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
  46. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  47. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  48. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  49. biopipen/scripts/delim/SampleInfo.R +85 -139
  50. biopipen/scripts/misc/Config2File.py +2 -2
  51. biopipen/scripts/misc/Str2File.py +2 -2
  52. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  53. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  54. biopipen/scripts/protein/Prodigy.py +4 -4
  55. biopipen/scripts/protein/RMSD.py +178 -0
  56. biopipen/scripts/regulatory/MotifScan.py +8 -8
  57. biopipen/scripts/scrna/CellCellCommunication.py +59 -22
  58. biopipen/scripts/scrna/CellsDistribution.R +31 -6
  59. biopipen/scripts/scrna/MarkersFinder.R +272 -602
  60. biopipen/scripts/scrna/MetaMarkers.R +16 -7
  61. biopipen/scripts/scrna/RadarPlots.R +75 -35
  62. biopipen/scripts/scrna/SCP-plot.R +15202 -0
  63. biopipen/scripts/scrna/ScVelo.py +0 -0
  64. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -25
  65. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -47
  66. biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -385
  67. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +33 -13
  68. biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -228
  69. biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
  70. biopipen/scripts/scrna/SeuratMap2Ref.R +16 -6
  71. biopipen/scripts/scrna/SeuratPreparing.R +138 -81
  72. biopipen/scripts/scrna/SlingShot.R +71 -0
  73. biopipen/scripts/scrna/TopExpressingGenes.R +9 -7
  74. biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
  75. biopipen/scripts/snp/Plink2GTMat.py +26 -11
  76. biopipen/scripts/snp/PlinkFilter.py +7 -7
  77. biopipen/scripts/snp/PlinkFromVcf.py +8 -5
  78. biopipen/scripts/snp/PlinkSimulation.py +4 -4
  79. biopipen/scripts/snp/PlinkUpdateName.py +4 -4
  80. biopipen/scripts/stats/ChowTest.R +48 -22
  81. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  82. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  83. biopipen/scripts/tcr/CDR3AAPhyschem.R +12 -2
  84. biopipen/scripts/tcr/ClonalStats.R +484 -0
  85. biopipen/scripts/tcr/CloneResidency.R +23 -5
  86. biopipen/scripts/tcr/Immunarch-basic.R +8 -1
  87. biopipen/scripts/tcr/Immunarch-clonality.R +5 -0
  88. biopipen/scripts/tcr/Immunarch-diversity.R +25 -4
  89. biopipen/scripts/tcr/Immunarch-geneusage.R +15 -1
  90. biopipen/scripts/tcr/Immunarch-kmer.R +14 -1
  91. biopipen/scripts/tcr/Immunarch-overlap.R +15 -1
  92. biopipen/scripts/tcr/Immunarch-spectratyping.R +10 -1
  93. biopipen/scripts/tcr/Immunarch-tracking.R +6 -0
  94. biopipen/scripts/tcr/Immunarch-vjjunc.R +33 -0
  95. biopipen/scripts/tcr/ScRepLoading.R +127 -0
  96. biopipen/scripts/tcr/TCRClusterStats.R +24 -7
  97. biopipen/scripts/tcr/TCRDock.py +10 -6
  98. biopipen/scripts/tcr/TESSA.R +6 -1
  99. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  100. biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
  101. biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
  102. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  103. biopipen/scripts/vcf/BcftoolsSort.py +4 -4
  104. biopipen/scripts/vcf/BcftoolsView.py +5 -5
  105. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  106. biopipen/scripts/vcf/VcfAnno.py +11 -11
  107. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  108. biopipen/scripts/vcf/VcfFilter.py +5 -5
  109. biopipen/scripts/vcf/VcfFix.py +7 -7
  110. biopipen/scripts/vcf/VcfFix_utils.py +12 -3
  111. biopipen/scripts/vcf/VcfIndex.py +3 -3
  112. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  113. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  114. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  115. biopipen/scripts/vcf/bcftools_utils.py +3 -3
  116. biopipen/scripts/web/Download.py +8 -4
  117. biopipen/scripts/web/DownloadList.py +5 -5
  118. biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
  119. biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
  120. biopipen/scripts/web/gcloud_common.py +1 -1
  121. biopipen/utils/gsea.R +96 -42
  122. biopipen/utils/misc.R +205 -7
  123. biopipen/utils/misc.py +17 -8
  124. biopipen/utils/plot.R +53 -17
  125. biopipen/utils/reference.py +11 -11
  126. biopipen/utils/repr.R +146 -0
  127. biopipen/utils/vcf.py +1 -1
  128. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/METADATA +9 -9
  129. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/RECORD +131 -122
  130. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
  131. biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -139
  132. biopipen/scripts/scrna/SeuratPreparing-common.R +0 -452
  133. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -201
  134. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0
@@ -353,7 +353,14 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
353
353
  width = width * case$ncol
354
354
  }
355
355
  }
356
- png(file.path(ddir, "diversity.png"), width = width, height = height, res = res)
356
+
357
+ div_plot = file.path(ddir, "diversity.png")
358
+ png(div_plot, width = width, height = height, res = res)
359
+ print(p)
360
+ dev.off()
361
+
362
+ div_plot_pdf = file.path(ddir, "diversity.pdf")
363
+ pdf(div_plot_pdf, width = width / res, height = height / res)
357
364
  print(p)
358
365
  dev.off()
359
366
 
@@ -407,7 +414,7 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
407
414
  add_report(
408
415
  list(
409
416
  name = "Diversity Plot",
410
- contents = list(list(kind = "image", src = file.path(ddir, "diversity.png")))
417
+ contents = list(list(kind = "image", src = div_plot, download = div_plot_pdf))
411
418
  ),
412
419
  list(
413
420
  name = "Diversity Table",
@@ -559,6 +566,10 @@ run_raref_single = function(d, case, ddir, suffix = "", save_p = TRUE) {
559
566
  png(file.path(ddir, "raref.png"), width = devpars$width, height = devpars$height, res = devpars$res)
560
567
  print(p)
561
568
  dev.off()
569
+
570
+ pdf(file.path(ddir, "raref.pdf"), width = devpars$width / devpars$res, height = devpars$height / devpars$res)
571
+ print(p)
572
+ dev.off()
562
573
  } else {
563
574
  return (list(p = p, width = devpars$width))
564
575
  }
@@ -628,6 +639,14 @@ run_raref_multi = function(d, case, ddir) {
628
639
  )
629
640
  print(p)
630
641
  dev.off()
642
+
643
+ pdf(
644
+ file.path(ddir, paste0("raref-", slugify(case$separate_by), ".pdf")),
645
+ width = width / res,
646
+ height = height / res
647
+ )
648
+ print(p)
649
+ dev.off()
631
650
  }
632
651
 
633
652
  # Run the diversity estimation for one case
@@ -673,7 +692,8 @@ run_div_case = function(casename) {
673
692
  add_report(
674
693
  list(
675
694
  kind = "image",
676
- src = file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png"))
695
+ src = file.path(ddir, paste0("raref-", slugify(case$separate_by), ".png")),
696
+ download = file.path(ddir, paste0("raref-", slugify(case$separate_by), ".pdf"))
677
697
  ),
678
698
  h1 = "Rarefraction",
679
699
  h2 = casename
@@ -683,7 +703,8 @@ run_div_case = function(casename) {
683
703
  add_report(
684
704
  list(
685
705
  kind = "image",
686
- src = file.path(ddir, "raref.png")
706
+ src = file.path(ddir, "raref.png"),
707
+ download = file.path(ddir, "raref.pdf")
687
708
  ),
688
709
  h1 = "Rarefraction",
689
710
  h2 = casename
@@ -126,10 +126,16 @@ do_one_case_geneusage = function(name, case, gu_dir) {
126
126
  print(p + scale_fill_biopipen())
127
127
  dev.off()
128
128
 
129
+ ofig_pdf = file.path(odir, paste0(name, ".pdf"))
130
+ pdf(ofig_pdf, width = case$devpars$width / case$devpars$res, height = case$devpars$height / case$devpars$res)
131
+ print(p + scale_fill_biopipen())
132
+ dev.off()
133
+
129
134
  add_report(
130
135
  list(
131
136
  kind = "table_image",
132
137
  src = ofig,
138
+ download = ofig_pdf,
133
139
  descr = paste0(
134
140
  "Distribution of known gene segments following the ",
135
141
  '<a href="http://www.imgt.org/IMGTrepertoire/LocusGenes/" target="_blank">IMGT</a> ',
@@ -165,15 +171,23 @@ do_one_case_geneusage = function(name, case, gu_dir) {
165
171
  ap = do_call(vis, avis_args)
166
172
  if (aname == "DEFAULT") {
167
173
  aofig = file.path(odir, paste0(name, "-analysis.png"))
174
+ aofig_pdf = file.path(odir, paste0(name, "-analysis.pdf"))
168
175
  } else {
169
176
  aofig = file.path(odir, paste0(name, "-", aname, "-analysis.png"))
177
+ aofig_pdf = file.path(odir, paste0(name, "-", aname, "-analysis.pdf"))
170
178
  }
171
179
  png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
172
180
  print(ap)
173
181
  dev.off()
174
182
 
183
+ pdf(aofig_pdf,
184
+ width = case$analyses$cases[[aname]]$devpars$width / case$analyses$cases[[aname]]$devpars$res,
185
+ height = case$analyses$cases[[aname]]$devpars$height / case$analyses$cases[[aname]]$devpars$res)
186
+ print(ap)
187
+ dev.off()
188
+
175
189
  add_report(
176
- list(src = aofig, name = aname),
190
+ list(src = aofig, name = aname, download = aofig_pdf),
177
191
  h1 = "Gene Usage",
178
192
  h2 = ifelse(name == "DEFAULT", "#", name),
179
193
  h3 = "Gene Usage Analysis",
@@ -105,6 +105,11 @@ do_one_case_kmer = function(name, case, kmer_dir) {
105
105
  print(p)
106
106
  dev.off()
107
107
 
108
+ ofig_pdf = file.path(odir, "Allsamples.pdf")
109
+ pdf(ofig_pdf, width = case$devpars$width / case$devpars$res, height = case$devpars$height / case$devpars$res)
110
+ print(p)
111
+ dev.off()
112
+
108
113
  add_report(
109
114
  list(
110
115
  kind = "descr",
@@ -116,7 +121,7 @@ do_one_case_kmer = function(name, case, kmer_dir) {
116
121
  )
117
122
 
118
123
  add_report(
119
- list(kind = "image", src = ofig),
124
+ list(kind = "image", src = ofig, download = ofig_pdf),
120
125
  h1 = "Kmer and sequence motif analysis",
121
126
  h2 = ifelse(name == "DEFAULT", "#", name),
122
127
  h3 = "Kmer sequence occurrences"
@@ -150,9 +155,17 @@ do_one_case_kmer = function(name, case, kmer_dir) {
150
155
  print(ap)
151
156
  dev.off()
152
157
 
158
+ aofig_pdf = gsub(".png$", ".pdf", aofig)
159
+ pdf(aofig_pdf,
160
+ width = case$profiles$cases[[aname]]$devpars$width / case$profiles$cases[[aname]]$devpars$res,
161
+ height = case$profiles$cases[[aname]]$devpars$height / case$profiles$cases[[aname]]$devpars$res)
162
+ print(ap)
163
+ dev.off()
164
+
153
165
  add_report(
154
166
  list(
155
167
  src = aofig,
168
+ download = aofig_pdf,
156
169
  name = paste0(sample, ifelse(aname == "DEFAULT", "", paste0(" - ", aname)))
157
170
  ),
158
171
  h1 = "Kmer and sequence motif analysis",
@@ -138,10 +138,16 @@ do_one_case_overlap = function(name, case, ov_dir) {
138
138
  print(p)
139
139
  dev.off()
140
140
 
141
+ ofig_pdf = file.path(odir, paste0(name, ".pdf"))
142
+ pdf(ofig_pdf, width = case$devpars$width / case$devpars$res, height = case$devpars$height / case$devpars$res)
143
+ print(p)
144
+ dev.off()
145
+
141
146
  add_report(
142
147
  list(
143
148
  kind = "table_image",
144
149
  src = ofig,
150
+ download = ofig_pdf,
145
151
  descr = paste0(
146
152
  "Repertoire overlap is the most common approach to measure repertoire similarity, ",
147
153
  "using method <code>", case$method, "</code>, ",
@@ -179,15 +185,23 @@ do_one_case_overlap = function(name, case, ov_dir) {
179
185
  ap = do_call(vis, avis_args)
180
186
  if (aname == "DEFAULT") {
181
187
  aofig = file.path(odir, paste0(name, "-analysis.png"))
188
+ aofig_pdf = file.path(odir, paste0(name, "-analysis.pdf"))
182
189
  } else {
183
190
  aofig = file.path(odir, paste0(name, "-", aname, "-analysis.png"))
191
+ aofig_pdf = file.path(odir, paste0(name, "-", aname, "-analysis.pdf"))
184
192
  }
185
193
  png(aofig, width = case$analyses$cases[[aname]]$devpars$width, height = case$analyses$cases[[aname]]$devpars$height, res = case$analyses$cases[[aname]]$devpars$res)
186
194
  print(ap)
187
195
  dev.off()
188
196
 
197
+ pdf(aofig_pdf,
198
+ width = case$analyses$cases[[aname]]$devpars$width / case$analyses$cases[[aname]]$devpars$res,
199
+ height = case$analyses$cases[[aname]]$devpars$height / case$analyses$cases[[aname]]$devpars$res)
200
+ print(ap)
201
+ dev.off()
202
+
189
203
  add_report(
190
- list(src = aofig, name = aname),
204
+ list(src = aofig, name = aname, download = aofig_pdf),
191
205
  h1 = "Repertoire Overlaps",
192
206
  h2 = ifelse(name == "DEFAULT", "#", name),
193
207
  h3 = "Repertoire Overlap Analysis",
@@ -69,8 +69,17 @@ do_one_case_spectratyping = function(name, case, spect_dir) {
69
69
  print(vis(spec_obj))
70
70
  dev.off()
71
71
 
72
+ spectfile_pdf = file.path(odir, paste0(slugify(sample), ".spect.pdf"))
73
+ pdf(
74
+ spectfile_pdf,
75
+ width = case$devpars$width / case$devpars$res,
76
+ height = case$devpars$height / case$devpars$res
77
+ )
78
+ print(vis(spec_obj))
79
+ dev.off()
80
+
72
81
  add_report(
73
- list(src = spectfile, name = sample),
82
+ list(src = spectfile, name = sample, download = spectfile_pdf),
74
83
  h1 = "Spectratyping",
75
84
  h2 = name,
76
85
  ui = "table_of_images"
@@ -88,6 +88,11 @@ run_tracking_case = function(casename) {
88
88
  print(vis(imm_tracking))
89
89
  dev.off()
90
90
 
91
+ tracking_pdf = file.path(tracking_dir, paste0(slugify(casename), ".pdf"))
92
+ pdf(tracking_pdf, height=10, width=6 + 1.5 * length(subjects))
93
+ print(vis(imm_tracking))
94
+ dev.off()
95
+
91
96
  add_report(
92
97
  list(
93
98
  kind = "descr",
@@ -105,6 +110,7 @@ run_tracking_case = function(casename) {
105
110
  add_report(
106
111
  list(
107
112
  src = tracking_png,
113
+ download = tracking_pdf,
108
114
  name = if (casename == "DEFAULT") NULL else casename
109
115
  ),
110
116
  h1 = "Tracking of clonotypes",
@@ -110,7 +110,40 @@ do_one_case_vjjunc <- function(name, case) {
110
110
  }, bg.border = NA) # here set bg.border to NA is important
111
111
  dev.off()
112
112
 
113
+ # figfile_pdf <- file.path(odir, paste0(slugify(by_name), ".pdf"))
114
+ # png(figfile_pdf, width = case$devpars$width / case$devpars$res, height = case$devpars$height / case$devpars$res)
115
+ # circos.clear()
116
+ # tryCatch({
117
+ # chordDiagram(
118
+ # gsd,
119
+ # annotationTrack = c("grid", "axis"),
120
+ # preAllocateTracks = list(track.height = 0.25)
121
+ # )
122
+ # }, error = function(e) {
123
+ # log_warn("Error encountered: {e$message}, setting gap.after ...")
124
+ # circos.par(gap.after = c(rep(1, nrow(gsd) - 1), 5, rep(1, nrow(gsd) - 1), 5))
125
+ # chordDiagram(
126
+ # gsd,
127
+ # annotationTrack = c("grid", "axis"),
128
+ # preAllocateTracks = list(track.height = 0.25)
129
+ # )
130
+
131
+ # })
132
+ # circos.track(track.index = 1, panel.fun = function(x, y) {
133
+ # circos.text(
134
+ # CELL_META$xcenter,
135
+ # CELL_META$ylim[1],
136
+ # CELL_META$sector.index,
137
+ # cex = .8,
138
+ # facing = "clockwise",
139
+ # niceFacing = TRUE,
140
+ # adj = c(-0.2, 0.5)
141
+ # )
142
+ # }, bg.border = NA) # here set bg.border to NA is important
143
+ # dev.off()
144
+
113
145
  add_report(
146
+ # list(src = figfile, name = by_name, download = figfile_pdf),
114
147
  list(src = figfile, name = by_name),
115
148
  h1 = "V-J Junction Circos Plots",
116
149
  h2 = ifelse(name == "DEFAULT", "#" , name),
@@ -0,0 +1,127 @@
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+
3
+ library(rlang)
4
+ library(bracer)
5
+ library(scRepertoire)
6
+
7
+ metafile <- {{in.metafile | quote}}
8
+ outfile <- {{out.outfile | quote}}
9
+ combineTCR_args <- {{envs.combineTCR | r}}
10
+ exclude <- {{envs.exclude | r}}
11
+ if (length(exclude) == 1) {
12
+ exclude <- strsplit(exclude, ",")[[1]]
13
+ }
14
+
15
+ log_info("Loading metadata ...")
16
+ metadata <- read.table(metafile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
17
+ stopifnot("Error: Column `Sample` is not found in metafile." = "Sample" %in% colnames(metadata))
18
+ stopifnot("Error: Column `TCRData` is not found in metafile." = "TCRData" %in% colnames(metadata))
19
+ rownames(metadata) <- metadata$Sample
20
+
21
+ # helper function
22
+ get_contig_annofile <- function(dir, sample, warn = TRUE) {
23
+ if (is.na(dir) || !is.character(dir) || nchar(dir) == 0 || dir == "NA") {
24
+ warning(paste0("No path found for sample: ", sample), immediate. = TRUE)
25
+ return (NULL)
26
+ }
27
+ if (file.exists(dir) && !dir.exists(dir)) {
28
+ return(dir)
29
+ }
30
+
31
+ annofilepat <- paste0("*", "{all,filtered}", "_contig_annotations.csv*") # .gz
32
+ annofiles <- glob(file.path(as.character(dir), annofilepat))
33
+ if (length(annofiles) == 0) {
34
+ stop(
35
+ "Cannot find neither `filtered_contig_annotations.csv[.gz]` nor",
36
+ "`all_contig_annotations.csv[.gz]` in given TCRData for sample: ",
37
+ sample
38
+ )
39
+ }
40
+ if (length(annofiles) > 1) {
41
+ if (warn) {
42
+ warning("Found more than one file in given TCRData for sample: ", sample, immediate. = TRUE)
43
+ }
44
+ for (annofile in annofiles) {
45
+ # use filtered if both filtered_ and all_ are found
46
+ if (grepl("filtered", annofile)) {
47
+ annofiles <- annofile
48
+ break
49
+ }
50
+ # give a warning if only all_ is found
51
+ if (warn) {
52
+ warning("Using all_contig_annotations as filtred_config_annotations not found ",
53
+ "in given TCRData for sample: ", sample,
54
+ immediate. = TRUE
55
+ )
56
+ }
57
+ }
58
+ }
59
+ annofiles[1]
60
+ }
61
+
62
+ # for (i in seq_len(nrow(metadata))) {
63
+ # sample <- as.character(metadata$Sample[i])
64
+ # annofile <- get_contig_annofile(metadata$TCRData[i], sample)
65
+ # if (is.null(annofile)) { next }
66
+
67
+ # anno <- read.delim2(annofile, sep = ",", header = TRUE, stringsAsFactors = FALSE)
68
+ # # Add cdr1, cdr2, fwr1, fwr2, etc columns
69
+ # anno$cdr1 <- anno$cdr1 %||% ""
70
+ # anno$cdr1_nt <- anno$cdr1_nt %||% ""
71
+ # anno$cdr2 <- anno$cdr2 %||% ""
72
+ # anno$cdr2_nt <- anno$cdr2_nt %||% ""
73
+ # anno$fwr1 <- anno$fwr1 %||% ""
74
+ # anno$fwr1_nt <- anno$fwr1_nt %||% ""
75
+ # anno$fwr2 <- anno$fwr2 %||% ""
76
+ # anno$fwr2_nt <- anno$fwr2_nt %||% ""
77
+ # anno$fwr3 <- anno$fwr3 %||% ""
78
+ # anno$fwr3_nt <- anno$fwr3_nt %||% ""
79
+ # anno$fwr4 <- anno$fwr4 %||% ""
80
+ # anno$fwr4_nt <- anno$fwr4_nt %||% ""
81
+
82
+ # annotfile = file.path(datadir, paste0(sample, ".csv"))
83
+ # write.table(anno, annotfile, sep = ",", quote = FALSE, row.names = FALSE, col.names = TRUE)
84
+ # }
85
+
86
+ log_info("Reading TCR data ...")
87
+ contig_list <- lapply(seq_len(nrow(metadata)), function(i) {
88
+ sample <- as.character(metadata$Sample[i])
89
+ annofile <- get_contig_annofile(metadata$TCRData[i], sample)
90
+ if (is.null(annofile)) { return (NULL) }
91
+
92
+ log_info("- Sample: {sample} ...")
93
+ anno <- read.delim2(annofile, sep = ",", header = TRUE, stringsAsFactors = FALSE)
94
+ # Add cdr1, cdr2, fwr1, fwr2, etc columns for compatibility
95
+ anno$cdr1 <- anno$cdr1 %||% ""
96
+ anno$cdr1_nt <- anno$cdr1_nt %||% ""
97
+ anno$cdr2 <- anno$cdr2 %||% ""
98
+ anno$cdr2_nt <- anno$cdr2_nt %||% ""
99
+ anno$fwr1 <- anno$fwr1 %||% ""
100
+ anno$fwr1_nt <- anno$fwr1_nt %||% ""
101
+ anno$fwr2 <- anno$fwr2 %||% ""
102
+ anno$fwr2_nt <- anno$fwr2_nt %||% ""
103
+ anno$fwr3 <- anno$fwr3 %||% ""
104
+ anno$fwr3_nt <- anno$fwr3_nt %||% ""
105
+ anno$fwr4 <- anno$fwr4 %||% ""
106
+ anno$fwr4_nt <- anno$fwr4_nt %||% ""
107
+
108
+ anno
109
+ })
110
+ names(contig_list) <- as.character(metadata$Sample)
111
+ contig_list <- contig_list[!sapply(contig_list, is.null)]
112
+
113
+ log_info("Combining TCR data and adding meta data ...")
114
+ if (isTRUE(combineTCR_args$samples)) {
115
+ combineTCR_args$samples <- names(contig_list)
116
+ }
117
+ combineTCR_args$input.data <- contig_list
118
+ screp_data <- do_call(combineTCR, combineTCR_args)
119
+ for (col in colnames(metadata)) {
120
+ if (col %in% exclude) { next }
121
+ screp_data <- addVariable(screp_data, col, metadata[names(screp_data), col])
122
+ }
123
+
124
+ rm(contig_list, combineTCR_args)
125
+
126
+ log_info("Saving TCR data ...")
127
+ saveRDS(screp_data, outfile)
@@ -69,6 +69,7 @@ cluster_size_distribution = function(name) {
69
69
 
70
70
  outfile = file.path(odir, "cluster_size_distribution.txt")
71
71
  outplot = file.path(odir, "cluster_size_distribution.png")
72
+ outplot_pdf = file.path(odir, "cluster_size_distribution.pdf")
72
73
  write.table(clsizes, outfile, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
73
74
 
74
75
  plotGG(
@@ -82,14 +83,15 @@ cluster_size_distribution = function(name) {
82
83
  "scale_fill_biopipen()"
83
84
  ),
84
85
  devpars = case$devpars,
85
- outfile = outplot
86
+ outfile = c(outplot, outplot_pdf)
86
87
  )
87
88
 
88
89
  add_report(
89
90
  list(
90
91
  src = outplot,
91
92
  name = ifelse(name == "DEFAULT", FALSE, name),
92
- descr = paste0("Cluster size distribution for each ", case$by)
93
+ descr = paste0("Cluster size distribution for each ", case$by),
94
+ download = outplot_pdf
93
95
  ),
94
96
  ui = "table_of_images",
95
97
  h1 = "Cluster Size Distribution"
@@ -162,6 +164,8 @@ shared_clusters = function(name) {
162
164
  row_samples = samples
163
165
  }
164
166
 
167
+ hmplot = file.path(odir, "shared_clusters.png")
168
+ hmplot_pdf = file.path(odir, "shared_clusters.pdf")
165
169
  # Plot heatmap
166
170
  plotHeatmap(
167
171
  plotdata,
@@ -178,12 +182,13 @@ shared_clusters = function(name) {
178
182
  }
179
183
  ),
180
184
  devpars = case$devpars,
181
- outfile = file.path(odir, "shared_clusters.png")
185
+ outfile = c(hmplot, hmplot_pdf)
182
186
  )
183
187
 
184
188
  add_report(
185
189
  list(
186
- src = file.path(odir, "shared_clusters.png"),
190
+ src = hmplot,
191
+ download = hmplot_pdf,
187
192
  name = ifelse(name == "DEFAULT", FALSE, name),
188
193
  descr = paste0("Shared TCR clusters across samples")
189
194
  ),
@@ -219,16 +224,18 @@ shared_clusters_by_grouping = function(name) {
219
224
  }
220
225
 
221
226
  outfile = file.path(odir, "shared_clusters.png")
227
+ outfile_pdf = file.path(odir, "shared_clusters.pdf")
222
228
  plotVenn(
223
229
  data,
224
230
  ggs = 'ggtitle("Shared TCR Clusters")',
225
231
  devpars = case$devpars,
226
- outfile = outfile
232
+ outfile = c(outfile, outfile_pdf)
227
233
  )
228
234
 
229
235
  add_report(
230
236
  list(
231
237
  src = outfile,
238
+ download = outfile_pdf,
232
239
  name = ifelse(name == "DEFAULT", FALSE, name),
233
240
  descr = paste0("Shared TCR clusters across ", grouping)
234
241
  ),
@@ -275,6 +282,7 @@ sample_diversity = function(name) {
275
282
  }
276
283
  outfile = file.path(odir, "diversity.txt")
277
284
  outplot = file.path(odir, "diversity.png")
285
+ outplot_pdf = file.path(odir, "diversity.pdf")
278
286
  div = repDiversity(data, .method = case$method)
279
287
  write.table(
280
288
  if (ncol(div) == 1) {
@@ -320,7 +328,7 @@ sample_diversity = function(name) {
320
328
  args = list(mapping = mapping),
321
329
  ggs = ggs,
322
330
  devpars = case$devpars,
323
- outfile = outplot
331
+ outfile = c(outplot, outplot_pdf)
324
332
  )
325
333
  } else {
326
334
  if (is.null(case$by) || length(case$by) == 0) {
@@ -338,6 +346,14 @@ sample_diversity = function(name) {
338
346
  )
339
347
  print(p)
340
348
  dev.off()
349
+
350
+ pdf(
351
+ outplot_pdf,
352
+ width=case$devpars$width / case$devpars$res,
353
+ height=case$devpars$height / case$devpars$res
354
+ )
355
+ print(p)
356
+ dev.off()
341
357
  }
342
358
 
343
359
  add_report(
@@ -359,7 +375,8 @@ sample_diversity = function(name) {
359
375
  ),
360
376
  list(
361
377
  kind = "image",
362
- src = outplot
378
+ src = outplot,
379
+ download = outplot_pdf
363
380
  )
364
381
  )
365
382
  ),
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
2
4
  import sys
3
5
  from pathlib import Path
@@ -7,10 +9,10 @@ import pandas as pd
7
9
  from tempfile import gettempdir
8
10
  from biopipen.utils.misc import logger, run_command
9
11
 
10
- configfile = {{in.configfile | repr}} # pyright: ignore
11
- outdir = Path({{out.outdir | repr}}) # pyright: ignore
12
- envs = {{envs | dict | repr}} # pyright: ignore
13
- python = sys.executable
12
+ configfile: str = {{in.configfile | quote}} # pyright: ignore # noqa
13
+ outdir = Path({{out.outdir | quote}}) # pyright: ignore
14
+ envs: dict = {{envs | dict | repr}} # pyright: ignore
15
+ python: str | list[str] = sys.executable
14
16
 
15
17
  args = envs.copy()
16
18
  config = rtoml.load(Path(configfile))
@@ -18,8 +20,8 @@ args.update(config)
18
20
  model_name = args.pop("model_name")
19
21
  model_file = Path(args.pop("model_file"))
20
22
  data_dir = args.pop("data_dir", None)
21
- tcrdock = args.pop("tcrdock", None)
22
- tmpdir = args.pop("tmpdir", gettempdir())
23
+ tcrdock: Path | str | None = args.pop("tcrdock", None)
24
+ tmpdir: str = args.pop("tmpdir", gettempdir())
23
25
  python = args.pop("python", python)
24
26
 
25
27
  if not isinstance(python, (list, tuple)):
@@ -46,6 +48,8 @@ if not tcrdock:
46
48
  ]
47
49
  run_command(cmd, fg=True, cwd=str(tcrdock))
48
50
 
51
+ tcrdock = str(tcrdock)
52
+
49
53
  if not model_file.is_absolute():
50
54
  model_file = Path(data_dir) / "params" / model_file
51
55
 
@@ -198,10 +198,15 @@ png(file.path(result_dir, "Cluster_size_dist.png"), width=8, height=8, units="in
198
198
  print(p)
199
199
  dev.off()
200
200
 
201
+ pdf(file.path(result_dir, "Cluster_size_dist.pdf"), width=8, height=8)
202
+ print(p)
203
+ dev.off()
204
+
201
205
  add_report(
202
206
  list(
203
207
  src = file.path(result_dir, "Cluster_size_dist.png"),
204
- descr = "Histogram of cluster size distribution"
208
+ descr = "Histogram of cluster size distribution",
209
+ download = file.path(result_dir, "Cluster_size_dist.pdf")
205
210
  ),
206
211
  list(
207
212
  src = file.path(result_dir, "clone_size.png"),
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env bash
2
2
 
3
3
  # run the command and capture the stdout
4
- out=$(command $@)
4
+ out=$(command "$@")
5
5
 
6
6
  echo "$out"
7
7
 
@@ -6,11 +6,11 @@ from biopipen.utils.reference import tabix_index
6
6
  from biopipen.utils.misc import logger
7
7
  from biopipen.scripts.vcf.bcftools_utils import run_bcftools
8
8
 
9
- infile = {{in.infile | repr}} # pyright: ignore # noqa: E999
10
- annfile = {{in.annfile | repr}} # pyright: ignore
11
- outfile = {{out.outfile | repr}} # pyright: ignore
12
- joboutdir = {{job.outdir | repr}} # pyright: ignore
13
- envs = {{envs | dict | repr}} # pyright: ignore
9
+ infile: str = {{in.infile | quote}} # pyright: ignore # noqa: E999
10
+ annfile: str = {{in.annfile | quote}} # pyright: ignore
11
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
12
+ joboutdir: str = {{job.outdir | quote}} # pyright: ignore
13
+ envs: dict = {{envs | dict | repr}} # pyright: ignore
14
14
 
15
15
  bcftools = envs.pop("bcftools")
16
16
  tabix = envs.pop("tabix")
@@ -25,14 +25,14 @@ if isinstance(columns, list):
25
25
  columns = ",".join(columns)
26
26
 
27
27
  if "c" in envs:
28
- logger.warning("Ignoring envs\[c], use envs\[columns] instead.")
28
+ logger.warning(r"Ignoring envs\[c], use envs\[columns] instead.")
29
29
  del envs["c"]
30
30
 
31
31
  if isinstance(remove, list):
32
32
  remove = ",".join(remove)
33
33
 
34
34
  if "x" in envs:
35
- logger.warning("Ignoring envs\[x], use envs\[remove] instead.")
35
+ logger.warning(r"Ignoring envs\[x], use envs\[remove] instead.")
36
36
  del envs["x"]
37
37
 
38
38
  envs_has_annfile = "a" in envs or "annotations" in envs
@@ -43,7 +43,7 @@ if header:
43
43
 
44
44
  if annfile and envs_has_annfile:
45
45
  logger.warning(
46
- "Ignoring envs\[a/annotations] because in.annfile is provided."
46
+ r"Ignoring envs\[a/annotations] because in.annfile is provided."
47
47
  )
48
48
  with suppress(KeyError):
49
49
  del envs["a"]
@@ -3,11 +3,11 @@ from pathlib import Path, PosixPath # noqa: F401
3
3
  from biopipen.utils.misc import logger
4
4
  from biopipen.scripts.vcf.bcftools_utils import run_bcftools
5
5
 
6
- infile = {{in.infile | repr}} # pyright: ignore # noqa: #999
7
- outfile = {{out.outfile | repr}} # pyright: ignore
6
+ infile: str | Path = {{in.infile | quote}} # pyright: ignore # noqa: #999
7
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
8
8
  outdir = Path(outfile).parent
9
9
 
10
- envs = {{envs | dict | repr}} # pyright: ignore
10
+ envs: dict = {{envs | dict | repr}} # pyright: ignore
11
11
  bcftools = envs.pop("bcftools")
12
12
  tabix = envs.pop("tabix")
13
13
  keep = envs.pop("keep")
@@ -0,0 +1,31 @@
1
+ from biopipen.utils.reference import tabix_index
2
+ from biopipen.utils.misc import logger
3
+ from biopipen.scripts.vcf.bcftools_utils import run_bcftools
4
+
5
+ infiles: list = {{in.infiles | each: as_path}} # pyright: ignore # noqa: E999
6
+ outfile = {{out.outfile | repr}} # pyright: ignore
7
+ joboutdir = {{job.outdir | repr}} # pyright: ignore
8
+ envs: dict = {{envs | dict | repr}} # pyright: ignore
9
+
10
+ bcftools = envs.pop("bcftools")
11
+ tabix = envs.pop("tabix")
12
+ ncores = envs.pop("ncores")
13
+ gz = envs.pop("gz")
14
+ index = envs.pop("index")
15
+
16
+ envs.setdefault("force-single", True)
17
+ envs.setdefault("missing-to-ref", True)
18
+
19
+ if index and not gz:
20
+ logger.warning("Forcing envs.gz to True because envs.index is True.")
21
+ gz = True
22
+
23
+ if "O" not in envs and "output-type" not in envs and "output_type" not in envs:
24
+ envs["O"] = "z" if gz else "v"
25
+
26
+ envs[""] = [bcftools, "merge"]
27
+ envs["o"] = outfile
28
+ envs["threads"] = ncores
29
+ envs["_"] = infiles
30
+
31
+ run_bcftools(envs, bcftools=bcftools, index=index, tabix=tabix)