biopipen-0.32.1-py3-none-any.whl → biopipen-0.33.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (134)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +6 -0
  3. biopipen/core/filters.py +77 -26
  4. biopipen/core/testing.py +6 -1
  5. biopipen/ns/bam.py +39 -0
  6. biopipen/ns/cellranger.py +5 -0
  7. biopipen/ns/cellranger_pipeline.py +2 -2
  8. biopipen/ns/cnvkit_pipeline.py +4 -1
  9. biopipen/ns/delim.py +33 -27
  10. biopipen/ns/protein.py +99 -0
  11. biopipen/ns/scrna.py +411 -250
  12. biopipen/ns/snp.py +16 -3
  13. biopipen/ns/tcr.py +125 -1
  14. biopipen/ns/vcf.py +34 -0
  15. biopipen/ns/web.py +5 -1
  16. biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
  17. biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
  18. biopipen/reports/tcr/ClonalStats.svelte +15 -0
  19. biopipen/reports/utils/misc.liq +22 -7
  20. biopipen/scripts/bam/BamMerge.py +2 -2
  21. biopipen/scripts/bam/BamSampling.py +4 -4
  22. biopipen/scripts/bam/BamSort.py +141 -0
  23. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  24. biopipen/scripts/bam/BamSubsetByBed.py +3 -3
  25. biopipen/scripts/bam/CNVpytor.py +10 -10
  26. biopipen/scripts/bam/ControlFREEC.py +11 -11
  27. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  28. biopipen/scripts/bed/BedConsensus.py +5 -5
  29. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  30. biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
  31. biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
  32. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  33. biopipen/scripts/cellranger/CellRangerCount.py +20 -9
  34. biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
  35. biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
  36. biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
  37. biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
  38. biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
  39. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
  41. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  42. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  43. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
  44. biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
  45. biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
  46. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  47. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  48. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  49. biopipen/scripts/delim/SampleInfo.R +85 -139
  50. biopipen/scripts/misc/Config2File.py +2 -2
  51. biopipen/scripts/misc/Str2File.py +2 -2
  52. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  53. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  54. biopipen/scripts/protein/Prodigy.py +4 -4
  55. biopipen/scripts/protein/RMSD.py +178 -0
  56. biopipen/scripts/regulatory/MotifScan.py +8 -8
  57. biopipen/scripts/scrna/CellCellCommunication.py +59 -22
  58. biopipen/scripts/scrna/CellsDistribution.R +31 -6
  59. biopipen/scripts/scrna/MarkersFinder.R +272 -602
  60. biopipen/scripts/scrna/MetaMarkers.R +16 -7
  61. biopipen/scripts/scrna/RadarPlots.R +75 -35
  62. biopipen/scripts/scrna/SCP-plot.R +15202 -0
  63. biopipen/scripts/scrna/ScVelo.py +0 -0
  64. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -25
  65. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -47
  66. biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -385
  67. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +33 -13
  68. biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -228
  69. biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
  70. biopipen/scripts/scrna/SeuratMap2Ref.R +16 -6
  71. biopipen/scripts/scrna/SeuratPreparing.R +138 -81
  72. biopipen/scripts/scrna/SlingShot.R +71 -0
  73. biopipen/scripts/scrna/TopExpressingGenes.R +9 -7
  74. biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
  75. biopipen/scripts/snp/Plink2GTMat.py +26 -11
  76. biopipen/scripts/snp/PlinkFilter.py +7 -7
  77. biopipen/scripts/snp/PlinkFromVcf.py +8 -5
  78. biopipen/scripts/snp/PlinkSimulation.py +4 -4
  79. biopipen/scripts/snp/PlinkUpdateName.py +4 -4
  80. biopipen/scripts/stats/ChowTest.R +48 -22
  81. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  82. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  83. biopipen/scripts/tcr/CDR3AAPhyschem.R +12 -2
  84. biopipen/scripts/tcr/ClonalStats.R +484 -0
  85. biopipen/scripts/tcr/CloneResidency.R +23 -5
  86. biopipen/scripts/tcr/Immunarch-basic.R +8 -1
  87. biopipen/scripts/tcr/Immunarch-clonality.R +5 -0
  88. biopipen/scripts/tcr/Immunarch-diversity.R +25 -4
  89. biopipen/scripts/tcr/Immunarch-geneusage.R +15 -1
  90. biopipen/scripts/tcr/Immunarch-kmer.R +14 -1
  91. biopipen/scripts/tcr/Immunarch-overlap.R +15 -1
  92. biopipen/scripts/tcr/Immunarch-spectratyping.R +10 -1
  93. biopipen/scripts/tcr/Immunarch-tracking.R +6 -0
  94. biopipen/scripts/tcr/Immunarch-vjjunc.R +33 -0
  95. biopipen/scripts/tcr/ScRepLoading.R +127 -0
  96. biopipen/scripts/tcr/TCRClusterStats.R +24 -7
  97. biopipen/scripts/tcr/TCRDock.py +10 -6
  98. biopipen/scripts/tcr/TESSA.R +6 -1
  99. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  100. biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
  101. biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
  102. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  103. biopipen/scripts/vcf/BcftoolsSort.py +4 -4
  104. biopipen/scripts/vcf/BcftoolsView.py +5 -5
  105. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  106. biopipen/scripts/vcf/VcfAnno.py +11 -11
  107. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  108. biopipen/scripts/vcf/VcfFilter.py +5 -5
  109. biopipen/scripts/vcf/VcfFix.py +7 -7
  110. biopipen/scripts/vcf/VcfFix_utils.py +12 -3
  111. biopipen/scripts/vcf/VcfIndex.py +3 -3
  112. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  113. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  114. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  115. biopipen/scripts/vcf/bcftools_utils.py +3 -3
  116. biopipen/scripts/web/Download.py +8 -4
  117. biopipen/scripts/web/DownloadList.py +5 -5
  118. biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
  119. biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
  120. biopipen/scripts/web/gcloud_common.py +1 -1
  121. biopipen/utils/gsea.R +96 -42
  122. biopipen/utils/misc.R +205 -7
  123. biopipen/utils/misc.py +17 -8
  124. biopipen/utils/plot.R +53 -17
  125. biopipen/utils/reference.py +11 -11
  126. biopipen/utils/repr.R +146 -0
  127. biopipen/utils/vcf.py +1 -1
  128. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/METADATA +9 -9
  129. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/RECORD +131 -122
  130. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
  131. biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -139
  132. biopipen/scripts/scrna/SeuratPreparing-common.R +0 -452
  133. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -201
  134. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0
@@ -171,14 +171,17 @@ do_enrich <- function(info, markers, sig) {
171
171
  next
172
172
  }
173
173
 
174
- png(
175
- file.path(info$casedir, paste0("Enrichr-", db, ".png")),
176
- res = 100, height = 600, width = 800
177
- )
178
- print(
179
- plotEnrich(enriched[[db]], showTerms = 20, title = db) +
174
+ p <- plotEnrich(enriched[[db]], showTerms = 20, title = db) +
180
175
  theme_prism()
181
- )
176
+
177
+ plotfile <- file.path(info$casedir, paste0("Enrichr-", db, ".png"))
178
+ png(plotfile, res = 100, height = 600, width = 800)
179
+ print(p)
180
+ dev.off()
181
+
182
+ plotfile_pdf <- gsub(".png$", ".pdf", plotfile)
183
+ pdf(plotfile_pdf, height = 6, width = 8)
184
+ print(p)
182
185
  dev.off()
183
186
  }
184
187
  }
@@ -292,9 +295,15 @@ do_case <- function(casename) {
292
295
  print(p)
293
296
  dev.off()
294
297
 
298
+ outfile_pdf <- gsub(".png$", ".pdf", outfile)
299
+ pdf(outfile_pdf, height = 6, width = 8)
300
+ print(p)
301
+ dev.off()
302
+
295
303
  geneplots[[length(geneplots) + 1]] <- list(
296
304
  kind = "table_image",
297
305
  src = outfile,
306
+ download = outfile_pdf,
298
307
  name = gene
299
308
  )
300
309
  }
@@ -1,4 +1,5 @@
1
1
  {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "repr.R" | source_r }}
2
3
 
3
4
  library(Seurat)
4
5
  library(rlang)
@@ -8,6 +9,8 @@ library(tibble)
8
9
  library(ggplot2)
9
10
  library(ggradar)
10
11
  library(ggprism)
12
+ library(glue)
13
+ library(gglogger)
11
14
 
12
15
  # input/output
13
16
  srtfile = {{in.srtobj | r}}
@@ -192,31 +195,46 @@ do_radarplot <- function(info, case, counts) {
192
195
  }
193
196
 
194
197
  # Plot
195
- plotfile = file.path(info$casedir, "plot.png")
196
198
  if (!is.null(case$colors) && length(case$colors) == 1 && case$colors == "biopipen") {
197
199
  colors = pal_biopipen()(nrow(rdr_data))
198
200
  } else if (!is.null(case$colors) && length(case$colors) > 0) {
199
201
  colors = trimws(unlist(strsplit(case$colors, ",")))
200
202
  }
203
+
204
+ plotdf <- rdr_data %>%
205
+ as.data.frame() %>%
206
+ rownames_to_column("group") %>%
207
+ mutate(group = factor(group, levels = rownames(rdr_data)))
208
+
201
209
  p = ggradar(
202
- rdr_data %>%
203
- as.data.frame() %>%
204
- rownames_to_column("group") %>%
205
- mutate(group = factor(group, levels = rownames(rdr_data))),
210
+ plotdf,
206
211
  values.radar = paste0(breaks, "%"),
207
212
  grid.min = breaks[1] / 100,
208
213
  grid.mid = breaks[2] / 100,
209
214
  grid.max = breaks[3] / 100,
210
215
  group.colours = colors
211
216
  )
212
- png(
213
- plotfile,
214
- width = case$devpars$width,
215
- height = case$devpars$height,
216
- res = case$devpars$res
217
+ prefix <- file.path(info$casedir, "plot")
218
+ save_plot(p, prefix, case$devpars)
219
+
220
+ code_file <- paste0(prefix, ".R")
221
+ code = glue(
222
+ "library(ggradar)
223
+
224
+ plotdf <- {repr(plotdf)}
225
+ breaks <- {repr(breaks)}
226
+ colors <- {repr(colors)}
227
+
228
+ ggradar(
229
+ plotdf,
230
+ values.radar = paste0(breaks, '%'),
231
+ grid.min = breaks[1] / 100,
232
+ grid.mid = breaks[2] / 100,
233
+ grid.max = breaks[3] / 100,
234
+ group.colours = colors
235
+ )"
217
236
  )
218
- print(p)
219
- dev.off()
237
+ writeLines(code, code_file)
220
238
  }
221
239
 
222
240
  do_barplot_and_tests <- function(info, case, counts) {
@@ -266,34 +284,42 @@ do_barplot_and_tests <- function(info, case, counts) {
266
284
  rowwise() %>%
267
285
  mutate(mean_sd1 = max(.mean - .sd, 0), mean_sd2 = .mean + .sd)
268
286
 
287
+ if (!is.null(case$colors) && length(case$colors) == 1 && case$colors == "biopipen") {
288
+ colors <- pal_biopipen(.8)(length(unique(plotdata[[case$by]])))
289
+ } else if (!is.null(case$colors) && length(case$colors) > 0) {
290
+ colors <- trimws(unlist(strsplit(case$colors, ",")))
291
+ }
292
+
269
293
  # Plot the barplot
270
- plotfile = file.path(info$casedir, "barplot.png")
271
294
  p = ggplot(plotdata, aes(x = !!sym(case$ident), y = .mean, fill = !!sym(case$by))) +
272
- geom_bar(stat = "identity", position = "dodge") +
295
+ geom_bar(stat = "identity", position = "dodge", color = "#333333") +
273
296
  geom_errorbar(
274
297
  aes(ymin = mean_sd1, ymax = mean_sd2),
275
- width = 0.4,
276
- linewidth = 0.8,
298
+ width = 0.2,
299
+ alpha = 0.5,
300
+ linewidth = 0.6,
277
301
  position = position_dodge(0.9),
278
302
  color = "#333333"
279
303
  ) +
280
- theme_prism(axis_text_angle = 90) +
281
- ylab("Fraction of cells")
282
-
283
- if (!is.null(case$colors) && length(case$colors) == 1 && case$colors == "biopipen") {
284
- p <- p + scale_fill_biopipen(.8)
285
- } else if (!is.null(case$colors) && length(case$colors) > 0) {
286
- p <- p + scale_fill_manual(values = trimws(unlist(strsplit(case$colors, ","))))
287
- }
288
-
289
- png(
290
- plotfile,
291
- width = case$bar_devpars$width,
292
- height = case$bar_devpars$height,
293
- res = case$bar_devpars$res
294
- )
295
- print(p)
296
- dev.off()
304
+ theme_prism(axis_text_angle = 45) +
305
+ ylab("Fraction of cells") +
306
+ scale_fill_manual(values = colors)
307
+
308
+ prefix = file.path(info$casedir, "barplot")
309
+ save_plot(p, prefix, case$bar_devpars)
310
+ neat_case <- list(by = case$by, ident = case$ident)
311
+ save_plotcode(
312
+ p,
313
+ setup = c(
314
+ 'library(rlang)',
315
+ 'library(ggplot2)',
316
+ 'library(ggprism)',
317
+ '',
318
+ 'load("data.RData")',
319
+ 'case <- neat_case'
320
+ ),
321
+ prefix,
322
+ "plotdata", "neat_case", "colors")
297
323
 
298
324
  # Do the tests in each cluster between groups on .frac
299
325
  bys <- bardata %>% pull(!!sym(case$by)) %>% unique()
@@ -348,7 +374,13 @@ add_case_report = function(info, breakdown, test) {
348
374
  contents = list(
349
375
  list(
350
376
  kind = "image",
351
- src = file.path(info$casedir, "plot.png")
377
+ src = file.path(info$casedir, "plot.png"),
378
+ download = list(
379
+ file.path(info$casedir, "plot.pdf"),
380
+ list(
381
+ src = file.path(info$casedir, "plot.R"),
382
+ tip = "Download the code used to reproduce the plot",
383
+ icon = "Code"))
352
384
  )
353
385
  )
354
386
  ),
@@ -381,7 +413,15 @@ add_case_report = function(info, breakdown, test) {
381
413
  contents = list(
382
414
  list(
383
415
  kind = "image",
384
- src = file.path(info$casedir, "barplot.png")
416
+ src = file.path(info$casedir, "barplot.png"),
417
+ download = list(
418
+ file.path(info$casedir, "barplot.pdf"),
419
+ list(
420
+ src = file.path(info$casedir, "barplot.code.zip"),
421
+ tip = "Download the code used to reproduce the plot",
422
+ icon = "Code"
423
+ )
424
+ )
385
425
  )
386
426
  )
387
427
  ))