biopipen-0.32.1-py3-none-any.whl → biopipen-0.33.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (134)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +6 -0
  3. biopipen/core/filters.py +77 -26
  4. biopipen/core/testing.py +6 -1
  5. biopipen/ns/bam.py +39 -0
  6. biopipen/ns/cellranger.py +5 -0
  7. biopipen/ns/cellranger_pipeline.py +2 -2
  8. biopipen/ns/cnvkit_pipeline.py +4 -1
  9. biopipen/ns/delim.py +33 -27
  10. biopipen/ns/protein.py +99 -0
  11. biopipen/ns/scrna.py +411 -250
  12. biopipen/ns/snp.py +16 -3
  13. biopipen/ns/tcr.py +125 -1
  14. biopipen/ns/vcf.py +34 -0
  15. biopipen/ns/web.py +5 -1
  16. biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
  17. biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
  18. biopipen/reports/tcr/ClonalStats.svelte +15 -0
  19. biopipen/reports/utils/misc.liq +22 -7
  20. biopipen/scripts/bam/BamMerge.py +2 -2
  21. biopipen/scripts/bam/BamSampling.py +4 -4
  22. biopipen/scripts/bam/BamSort.py +141 -0
  23. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  24. biopipen/scripts/bam/BamSubsetByBed.py +3 -3
  25. biopipen/scripts/bam/CNVpytor.py +10 -10
  26. biopipen/scripts/bam/ControlFREEC.py +11 -11
  27. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  28. biopipen/scripts/bed/BedConsensus.py +5 -5
  29. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  30. biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
  31. biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
  32. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  33. biopipen/scripts/cellranger/CellRangerCount.py +20 -9
  34. biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
  35. biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
  36. biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
  37. biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
  38. biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
  39. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
  41. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  42. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  43. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
  44. biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
  45. biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
  46. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  47. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  48. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  49. biopipen/scripts/delim/SampleInfo.R +85 -139
  50. biopipen/scripts/misc/Config2File.py +2 -2
  51. biopipen/scripts/misc/Str2File.py +2 -2
  52. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  53. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  54. biopipen/scripts/protein/Prodigy.py +4 -4
  55. biopipen/scripts/protein/RMSD.py +178 -0
  56. biopipen/scripts/regulatory/MotifScan.py +8 -8
  57. biopipen/scripts/scrna/CellCellCommunication.py +59 -22
  58. biopipen/scripts/scrna/CellsDistribution.R +31 -6
  59. biopipen/scripts/scrna/MarkersFinder.R +272 -602
  60. biopipen/scripts/scrna/MetaMarkers.R +16 -7
  61. biopipen/scripts/scrna/RadarPlots.R +75 -35
  62. biopipen/scripts/scrna/SCP-plot.R +15202 -0
  63. biopipen/scripts/scrna/ScVelo.py +0 -0
  64. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -25
  65. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -47
  66. biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -385
  67. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +33 -13
  68. biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -228
  69. biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
  70. biopipen/scripts/scrna/SeuratMap2Ref.R +16 -6
  71. biopipen/scripts/scrna/SeuratPreparing.R +138 -81
  72. biopipen/scripts/scrna/SlingShot.R +71 -0
  73. biopipen/scripts/scrna/TopExpressingGenes.R +9 -7
  74. biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
  75. biopipen/scripts/snp/Plink2GTMat.py +26 -11
  76. biopipen/scripts/snp/PlinkFilter.py +7 -7
  77. biopipen/scripts/snp/PlinkFromVcf.py +8 -5
  78. biopipen/scripts/snp/PlinkSimulation.py +4 -4
  79. biopipen/scripts/snp/PlinkUpdateName.py +4 -4
  80. biopipen/scripts/stats/ChowTest.R +48 -22
  81. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  82. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  83. biopipen/scripts/tcr/CDR3AAPhyschem.R +12 -2
  84. biopipen/scripts/tcr/ClonalStats.R +484 -0
  85. biopipen/scripts/tcr/CloneResidency.R +23 -5
  86. biopipen/scripts/tcr/Immunarch-basic.R +8 -1
  87. biopipen/scripts/tcr/Immunarch-clonality.R +5 -0
  88. biopipen/scripts/tcr/Immunarch-diversity.R +25 -4
  89. biopipen/scripts/tcr/Immunarch-geneusage.R +15 -1
  90. biopipen/scripts/tcr/Immunarch-kmer.R +14 -1
  91. biopipen/scripts/tcr/Immunarch-overlap.R +15 -1
  92. biopipen/scripts/tcr/Immunarch-spectratyping.R +10 -1
  93. biopipen/scripts/tcr/Immunarch-tracking.R +6 -0
  94. biopipen/scripts/tcr/Immunarch-vjjunc.R +33 -0
  95. biopipen/scripts/tcr/ScRepLoading.R +127 -0
  96. biopipen/scripts/tcr/TCRClusterStats.R +24 -7
  97. biopipen/scripts/tcr/TCRDock.py +10 -6
  98. biopipen/scripts/tcr/TESSA.R +6 -1
  99. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  100. biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
  101. biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
  102. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  103. biopipen/scripts/vcf/BcftoolsSort.py +4 -4
  104. biopipen/scripts/vcf/BcftoolsView.py +5 -5
  105. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  106. biopipen/scripts/vcf/VcfAnno.py +11 -11
  107. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  108. biopipen/scripts/vcf/VcfFilter.py +5 -5
  109. biopipen/scripts/vcf/VcfFix.py +7 -7
  110. biopipen/scripts/vcf/VcfFix_utils.py +12 -3
  111. biopipen/scripts/vcf/VcfIndex.py +3 -3
  112. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  113. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  114. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  115. biopipen/scripts/vcf/bcftools_utils.py +3 -3
  116. biopipen/scripts/web/Download.py +8 -4
  117. biopipen/scripts/web/DownloadList.py +5 -5
  118. biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
  119. biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
  120. biopipen/scripts/web/gcloud_common.py +1 -1
  121. biopipen/utils/gsea.R +96 -42
  122. biopipen/utils/misc.R +205 -7
  123. biopipen/utils/misc.py +17 -8
  124. biopipen/utils/plot.R +53 -17
  125. biopipen/utils/reference.py +11 -11
  126. biopipen/utils/repr.R +146 -0
  127. biopipen/utils/vcf.py +1 -1
  128. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/METADATA +9 -9
  129. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/RECORD +131 -122
  130. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
  131. biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -139
  132. biopipen/scripts/scrna/SeuratPreparing-common.R +0 -452
  133. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -201
  134. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0
File without changes
@@ -1,15 +1,16 @@
1
1
  # srtobj, clustrees_defaults, clustrees
2
- log_info("clustrees:")
2
+
3
+ log$info("clustrees:")
4
+
3
5
  if (
4
6
  (is.null(clustrees) || length(clustrees) == 0) &&
5
- (is.null(clustrees_defaults$prefix) || clustrees_defaults$prefix == "")) {
6
- log_warn("- no cases, skipping intentionally ...")
7
+ (is.null(clustrees_defaults$prefix) || isFALSE(clustrees_defaults$prefix))) {
8
+ log$warn("- no case specified, skipping ...")
7
9
  } else { # clustrees set or prefix is not empty
8
- library(clustree)
9
10
  odir = file.path(outdir, "clustrees")
10
11
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
11
12
 
12
- if ((is.null(clustrees) || length(clustrees) == 0) && clustrees_defaults$prefix == "_auto") {
13
+ if ((is.null(clustrees) || length(clustrees) == 0) && isTRUE(clustrees_defaults$prefix)) {
13
14
  clustrees <- list()
14
15
  for (key in names(srtobj@commands)) {
15
16
  if (startsWith(key, "FindClusters") && length(srtobj@commands[[key]]$resolution) > 1) {
@@ -23,35 +24,24 @@ if (
23
24
  }
24
25
  }
25
26
  if (length(clustrees) == 0) {
26
- log_warn("- no cases found, skipping ...")
27
+ log$warn("- no case found, skipping ...")
27
28
  } else {
28
29
  reports <- list()
29
30
  for (name in names(clustrees)) {
30
31
  if (is.null(clustrees[[name]]$prefix)) {
31
32
  stop(paste0("clustrees: prefix is required for case: ", name))
32
33
  }
34
+ log$info("- Case: {name} ...")
33
35
  case <- list_update(clustrees_defaults, clustrees[[name]])
36
+ extract_vars(case, "devpars", "more_formats", "save_code")
34
37
 
35
- devpars <- case$devpars
36
- devpars$width <- devpars$width %||% clustrees_defaults$devpars$width %||% 800
37
- devpars$height <- devpars$height %||% clustrees_defaults$devpars$height %||% 1000
38
- devpars$res <- devpars$res %||% clustrees_defaults$devpars$res %||% 100
39
- case$devpars <- NULL
40
38
  prefix <- sub("\\.$", "", case$prefix)
41
- log_info("- Case: {name} ...")
42
39
  case$prefix <- paste0(prefix, ".")
43
- case$x <- srtobj@meta.data %>% select(starts_with(case$prefix))
44
- case$x <- case$x[complete.cases(case$x), , drop = FALSE]
40
+ case$object <- srtobj
45
41
 
46
42
  command <- srtobj@commands[[paste0("FindClusters.", prefix)]] %||%
47
43
  (if(prefix == "seurat_clusters") srtobj@commands$FindClusters else NULL)
48
44
 
49
- clustree_file <- file.path(odir, paste0(prefix, ".clustree.png"))
50
- png(clustree_file, width = devpars$width, height = devpars$height, res = devpars$res)
51
- p <- do_call(clustree, case)
52
- print(p)
53
- dev.off()
54
-
55
45
  if (is.null(command)) {
56
46
  resolution <- substring(colnames(case$x), nchar(case$prefix) + 1)
57
47
  } else {
@@ -59,15 +49,23 @@ if (
59
49
  }
60
50
  resolution_used <- resolution[length(resolution)]
61
51
 
62
- reports[[length(reports) + 1]] <- list(
63
- kind = "table_image",
64
- src = clustree_file,
65
- name = name,
52
+ plot_prefix <- file.path(odir, paste0(slugify(prefix), ".clustree"))
53
+ p <- do_call(gglogger::register(ClustreePlot), case)
54
+ save_plot(p, plot_prefix, devpars, formats = c("png", more_formats))
55
+
56
+ if (save_code) {
57
+ save_plotcode(p, plot_prefix,
58
+ setup = c("library(scplotter)", "load('data.RData')", "invisible(list2env('case'))"),
59
+ "case",
60
+ auto_data_setup = FALSE)
61
+ }
62
+ reports[[length(reports) + 1]] <- reporter$image(
63
+ plot_prefix, more_formats, save_code, kind = "image",
66
64
  descr = paste0("Resolutions: ", paste(resolution, collapse = ", "), "; resolution used: ", resolution_used)
67
65
  )
68
66
  }
69
67
  reports$h1 <- "Clustree plots"
70
68
  reports$ui <- "table_of_images"
71
- do.call(add_report, reports)
69
+ do_call(reporter$add, reports)
72
70
  }
73
71
  }
@@ -2,68 +2,47 @@
2
2
 
3
3
  # dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
4
4
  # dimplots = {{envs.dimplots | r: todot="-", skip=1}}
5
- log_info("dimplots:")
5
+ log$info("dimplots:")
6
6
 
7
- odir = file.path(outdir, "dimplots")
7
+ odir <- file.path(outdir, "dimplots")
8
8
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
9
9
 
10
10
  do_one_dimplot = function(name) {
11
- log_info("- Case: {name}")
11
+ log$info("- Case: {name}")
12
12
 
13
- case = list_update(dimplots_defaults, dimplots[[name]])
14
- case$devpars = list_update(dimplots_defaults$devpars, dimplots[[name]]$devpars)
15
- if (!is.null(case$subset)) {
16
- case$object = srtobj %>% filter(!!rlang::parse_expr(case$subset))
13
+ case <- list_update(dimplots_defaults, dimplots[[name]])
14
+
15
+ # Get functional arguments and inconsistent arguments
16
+ subset <- case$subset; case$subset <- NULL
17
+ reduction <- case$reduction; case$reduction <- NULL
18
+ devpars <- case$devpars; case$devpars <- NULL
19
+
20
+ # Normalize arguments
21
+ reduction <- if (reduction %in% c("dim", "auto")) DefaultDimReduc(srtobj) else reduction
22
+ devpars <- list_update(dimplots_defaults$devpars, devpars)
23
+ key <- paste0("sub_umap_", case$group_by)
24
+
25
+ if (!is.null(subset)) {
26
+ case$object <- srtobj %>% filter(!!parse_expr(subset))
17
27
  } else {
18
- case$object = srtobj
28
+ case$object <- srtobj
19
29
  }
20
-
21
- if (is.null(case$group.by)) { case$group.by = case$ident }
22
- key <- paste0("sub_umap_", case$ident)
23
- if (
24
- key %in% names(case$object@reductions) &&
25
- (is.null(case$reduction) || case$reduction %in% c("dim", "auto"))) {
30
+ if (key %in% names(case$object@reductions) && is.null(reduction)) {
26
31
  case$reduction = key
27
- }
28
- if (is.null(case$na_group)) {
29
- case$object = filter(case$object, !is.na(!!sym(case$group.by)))
30
- } else if (is.null(case$order)) {
31
- case$order = case$object@meta.data[[case$group.by]] %>%
32
- unique() %>% na.omit() %>% as.character() %>% sort()
33
- case$object@meta.data = replace_na(
34
- case$object@meta.data,
35
- setNames(list(case$na_group), case$group.by)
36
- )
37
- }
38
- case$cols = case$cols %||% pal_biopipen()(length(unique(case$object@meta.data[[case$group.by]])))
39
- if (!is.null(case$na_group)) {
40
- # Is the NA value in the first position?
41
- case$cols = c("lightgrey", case$cols[1:(length(case$cols) - 1)])
42
- }
43
-
44
- excluded_args = c("devpars", "ident", "subset")
45
- for (arg in excluded_args) {
46
- assign(arg, case[[arg]])
47
- case[[arg]] = NULL
32
+ } else {
33
+ case$reduction = reduction
48
34
  }
49
35
 
50
- if (case$reduction %in% c("dim", "auto")) { case$reduction = NULL }
51
- case$na_group <- NULL
52
- figfile = file.path(odir, paste0(slugify(name), ".dim.png"))
53
- png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
54
- p = do_call(DimPlot, case)
55
- print(p)
56
- dev.off()
36
+ p <- do_call(CellDimPlot, case)
37
+ prefix <- file.path(odir, paste0(slugify(name), ".dim"))
38
+ save_plot(p, prefix, devpars)
57
39
 
58
- add_report(
40
+ reporter$add(
59
41
  list(
60
42
  kind = "descr",
61
43
  content = paste0("Dimensionality reduction plot for ", case$group.by)
62
44
  ),
63
- list(
64
- kind = "image",
65
- src = figfile
66
- ),
45
+ reporter$image(prefix, "pdf", FALSE),
67
46
  h1 = name
68
47
  )
69
48
  }