biopipen 0.29.2__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (106) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +2 -0
  3. biopipen/core/filters.py +21 -0
  4. biopipen/ns/plot.py +55 -0
  5. biopipen/ns/scrna.py +110 -21
  6. biopipen/ns/web.py +87 -5
  7. biopipen/scripts/bam/CNAClinic.R +2 -1
  8. biopipen/scripts/cellranger/CellRangerCount.py +3 -3
  9. biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
  10. biopipen/scripts/cnv/AneuploidyScore.R +1 -1
  11. biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
  12. biopipen/scripts/delim/RowsBinder.R +1 -1
  13. biopipen/scripts/delim/SampleInfo.R +3 -2
  14. biopipen/scripts/gene/GeneNameConversion.R +2 -2
  15. biopipen/scripts/gsea/Enrichr.R +3 -3
  16. biopipen/scripts/gsea/FGSEA.R +2 -2
  17. biopipen/scripts/gsea/GSEA.R +2 -2
  18. biopipen/scripts/gsea/PreRank.R +2 -2
  19. biopipen/scripts/plot/Heatmap.R +3 -3
  20. biopipen/scripts/plot/Manhattan.R +2 -1
  21. biopipen/scripts/plot/QQPlot.R +1 -1
  22. biopipen/scripts/plot/ROC.R +1 -1
  23. biopipen/scripts/plot/Scatter.R +112 -0
  24. biopipen/scripts/plot/VennDiagram.R +3 -3
  25. biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
  26. biopipen/scripts/rnaseq/Simulation.R +1 -1
  27. biopipen/scripts/rnaseq/UnitConversion.R +2 -1
  28. biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
  29. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
  30. biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
  31. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
  32. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
  33. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
  34. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
  35. biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
  36. biopipen/scripts/scrna/CellsDistribution.R +4 -3
  37. biopipen/scripts/scrna/DimPlots.R +1 -1
  38. biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
  39. biopipen/scripts/scrna/MarkersFinder.R +5 -5
  40. biopipen/scripts/scrna/MetaMarkers.R +4 -4
  41. biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
  42. biopipen/scripts/scrna/RadarPlots.R +1 -1
  43. biopipen/scripts/scrna/ScFGSEA.R +4 -3
  44. biopipen/scripts/scrna/ScSimulation.R +64 -0
  45. biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
  46. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
  47. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
  48. biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -5
  49. biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
  50. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
  51. biopipen/scripts/scrna/SeuratClusterStats-stats.R +20 -25
  52. biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
  53. biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
  54. biopipen/scripts/scrna/SeuratClustering.R +10 -170
  55. biopipen/scripts/scrna/SeuratMap2Ref.R +98 -54
  56. biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
  57. biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
  58. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
  59. biopipen/scripts/scrna/SeuratPreparing.R +22 -562
  60. biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
  61. biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
  62. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +2 -2
  63. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +2 -2
  64. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
  65. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
  66. biopipen/scripts/snp/MatrixEQTL.R +1 -1
  67. biopipen/scripts/snp/PlinkCallRate.R +2 -2
  68. biopipen/scripts/snp/PlinkFreq.R +2 -2
  69. biopipen/scripts/snp/PlinkHWE.R +2 -2
  70. biopipen/scripts/snp/PlinkHet.R +2 -2
  71. biopipen/scripts/snp/PlinkIBD.R +2 -2
  72. biopipen/scripts/stats/ChowTest.R +1 -1
  73. biopipen/scripts/stats/DiffCoexpr.R +1 -1
  74. biopipen/scripts/stats/LiquidAssoc.R +1 -1
  75. biopipen/scripts/stats/Mediation.R +11 -9
  76. biopipen/scripts/stats/MetaPvalue.R +4 -1
  77. biopipen/scripts/stats/MetaPvalue1.R +4 -1
  78. biopipen/scripts/tcr/Attach2Seurat.R +1 -1
  79. biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
  80. biopipen/scripts/tcr/CloneResidency.R +2 -2
  81. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  82. biopipen/scripts/tcr/Immunarch-basic.R +0 -4
  83. biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
  84. biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
  85. biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
  86. biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
  87. biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
  88. biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
  89. biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
  90. biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
  91. biopipen/scripts/tcr/Immunarch.R +43 -11
  92. biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
  93. biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
  94. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  95. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  96. biopipen/scripts/tcr/TCRClustering.R +2 -2
  97. biopipen/scripts/tcr/TESSA.R +2 -2
  98. biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
  99. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  100. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  101. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  102. biopipen/scripts/web/gcloud_common.py +49 -0
  103. {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/METADATA +7 -7
  104. {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/RECORD +106 -96
  105. {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/WHEEL +0 -0
  106. {biopipen-0.29.2.dist-info → biopipen-0.31.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,64 @@
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+
3
+ library(rlang)
4
+ library(splatter)
5
+ library(scater)
6
+
7
+ # Load template variables
8
+ seed <- {{ in.seed | r }}
9
+ outfile <- {{ out.outfile | r }}
10
+ ngenes <- {{ envs.ngenes | r }}
11
+ ncells <- {{ envs.ncells | r }}
12
+ nspikes <- {{ envs.nspikes | r }}
13
+ outtype <- {{ envs.outtype | r }}
14
+ method <- {{ envs.method | r }}
15
+ user_params <- {{ envs.params | r: todot="-" }}
16
+
17
# ---------------------------------------------------------------------------
# Build the splatter parameter object that drives the simulation.
# Reads:  seed, ngenes, ncells, nspikes, user_params (template variables)
# Writes: seed (integer-like), proj (project name), params (Params object)
# ---------------------------------------------------------------------------
log_info("Generating simulation parameters ...")

# Fall back to seed 1 and keep only the first value when several are given.
seed <- seed %||% 1
if (length(seed) > 1) {
    log_warn("- multiple seeds provided, using the first one")
    seed <- seed[1]
}
# A character seed is also used as the project name; it is converted to an
# integer with digest2int() so that it can be passed to set.seed().
if (is.character(seed)) {
    library(digest)
    proj <- seed
    seed <- digest2int(seed)
} else {
    proj <- paste0("S", seed)
}

set.seed(seed)

# Forward only the mockSCE() arguments that were actually provided.
mock_sce_params <- list()
if (!is.null(ngenes)) mock_sce_params$ngenes <- ngenes
if (!is.null(ncells)) mock_sce_params$ncells <- ncells
if (!is.null(nspikes)) mock_sce_params$nspikes <- nspikes
sce <- do.call(mockSCE, mock_sce_params)

# Estimate the parameters from the mock data, then overlay the user settings.
params <- splatEstimate(sce)
user_params$seed <- seed
user_params$object <- params
# The value returned by setParams() must be assigned back: previously it was
# discarded, so neither the user parameters nor the seed were applied to the
# `params` object later passed to splatSimulate().
params <- do_call(setParams, user_params)
42
+
43
+
44
# ---------------------------------------------------------------------------
# Run the simulation and write the result to `outfile`.
# Reads: params, method, outtype, outfile, proj
# ---------------------------------------------------------------------------
# The previous message here claimed that parameters were being saved to a
# file, which does not happen at this point; the step runs the simulation.
log_info("Running simulation ...")

sim <- splatSimulate(params, method = method, verbose = TRUE)

# "sce" is accepted as a short alias for "singlecellexperiment".
outtype <- tolower(outtype)
if (outtype == "sce") outtype <- "singlecellexperiment"

if (outtype == "singlecellexperiment") {
    log_info("Saving simulation to file ...")
    saveRDS(sim, file = outfile)
} else {
    # Any other output type is written as a Seurat object built from the
    # simulated counts.
    log_info("Converting simulation to Seurat object ...")
    cnts <- SingleCellExperiment::counts(sim)
    sobj <- Seurat::CreateSeuratObject(counts = cnts, project = proj)
    # Drop the intermediate objects before saving.
    rm(sim)
    rm(cnts)
    gc()

    log_info("Saving simulation to file ...")
    saveRDS(sobj, file = outfile)
}
@@ -1,4 +1,4 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
2
 
3
3
  library(rlang)
4
4
  library(Seurat)
@@ -0,0 +1,73 @@
1
# Variables expected in scope: srtobj, outdir, clustrees_defaults, clustrees
log_info("clustrees:")

no_cases <- length(clustrees) == 0
default_prefix <- clustrees_defaults$prefix

if (no_cases && (is.null(default_prefix) || default_prefix == "")) {
    log_warn("- no cases, skipping intentionally ...")
} else {
    # Either explicit cases were given or a non-empty default prefix is set.
    library(clustree)
    odir <- file.path(outdir, "clustrees")
    dir.create(odir, recursive = TRUE, showWarnings = FALSE)

    if (no_cases && default_prefix == "_auto") {
        # Derive one case per recorded FindClusters command that was run
        # with more than one resolution.
        clustrees <- list()
        for (cmd_name in names(srtobj@commands)) {
            multi_res <- startsWith(cmd_name, "FindClusters") &&
                length(srtobj@commands[[cmd_name]]$resolution) > 1
            if (!multi_res) {
                next
            }
            # Text after "FindClusters." (13 characters) is the prefix.
            auto_prefix <- substring(cmd_name, 14)
            if (auto_prefix == "") {
                auto_prefix <- "seurat_clusters"
            }
            clustrees[[auto_prefix]] <- list(prefix = auto_prefix)
        }
    }

    if (length(clustrees) == 0) {
        log_warn("- no cases found, skipping ...")
    } else {
        reports <- list()
        for (name in names(clustrees)) {
            if (is.null(clustrees[[name]]$prefix)) {
                stop(paste0("clustrees: prefix is required for case: ", name))
            }
            case <- list_update(clustrees_defaults, clustrees[[name]])

            # Device settings: case value, then defaults, then fallback.
            devpars <- case$devpars
            devpars$width <- devpars$width %||% clustrees_defaults$devpars$width %||% 800
            devpars$height <- devpars$height %||% clustrees_defaults$devpars$height %||% 1000
            devpars$res <- devpars$res %||% clustrees_defaults$devpars$res %||% 100
            case$devpars <- NULL

            # Normalise the prefix so it ends with exactly one trailing dot.
            prefix <- sub("\\.$", "", case$prefix)
            log_info("- Case: {name} ...")
            case$prefix <- paste0(prefix, ".")

            # Keep the matching metadata columns, complete rows only.
            prefixed_cols <- srtobj@meta.data %>% select(starts_with(case$prefix))
            case$x <- prefixed_cols[complete.cases(prefixed_cols), , drop = FALSE]

            # Look up the FindClusters command recorded for this prefix.
            command <- srtobj@commands[[paste0("FindClusters.", prefix)]]
            if (is.null(command) && prefix == "seurat_clusters") {
                command <- srtobj@commands$FindClusters
            }

            clustree_file <- file.path(odir, paste0(prefix, ".clustree.png"))
            png(clustree_file, width = devpars$width, height = devpars$height, res = devpars$res)
            p <- do_call(clustree, case)
            print(p)
            dev.off()

            # Without a recorded command, recover the resolutions from the
            # column names (the text following the prefix).
            resolution <- if (is.null(command)) {
                substring(colnames(case$x), nchar(case$prefix) + 1)
            } else {
                command$resolution
            }
            resolution_used <- resolution[length(resolution)]

            report_item <- list(
                kind = "table_image",
                src = clustree_file,
                name = name,
                descr = paste0("Resolutions: ", paste(resolution, collapse = ", "), "; resolution used: ", resolution_used)
            )
            reports <- c(reports, list(report_item))
        }
        reports$h1 <- "Clustree plots"
        reports$ui <- "table_of_images"
        do.call(add_report, reports)
    }
}
@@ -1,13 +1,14 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
 
3
- dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
4
- dimplots = {{envs.dimplots | r: todot="-", skip=1}}
3
+ # dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
4
+ # dimplots = {{envs.dimplots | r: todot="-", skip=1}}
5
+ log_info("dimplots:")
5
6
 
6
7
  odir = file.path(outdir, "dimplots")
7
8
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
8
9
 
9
10
  do_one_dimplot = function(name) {
10
- log_info(paste0("Doing dimplots for: ", name))
11
+ log_info("- Case: {name}")
11
12
 
12
13
  case = list_update(dimplots_defaults, dimplots[[name]])
13
14
  case$devpars = list_update(dimplots_defaults$devpars, dimplots[[name]]$devpars)
@@ -1,7 +1,8 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
 
3
- features_defaults = {{envs.features_defaults | r: todot="-"}}
4
- features = {{envs.features | r: todot="-", skip=1}}
3
+ # features_defaults = {{envs.features_defaults | r: todot="-"}}
4
+ # features = {{envs.features | r: todot="-", skip=1}}
5
+ log_info("features:")
5
6
 
6
7
  odir = file.path(outdir, "features")
7
8
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
@@ -36,7 +37,7 @@ dir.create(odir, recursive=TRUE, showWarnings=FALSE)
36
37
  }
37
38
 
38
39
  do_one_features = function(name) {
39
- log_info("Doing features for: {name}")
40
+ log_info("- Case: {name}")
40
41
 
41
42
  case = list_update(features_defaults, features[[name]])
42
43
  case$devpars = list_update(features_defaults$devpars, features[[name]]$devpars)
@@ -105,7 +106,7 @@ do_one_features = function(name) {
105
106
  if (is.null(ncol)) { ncol = 1 }
106
107
  list(
107
108
  width = 400 * ncol,
108
- height = ceiling(length(features) / ncol + max_nchar_idents * .05) * 150,
109
+ height = ceiling(length(features) / ncol) * (max_nchar_idents * .1 + 275),
109
110
  res = 100
110
111
  )
111
112
  }
@@ -398,7 +399,7 @@ do_one_features = function(name) {
398
399
  devpars = list_update(default_devpars(case$features, case$ncol), devpars)
399
400
  if (kind == "heatmap") {
400
401
  if (!exists("downsample") || is.null(downsample)) {
401
- log_warn("- `downsample` is not specified for `heatmap`, using `downsample=1000`")
402
+ log_warn(" 'downsample' is not specified for `heatmap`, using `downsample=1000`")
402
403
  downsample = 1000
403
404
  }
404
405
  if (is.numeric(downsample)) {
@@ -1,7 +1,8 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
 
3
- hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
4
- hists <- {{envs.hists | r: todot="-", skip=1}}
3
+ # hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
4
+ # hists <- {{envs.hists | r: todot="-", skip=1}}
5
+ log_info("hists:")
5
6
 
6
7
  do_one_hists <- function(m, case, odir, h1, each = NULL) {
7
8
  ofile <- file.path(odir, paste0(slugify(h1), ifelse(is.null(each), "", paste0("-", slugify(each))), ".png"))
@@ -57,7 +58,7 @@ do_one_hists <- function(m, case, odir, h1, each = NULL) {
57
58
  }
58
59
 
59
60
  if (is.null(hists) || length(hists) == 0) {
60
- log_warn("No hists cases specified, skipping ...")
61
+ log_warn("- no cases specified, skipping ...")
61
62
  } else {
62
63
 
63
64
  for (name in names(hists)) {
@@ -112,12 +113,12 @@ if (is.null(hists) || length(hists) == 0) {
112
113
  h1 = h1
113
114
  )
114
115
  for (each in eachs) {
115
- log_info("Doing hists for: {h1} - {each} ...")
116
+ log_info("- Case: {h1} - {each} ...")
116
117
  m <- meta %>% filter(!!sym(case$each) == each)
117
118
  do_one_hists(m, case, odir, h1, each)
118
119
  }
119
120
  } else {
120
- log_info("Doing hists for: {h1} ...")
121
+ log_info("- Case: {h1} ...")
121
122
  add_report(
122
123
  list(
123
124
  kind = "descr",
@@ -1,13 +1,14 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
 
3
- ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
4
- ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
3
+ # ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
4
+ # ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
5
+ log_info("ngenes:")
5
6
 
6
7
  odir <- file.path(outdir, "ngenes")
7
8
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
8
9
 
9
10
  do_one_ngenes <- function(name) {
10
- log_info("Doing ngenes for: {name}")
11
+ log_info("- Case: {name}")
11
12
 
12
13
  case <- list_update(ngenes_defaults, ngenes[[name]])
13
14
  case$devpars <- list_update(ngenes_defaults$devpars, case$devpars)
@@ -1,14 +1,15 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
  library(circlize)
3
3
 
4
- stats_defaults = {{envs.stats_defaults | r: todot="-"}}
5
- stats = {{envs.stats | r: todot="-", skip=1}}
4
+ # stats_defaults = {{envs.stats_defaults | r: todot="-"}}
5
+ # stats = {{envs.stats | r: todot="-", skip=1}}
6
+ log_info("stats:")
6
7
 
7
8
  odir = file.path(outdir, "stats")
8
9
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
9
10
 
10
11
  do_one_stats = function(name) {
11
- log_info("Doing stats for: {name}")
12
+ log_info("- Case: {name}")
12
13
 
13
14
  case = list_update(stats_defaults, stats[[name]])
14
15
  case$devpars = list_update(stats_defaults$devpars, case$devpars)
@@ -17,12 +18,6 @@ do_one_stats = function(name) {
17
18
  if (isTRUE(case$pie) && !is.null(case$group.by)) {
18
19
  stop(paste0(name, ": pie charts are not supported for group-by"))
19
20
  }
20
- if (!isTRUE(case$frac) && isTRUE(case$frac_ofall)) {
21
- stop(paste0(name, ": frac_ofall is only supported when frac is true"))
22
- }
23
- if (isTRUE(case$frac_ofall) && is.null(case$group.by)) {
24
- stop(paste0(name, ": frac_ofall is only supported for group-by"))
25
- }
26
21
  if (isTRUE(case$transpose) && is.null(case$group.by)) {
27
22
  stop(paste0(name, ": transpose is only supported for group-by"))
28
23
  }
@@ -45,28 +40,28 @@ do_one_stats = function(name) {
45
40
  !!!syms(case$split.by)
46
41
  ), function(df) {
47
42
  out <- df %>% group_by(!!!syms(select_cols)) %>% summarise(.n = n(), .groups = "drop")
48
- if (!is.null(case$group.by) && isTRUE(case$frac)) {
49
- if (isTRUE(case$frac_ofall)) {
43
+ if (!is.null(case$group.by) && case$frac != "none") {
44
+ if (case$frac == "all") {
50
45
  out <- out %>% mutate(.frac = .n / sum(.n))
51
- } else if (isTRUE(case$transpose)) {
52
- out <- out %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
53
- } else {
46
+ } else if (case$frac == "group") {
54
47
  out <- out %>% group_by(!!sym(case$group.by)) %>% mutate(.frac = .n / sum(.n))
48
+ } else { # case$frac == "ident" or "cluster"
49
+ out <- out %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
55
50
  }
56
51
  }
57
52
  out
58
53
  }))
59
- } else if (!is.null(case$group.by) && isTRUE(case$frac)) {
54
+ } else if (!is.null(case$group.by) && case$frac != "none") { # no split.by
60
55
  plot_df <- df_cells %>%
61
56
  select(all_of(select_cols)) %>%
62
57
  group_by(!!!syms(select_cols)) %>%
63
58
  summarise(.n = n(), .groups = "drop")
64
- if (isTRUE(case$frac_ofall)) {
59
+ if (case$frac == "all") {
65
60
  plot_df = plot_df %>% mutate(.frac = .n / sum(.n))
66
- } else {
67
- plot_df = plot_df %>%
68
- group_by(!!sym(ifelse(isTRUE(case$transpose), case$group.by, case$ident))) %>%
69
- mutate(.frac = .n / sum(.n))
61
+ } else if (case$frac == "group") {
62
+ plot_df = plot_df %>% group_by(!!sym(case$group.by)) %>% mutate(.frac = .n / sum(.n))
63
+ } else { # case$frac == "ident" or "cluster"
64
+ plot_df = plot_df %>% group_by(!!sym(case$ident)) %>% mutate(.frac = .n / sum(.n))
70
65
  }
71
66
  } else {
72
67
  plot_df <- df_cells %>%
@@ -74,7 +69,7 @@ do_one_stats = function(name) {
74
69
  group_by(!!!syms(select_cols)) %>%
75
70
  summarise(.n = n(), .groups = "drop")
76
71
 
77
- if (isTRUE(case$frac) || isTRUE(case$frac_ofall)) {
72
+ if (case$frac != "none") {
78
73
  plot_df <- plot_df %>% mutate(.frac = .n / sum(.n))
79
74
  }
80
75
  }
@@ -87,13 +82,13 @@ do_one_stats = function(name) {
87
82
  p = plot_df %>%
88
83
  ggplot(aes(
89
84
  x=!!sym(ifelse(case$transpose, case$group.by, case$ident)),
90
- y=if (isTRUE(case$frac)) .frac else .n,
85
+ y=if (case$frac != "none") .frac else .n,
91
86
  fill=!!sym(ifelse(is.null(case$group.by) || isTRUE(case$transpose), case$ident, case$group.by))
92
87
  )) +
93
88
  geom_bar(stat="identity", position=bar_position, alpha=.8) +
94
89
  theme_prism(axis_text_angle = 90) +
95
90
  scale_fill_biopipen() +
96
- ylab(ifelse(isTRUE(case$frac), "Fraction of cells", "Number of cells"))
91
+ ylab(ifelse(case$frac != "none", "Fraction of cells", "Number of cells"))
97
92
 
98
93
  if (!is.null(case$split.by)) {
99
94
  p = p + facet_wrap(case$split.by)
@@ -108,7 +103,7 @@ do_one_stats = function(name) {
108
103
  kind = "descr",
109
104
  content = paste0(
110
105
  "Plots showing the ",
111
- ifelse(isTRUE(case$frac), "number/faction", "number"),
106
+ ifelse(case$frac != "none", "number/faction", "number"),
112
107
  " of cells per cluster",
113
108
  ifelse(
114
109
  is.null(case$group.by),
@@ -149,7 +144,7 @@ do_one_stats = function(name) {
149
144
  guides(fill = guide_legend(title = case$ident)) +
150
145
  theme_void() +
151
146
  geom_label(
152
- if (isTRUE(case$frac))
147
+ if (case$frac != "none")
153
148
  aes(label=sprintf("%.1f%%", .frac * 100))
154
149
  else
155
150
  aes(label=.n),
@@ -1,6 +1,7 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/mutate_helpers.R")
3
- source("{{biopipen_dir}}/utils/plot.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
3
+ {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
4
+
4
5
  library(Seurat)
5
6
  library(rlang)
6
7
  library(dplyr)
@@ -26,19 +27,34 @@ if (!is.null(mutaters) && length(mutaters) > 0) {
26
27
  mutate(!!!lapply(mutaters, parse_expr))
27
28
  }
28
29
 
30
+ ############## clustree ##############
31
+ clustrees_defaults <- {{envs.clustrees_defaults | r}}
32
+ clustrees <- {{envs.clustrees | r}}
33
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-clustree.R" | source_r }}
34
+
29
35
  ############## stats ##############
30
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-stats.R" %}
36
+ stats_defaults = {{envs.stats_defaults | r: todot="-"}}
37
+ stats = {{envs.stats | r: todot="-", skip=1}}
38
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-stats.R" | source_r }}
31
39
 
32
40
  ############## hists ##############
33
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-hists.R" %}
41
+ hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
42
+ hists <- {{envs.hists | r: todot="-", skip=1}}
43
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-hists.R" | source_r }}
34
44
 
35
45
  ############## ngenes ##############
36
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-ngenes.R" %}
46
+ ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
47
+ ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
48
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-ngenes.R" | source_r }}
37
49
 
38
50
  ############## features ##############
39
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-features.R" %}
51
+ features_defaults = {{envs.features_defaults | r: todot="-"}}
52
+ features = {{envs.features | r: todot="-", skip=1}}
53
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-features.R" | source_r }}
40
54
 
41
55
  ############## dimplots ##############
42
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-dimplots.R" %}
56
+ dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
57
+ dimplots = {{envs.dimplots | r: todot="-", skip=1}}
58
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-dimplots.R" | source_r }}
43
59
 
44
60
  save_report(joboutdir)
@@ -0,0 +1,213 @@
1
+
2
# Expand a scalar dimension count into an index range (e.g. 30 -> 1:30).
#
# args: a list of arguments.
# name: the element of `args` to expand (default "dims").
#
# Returns `args`, with `args[[name]]` replaced by `1:args[[name]]` when it is
# a single number. Anything else (NULL, non-numeric, or an explicit vector of
# dimensions) is returned untouched.
expand_dims <- function(args, name = "dims") {
    dims <- args[[name]]
    # The length check must apply to the value itself. The previous
    # `length(dims == 1)` was truthy for any non-empty vector, so an explicit
    # vector such as c(2, 4, 6) was wrongly fed to `1:`.
    if (is.numeric(dims) && length(dims) == 1) {
        args[[name]] <- 1:dims
    }
    args
}
9
+
10
# Expand a resolution specification into a numeric vector.
#
# resolution: a numeric vector, or character values. Each character value may
#   hold comma-separated parts, and each part is either a single number or a
#   range "from:to" / "from:to:step" (step defaults to 0.1).
#
# Returns the expanded resolutions rounded to 2 decimals. Duplicates are
# removed keeping the LAST occurrence, so the final resolution stays last.
#
# Raises an error for a range with a wrong number of ':'-separated fields or
# for a part that cannot be converted to a number.
expand_resolution <- function(resolution) {
    # Convert character fields to numbers, failing loudly instead of letting
    # NA values propagate into the result.
    to_number <- function(fields, part) {
        values <- suppressWarnings(as.numeric(fields))
        if (anyNA(values)) {
            stop(paste0("Invalid resolution: ", part, ". Expected numeric values."))
        }
        values
    }

    expanded_res <- c()
    for (res in resolution) {
        if (is.numeric(res)) {
            expanded_res <- c(expanded_res, res)
            next
        }
        # Character input: split into comma-separated parts.
        parts <- trimws(unlist(strsplit(res, ",")))
        for (part in parts) {
            if (!grepl(":", part)) {
                expanded_res <- c(expanded_res, to_number(part, part))
                next
            }
            ps <- trimws(unlist(strsplit(part, ":")))
            if (length(ps) == 2) {
                ps <- c(ps, 0.1)
            }
            if (length(ps) != 3) {
                # stop() does not interpolate "{part}", so build the message
                # explicitly to show the offending part.
                stop(paste0(
                    "Invalid resolution format: ", part,
                    ". Expected 2 or 3 parts separated by ':' for a range."
                ))
            }
            ps <- to_number(ps, part)
            expanded_res <- c(expanded_res, seq(ps[1], ps[2], by = ps[3]))
        }
    }
    # keep the last resolution at last
    rev(unique(rev(round(expanded_res, 2))))
}
36
+
37
# Relabel cluster ids 0, 1, 2, ... as "c1", "c2", "c3", ...
#
# clusters: a factor whose values and levels are integer-like labels.
# Returns a factor with the shifted labels; level order follows the input.
recode_clusters <- function(clusters) {
    shift_label <- function(ids) {
        paste0("c", as.integer(as.character(ids)) + 1)
    }
    new_levels <- shift_label(levels(clusters))
    factor(shift_label(clusters), levels = new_levels)
}
43
+
44
# Run ScaleData or SCTransform on a Seurat object, with caching.
#
# sobj: the Seurat object to transform.
#
# Uses `envs$ScaleData` / `envs$SCTransform` (at most one may be non-empty)
# and `cache_dir` from the enclosing environment. When neither is set the
# object is returned unchanged. On a cache hit, the cached assay and command
# log are restored instead of re-running the transformation.
#
# Stops when both are specified, or when the requested transformation does
# not match the object's default assay.
run_transformation <- function(sobj) {
    if (length(envs$ScaleData) == 0 && length(envs$SCTransform) == 0) {
        log_warn("Skipping ScaleData/SCTransform (neither specified) ...")
        return(sobj)
    }
    if (length(envs$ScaleData) > 0 && length(envs$SCTransform) > 0) {
        stop("Both envs.ScaleData and envs.SCTransform are specified. Please choose either.")
    }
    if (length(envs$ScaleData) > 0) {
        if (DefaultAssay(sobj) == "SCT") {
            stop("SCT assay detected, but envs.ScaleData is specified. Use envs.SCTransform instead.")
        }
        cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
        if (is.null(cached$data)) {
            log_info("Running ScaleData ...")
            sobj <- do_call(ScaleData, c(list(object = sobj), envs$ScaleData))
            cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
            save_to_cache(cached, "ScaleData", cache_dir)
        } else {
            log_info("Loading cached ScaleData ...")
            sobj@assays$RNA <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- "RNA"
        }
    } else if (length(envs$SCTransform) > 0) {
        if (DefaultAssay(sobj) != "SCT") {
            stop("SCT assay not detected, but envs.SCTransform is specified. Use envs.ScaleData instead.")
        }
        cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
        # Name of the assay written by SCTransform. This was previously
        # assigned to a misspelled variable (`asssay`), leaving `assay`
        # undefined in the cache-hit branch below.
        assay <- envs$SCTransform$new.assay.name %||% "SCT"
        if (is.null(cached$data)) {
            log_info("Running SCTransform ...")
            sobj <- do_call(SCTransform, c(list(object = sobj), envs$SCTransform))
            # Cache the assay under the same name it is restored to, so a
            # custom `new.assay.name` round-trips through the cache.
            cached$data <- list(assay = sobj@assays[[assay]], commands = sobj@commands)
            save_to_cache(cached, "SCTransform", cache_dir)
        } else {
            log_info("Loading cached SCTransform ...")
            sobj@assays[[assay]] <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- assay
        }
    }
    sobj
}
88
+
89
# Run RunUMAP on a Seurat object, or restore the reduction from the cache.
#
# sobj: the Seurat object.
#
# Uses `envs$RunUMAP` and `cache_dir` from the enclosing environment.
# Returns the object with the UMAP reduction and command log updated.
run_umap <- function(sobj) {
    umap_envs <- envs$RunUMAP
    cached <- get_cached(
        list(sobj = sobj, RunUMAP = umap_envs),
        "RunUMAP",
        cache_dir
    )
    reduc_name <- umap_envs$reduction.name %||% "umap"

    # Cache hit: restore the stored reduction and commands, nothing to run.
    if (!is.null(cached$data)) {
        log_info("Loading cached RunUMAP ...")
        sobj@reductions[[reduc_name]] <- cached$data$reduc
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    log_info("Running RunUMAP ...")
    fallback_reduction <- sobj@misc$integrated_new_reduction %||% "pca"
    umap_args <- list_setdefault(
        umap_envs,
        object = sobj,
        dims = 1:30,
        reduction = fallback_reduction
    )

    # Cap the dimensions at (number of cells - 1).
    ncells <- ncol(sobj)
    top_dim <- min(max(umap_args$dims), ncells - 1)
    umap_args$dims <- 1:top_dim

    umap_method <- umap_envs$umap.method %||% "uwot"
    if (is.null(umap_envs$n.neighbors) && umap_method == "uwot") {
        # https://github.com/satijalab/seurat/issues/4312
        umap_args$n.neighbors <- min(ncells - 1, 30)
    }

    sobj <- do_call(RunUMAP, umap_args)
    cached$data <- list(
        reduc = sobj@reductions[[reduc_name]],
        commands = sobj@commands
    )
    save_to_cache(cached, "RunUMAP", cache_dir)

    sobj
}
122
+
123
# Run FindNeighbors on a Seurat object, or restore the graphs from the cache.
#
# sobj: the Seurat object.
#
# Uses `envs$FindNeighbors` and `cache_dir` from the enclosing environment.
# The reduction is taken from `envs$FindNeighbors$reduction` when provided,
# otherwise from `sobj@misc$integrated_new_reduction`, otherwise "pca".
# Returns the object with graphs and command log updated.
run_findneighbors <- function(sobj) {
    cached <- get_cached(
        list(sobj = sobj, FindNeighbors = envs$FindNeighbors),
        "FindNeighbors",
        cache_dir
    )
    if (is.null(cached$data)) {
        log_info("Running FindNeighbors ...")
        fn_args <- envs$FindNeighbors
        fn_args$object <- sobj
        # Only fill in the reduction when the user did not specify one. It
        # used to be overwritten unconditionally, silently discarding the
        # user's setting, whereas run_umap() only sets it as a default.
        # `[[` is used to avoid partial name matching.
        fn_args$reduction <- fn_args[["reduction"]] %||%
            sobj@misc$integrated_new_reduction %||%
            "pca"
        sobj <- do_call(FindNeighbors, fn_args)
        cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
        save_to_cache(cached, "FindNeighbors", cache_dir)
    } else {
        log_info("Loading cached FindNeighbors ...")
        sobj@graphs <- cached$data$graphs
        sobj@commands <- cached$data$commands
    }

    sobj
}
144
+
145
# Run FindClusters at one or more resolutions, or restore the cluster
# assignments from the cache.
#
# sobj: the Seurat object.
#
# Uses `envs$FindClusters` and `cache_dir` from the enclosing environment.
# One metadata column "seurat_clusters.<resolution>" is requested per
# resolution; all cluster columns are relabelled with recode_clusters() and
# the identities are set to "seurat_clusters".
run_findclusters <- function(sobj) {
    cached <- get_cached(
        list(sobj = sobj, FindClusters = envs$FindClusters),
        "FindClusters",
        cache_dir
    )

    # Cache hit: re-attach the stored cluster columns and command log.
    if (!is.null(cached$data)) {
        log_info("Loading cached FindClusters ...")

        sobj <- AddMetaData(sobj, metadata = cached$data$clusters)
        Idents(sobj) <- "seurat_clusters"
        sobj@commands <- cached$data$commands
        return(sobj)
    }

    fc_args <- envs$FindClusters
    fc_args$random.seed <- fc_args$random.seed %||% 8525
    resolution <- expand_resolution(fc_args$resolution %||% 0.8)
    fc_args$resolution <- resolution
    log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")

    cluster_cols <- paste0("seurat_clusters.", resolution)
    fc_args$object <- sobj
    fc_args$cluster.name <- cluster_cols
    sobj <- do_call(FindClusters, fc_args)

    # Relabel every per-resolution column, then the main cluster column.
    for (col in cluster_cols) {
        sobj@meta.data[[col]] <- recode_clusters(sobj@meta.data[[col]])
    }
    sobj@meta.data$seurat_clusters <- recode_clusters(sobj@meta.data$seurat_clusters)
    Idents(sobj) <- "seurat_clusters"

    ident_table <- table(Idents(sobj))
    log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
    print(ident_table)
    cat("\n")

    kept_cols <- c(cluster_cols, "seurat_clusters")
    cached$data <- list(
        clusters = sobj@meta.data[, kept_cols, drop = FALSE],
        commands = sobj@commands
    )
    save_to_cache(cached, "FindClusters", cache_dir)

    sobj
}
187
+
188
# Run PrepSCTFindMarkers when the default assay is "SCT", with caching.
#
# sobj: the Seurat object.
#
# Uses `cache_dir` from the enclosing environment. Objects whose default
# assay is not "SCT" are returned unchanged. After running, a command entry
# named "PrepSCTFindMarkers" is added to `sobj@commands`.
run_prepsctfindmarkers <- function(sobj) {
    if (DefaultAssay(sobj) != "SCT") {
        return(sobj)
    }

    cached <- get_cached(list(sobj = sobj), "PrepSCTFindMarkers", cache_dir)
    if (!is.null(cached$data)) {
        log_info("Loading cached PrepSCTFindMarkers ...")
        return(cached$data)
    }

    # https://github.com/satijalab/seurat/issues/6968
    log_info("Running PrepSCTFindMarkers ...")
    sobj <- PrepSCTFindMarkers(sobj)

    # Build a command record from the existing FindClusters entry, so that
    # this step is also recorded in sobj@commands.
    prep_cmd <- sobj@commands$FindClusters
    prep_cmd@name <- "PrepSCTFindMarkers"
    prep_cmd@time.stamp <- Sys.time()
    prep_cmd@assay.used <- "SCT"
    prep_cmd@call.string <- "PrepSCTFindMarkers(object = sobj)"
    prep_cmd@params <- list()
    sobj@commands$PrepSCTFindMarkers <- prep_cmd

    cached$data <- sobj
    save_to_cache(cached, "PrepSCTFindMarkers", cache_dir)

    sobj
}