biopipen 0.29.1__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (105)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +2 -0
  3. biopipen/core/filters.py +21 -0
  4. biopipen/ns/plot.py +55 -0
  5. biopipen/ns/scrna.py +49 -13
  6. biopipen/ns/web.py +87 -5
  7. biopipen/scripts/bam/CNAClinic.R +2 -1
  8. biopipen/scripts/cellranger/CellRangerCount.py +3 -3
  9. biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
  10. biopipen/scripts/cnv/AneuploidyScore.R +1 -1
  11. biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
  12. biopipen/scripts/delim/RowsBinder.R +1 -1
  13. biopipen/scripts/delim/SampleInfo.R +3 -2
  14. biopipen/scripts/gene/GeneNameConversion.R +2 -2
  15. biopipen/scripts/gsea/Enrichr.R +3 -3
  16. biopipen/scripts/gsea/FGSEA.R +2 -2
  17. biopipen/scripts/gsea/GSEA.R +2 -2
  18. biopipen/scripts/gsea/PreRank.R +2 -2
  19. biopipen/scripts/plot/Heatmap.R +3 -3
  20. biopipen/scripts/plot/Manhattan.R +2 -1
  21. biopipen/scripts/plot/QQPlot.R +1 -1
  22. biopipen/scripts/plot/ROC.R +1 -1
  23. biopipen/scripts/plot/Scatter.R +112 -0
  24. biopipen/scripts/plot/VennDiagram.R +3 -3
  25. biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
  26. biopipen/scripts/rnaseq/Simulation.R +1 -1
  27. biopipen/scripts/rnaseq/UnitConversion.R +2 -1
  28. biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
  29. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
  30. biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
  31. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
  32. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
  33. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
  34. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
  35. biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
  36. biopipen/scripts/scrna/CellsDistribution.R +3 -2
  37. biopipen/scripts/scrna/DimPlots.R +1 -1
  38. biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
  39. biopipen/scripts/scrna/MarkersFinder.R +5 -4
  40. biopipen/scripts/scrna/MetaMarkers.R +22 -4
  41. biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
  42. biopipen/scripts/scrna/RadarPlots.R +1 -1
  43. biopipen/scripts/scrna/ScFGSEA.R +4 -3
  44. biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
  45. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
  46. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
  47. biopipen/scripts/scrna/SeuratClusterStats-features.R +8 -5
  48. biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
  49. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
  50. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -3
  51. biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
  52. biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
  53. biopipen/scripts/scrna/SeuratClustering.R +10 -170
  54. biopipen/scripts/scrna/SeuratMap2Ref.R +65 -31
  55. biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
  56. biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
  57. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
  58. biopipen/scripts/scrna/SeuratPreparing.R +22 -562
  59. biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
  60. biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
  61. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +7 -4
  62. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +7 -3
  63. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
  64. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
  65. biopipen/scripts/snp/MatrixEQTL.R +1 -1
  66. biopipen/scripts/snp/PlinkCallRate.R +2 -2
  67. biopipen/scripts/snp/PlinkFreq.R +2 -2
  68. biopipen/scripts/snp/PlinkHWE.R +2 -2
  69. biopipen/scripts/snp/PlinkHet.R +2 -2
  70. biopipen/scripts/snp/PlinkIBD.R +2 -2
  71. biopipen/scripts/stats/ChowTest.R +1 -1
  72. biopipen/scripts/stats/DiffCoexpr.R +1 -1
  73. biopipen/scripts/stats/LiquidAssoc.R +1 -1
  74. biopipen/scripts/stats/Mediation.R +26 -12
  75. biopipen/scripts/stats/MetaPvalue.R +4 -1
  76. biopipen/scripts/stats/MetaPvalue1.R +4 -1
  77. biopipen/scripts/tcr/Attach2Seurat.R +1 -1
  78. biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
  79. biopipen/scripts/tcr/CloneResidency.R +2 -2
  80. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  81. biopipen/scripts/tcr/Immunarch-basic.R +0 -4
  82. biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
  83. biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
  84. biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
  85. biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
  86. biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
  87. biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
  88. biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
  89. biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
  90. biopipen/scripts/tcr/Immunarch.R +43 -11
  91. biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
  92. biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
  93. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  94. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  95. biopipen/scripts/tcr/TCRClustering.R +2 -2
  96. biopipen/scripts/tcr/TESSA.R +2 -2
  97. biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
  98. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  99. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  100. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  101. biopipen/scripts/web/gcloud_common.py +49 -0
  102. {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/METADATA +1 -1
  103. {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/RECORD +105 -96
  104. {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/WHEEL +0 -0
  105. {biopipen-0.29.1.dist-info → biopipen-0.30.0.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,8 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
 
3
- features_defaults = {{envs.features_defaults | r: todot="-"}}
4
- features = {{envs.features | r: todot="-", skip=1}}
3
+ # features_defaults = {{envs.features_defaults | r: todot="-"}}
4
+ # features = {{envs.features | r: todot="-", skip=1}}
5
+ log_info("features:")
5
6
 
6
7
  odir = file.path(outdir, "features")
7
8
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
@@ -36,7 +37,7 @@ dir.create(odir, recursive=TRUE, showWarnings=FALSE)
36
37
  }
37
38
 
38
39
  do_one_features = function(name) {
39
- log_info("Doing features for: {name}")
40
+ log_info("- Case: {name}")
40
41
 
41
42
  case = list_update(features_defaults, features[[name]])
42
43
  case$devpars = list_update(features_defaults$devpars, features[[name]]$devpars)
@@ -75,6 +76,7 @@ do_one_features = function(name) {
75
76
  Idents(case$object) = case$ident
76
77
  }
77
78
  n_uidents = length(unique(Idents(case$object)))
79
+ max_nchar_idents = max(nchar(unique(as.character(Idents(case$object)))))
78
80
 
79
81
  fn = NULL
80
82
  default_devpars = NULL
@@ -97,13 +99,14 @@ do_one_features = function(name) {
97
99
  case$kind = "violin"
98
100
  if (is.null(case$cols)) { case$cols = pal_biopipen()(n_uidents) }
99
101
  if (is.null(case$pt.size)) { case$pt.size = 0 }
102
+
100
103
  excluded_args = c(excluded_args, "reduction")
101
104
  fn = VlnPlot
102
105
  default_devpars = function(features, ncol) {
103
106
  if (is.null(ncol)) { ncol = 1 }
104
107
  list(
105
108
  width = 400 * ncol,
106
- height = ceiling(length(features) / ncol) * 200,
109
+ height = ceiling(length(features) / ncol) * (max_nchar_idents * .1 + 275),
107
110
  res = 100
108
111
  )
109
112
  }
@@ -396,7 +399,7 @@ do_one_features = function(name) {
396
399
  devpars = list_update(default_devpars(case$features, case$ncol), devpars)
397
400
  if (kind == "heatmap") {
398
401
  if (!exists("downsample") || is.null(downsample)) {
399
- log_warn("- `downsample` is not specified for `heatmap`, using `downsample=1000`")
402
+ log_warn(" 'downsample' is not specified for `heatmap`, using `downsample=1000`")
400
403
  downsample = 1000
401
404
  }
402
405
  if (is.numeric(downsample)) {
@@ -1,7 +1,8 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
 
3
- hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
4
- hists <- {{envs.hists | r: todot="-", skip=1}}
3
+ # hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
4
+ # hists <- {{envs.hists | r: todot="-", skip=1}}
5
+ log_info("hists:")
5
6
 
6
7
  do_one_hists <- function(m, case, odir, h1, each = NULL) {
7
8
  ofile <- file.path(odir, paste0(slugify(h1), ifelse(is.null(each), "", paste0("-", slugify(each))), ".png"))
@@ -57,7 +58,7 @@ do_one_hists <- function(m, case, odir, h1, each = NULL) {
57
58
  }
58
59
 
59
60
  if (is.null(hists) || length(hists) == 0) {
60
- log_warn("No hists cases specified, skipping ...")
61
+ log_warn("- no cases specified, skipping ...")
61
62
  } else {
62
63
 
63
64
  for (name in names(hists)) {
@@ -112,12 +113,12 @@ if (is.null(hists) || length(hists) == 0) {
112
113
  h1 = h1
113
114
  )
114
115
  for (each in eachs) {
115
- log_info("Doing hists for: {h1} - {each} ...")
116
+ log_info("- Case: {h1} - {each} ...")
116
117
  m <- meta %>% filter(!!sym(case$each) == each)
117
118
  do_one_hists(m, case, odir, h1, each)
118
119
  }
119
120
  } else {
120
- log_info("Doing hists for: {h1} ...")
121
+ log_info("- Case: {h1} ...")
121
122
  add_report(
122
123
  list(
123
124
  kind = "descr",
@@ -1,13 +1,14 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
 
3
- ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
4
- ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
3
+ # ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
4
+ # ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
5
+ log_info("ngenes:")
5
6
 
6
7
  odir <- file.path(outdir, "ngenes")
7
8
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
8
9
 
9
10
  do_one_ngenes <- function(name) {
10
- log_info("Doing ngenes for: {name}")
11
+ log_info("- Case: {name}")
11
12
 
12
13
  case <- list_update(ngenes_defaults, ngenes[[name]])
13
14
  case$devpars <- list_update(ngenes_defaults$devpars, case$devpars)
@@ -1,14 +1,15 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
  library(circlize)
3
3
 
4
- stats_defaults = {{envs.stats_defaults | r: todot="-"}}
5
- stats = {{envs.stats | r: todot="-", skip=1}}
4
+ # stats_defaults = {{envs.stats_defaults | r: todot="-"}}
5
+ # stats = {{envs.stats | r: todot="-", skip=1}}
6
+ log_info("stats:")
6
7
 
7
8
  odir = file.path(outdir, "stats")
8
9
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
9
10
 
10
11
  do_one_stats = function(name) {
11
- log_info("Doing stats for: {name}")
12
+ log_info("- Case: {name}")
12
13
 
13
14
  case = list_update(stats_defaults, stats[[name]])
14
15
  case$devpars = list_update(stats_defaults$devpars, case$devpars)
@@ -1,6 +1,7 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/mutate_helpers.R")
3
- source("{{biopipen_dir}}/utils/plot.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
3
+ {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
4
+
4
5
  library(Seurat)
5
6
  library(rlang)
6
7
  library(dplyr)
@@ -26,19 +27,34 @@ if (!is.null(mutaters) && length(mutaters) > 0) {
26
27
  mutate(!!!lapply(mutaters, parse_expr))
27
28
  }
28
29
 
30
+ ############## clustree ##############
31
+ clustrees_defaults <- {{envs.clustrees_defaults | r}}
32
+ clustrees <- {{envs.clustrees | r}}
33
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-clustree.R" | source_r }}
34
+
29
35
  ############## stats ##############
30
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-stats.R" %}
36
+ stats_defaults = {{envs.stats_defaults | r: todot="-"}}
37
+ stats = {{envs.stats | r: todot="-", skip=1}}
38
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-stats.R" | source_r }}
31
39
 
32
40
  ############## hists ##############
33
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-hists.R" %}
41
+ hists_defaults <- {{envs.hists_defaults | r: todot="-"}}
42
+ hists <- {{envs.hists | r: todot="-", skip=1}}
43
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-hists.R" | source_r }}
34
44
 
35
45
  ############## ngenes ##############
36
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-ngenes.R" %}
46
+ ngenes_defaults <- {{envs.ngenes_defaults | r: todot="-"}}
47
+ ngenes <- {{envs.ngenes | r: todot="-", skip=1}}
48
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-ngenes.R" | source_r }}
37
49
 
38
50
  ############## features ##############
39
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-features.R" %}
51
+ features_defaults = {{envs.features_defaults | r: todot="-"}}
52
+ features = {{envs.features | r: todot="-", skip=1}}
53
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-features.R" | source_r }}
40
54
 
41
55
  ############## dimplots ##############
42
- {% include biopipen_dir + "/scripts/scrna/SeuratClusterStats-dimplots.R" %}
56
+ dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
57
+ dimplots = {{envs.dimplots | r: todot="-", skip=1}}
58
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClusterStats-dimplots.R" | source_r }}
43
59
 
44
60
  save_report(joboutdir)
@@ -0,0 +1,213 @@
1
+
2
expand_dims <- function(args, name = "dims") {
    # Expand a scalar dimension count into a full index vector, e.g. 30 -> 1:30.
    #
    # Args:
    #   args: Argument list (or NULL) that may carry the entry `name`.
    #   name: Name of the entry to expand; defaults to "dims".
    #
    # Returns:
    #   `args`, with `args[[name]]` replaced by 1:args[[name]] when that entry
    #   is a single number. Entries that are already vectors, non-numeric
    #   entries and missing entries are returned untouched.
    dims <- args[[name]]
    # The length test must wrap only the value: `length(x == 1)` is the length
    # of a logical vector, which is truthy for every non-empty `x`, so an
    # already expanded vector would be re-expanded from its first element only.
    if (is.numeric(dims) && length(dims) == 1) {
        args[[name]] <- 1:dims
    }
    args
}
9
+
10
expand_resolution <- function(resolution) {
    # Expand a resolution specification into a numeric vector of resolutions.
    #
    # Args:
    #   resolution: Numeric values and/or strings. A string may hold several
    #     comma-separated items; each item is either a single number or a
    #     range written as "from:to" (step 0.1) or "from:to:step".
    #
    # Returns:
    #   The resolutions rounded to 2 decimals with duplicates removed. When a
    #   value occurs more than once its last occurrence is the one kept, so
    #   the final resolution given stays at the end of the result.
    #
    # Raises:
    #   An error if a range does not have 2 or 3 parts, or if any item cannot
    #   be converted to a number.

    # Convert to numeric, failing loudly instead of letting NA leak through.
    to_number <- function(x, item) {
        num <- suppressWarnings(as.numeric(x))
        if (anyNA(num)) {
            stop(paste0("Invalid resolution format: ", item, ". Expected numeric values."))
        }
        num
    }

    # Start from an empty numeric vector so that empty input yields numeric(0).
    expanded_res <- numeric(0)
    for (res in resolution) {
        if (is.numeric(res)) {
            expanded_res <- c(expanded_res, res)
            next
        }
        # Otherwise a character specification, e.g. "0.2, 0.4:1:0.2"
        for (part in trimws(unlist(strsplit(res, ",")))) {
            if (!grepl(":", part)) {
                expanded_res <- c(expanded_res, to_number(part, part))
                next
            }
            ps <- trimws(unlist(strsplit(part, ":")))
            if (length(ps) == 2) { ps <- c(ps, 0.1) }
            if (length(ps) != 3) {
                # stop() does not interpolate "{...}" placeholders, so the
                # offending item is pasted into the message explicitly.
                stop(paste0(
                    "Invalid resolution format: ", part,
                    ". Expected 2 or 3 parts separated by ':' for a range."
                ))
            }
            ps <- to_number(ps, part)
            expanded_res <- c(expanded_res, seq(ps[1], ps[2], by = ps[3]))
        }
    }
    # keep the last resolution at last
    rev(unique(rev(round(expanded_res, 2))))
}
36
+
37
# Relabel zero-based cluster ids (0, 1, 2, ...) as one-based names (c1, c2, c3, ...).
# The level order of the input factor is carried over to the relabelled factor.
recode_clusters <- function(clusters) {
    to_label <- function(ids) {
        one_based <- as.integer(as.character(ids)) + 1
        paste0("c", one_based)
    }
    factor(to_label(clusters), levels = to_label(levels(clusters)))
}
43
+
44
run_transformation <- function(sobj) {
    # Apply ScaleData or SCTransform to a Seurat object, reusing a cached
    # result when one is available.
    #
    # Reads `envs$ScaleData` / `envs$SCTransform` (argument lists) and
    # `cache_dir` from the enclosing script. A cache miss is detected by
    # `get_cached()` returning an object whose `$data` is NULL.
    #
    # Args:
    #   sobj: The Seurat object to transform.
    #
    # Returns:
    #   The transformed object, or `sobj` unchanged when neither step is
    #   configured.
    #
    # Raises:
    #   An error when both steps are configured, or when the configured step
    #   does not match the object's default assay.
    use_scaledata <- length(envs$ScaleData) > 0
    use_sctransform <- length(envs$SCTransform) > 0

    if (!use_scaledata && !use_sctransform) {
        log_warn("Skipping ScaleData/SCTransform (neither specified) ...")
        return(sobj)
    }
    if (use_scaledata && use_sctransform) {
        stop("Both envs.ScaleData and envs.SCTransform are specified. Please choose either.")
    }

    if (use_scaledata) {
        if (DefaultAssay(sobj) == "SCT") {
            stop("SCT assay detected, but envs.ScaleData is specified. Use envs.SCTransform instead.")
        }
        cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
        if (is.null(cached$data)) {
            log_info("Running ScaleData ...")
            sobj <- do_call(ScaleData, c(list(object = sobj), envs$ScaleData))
            cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
            save_to_cache(cached, "ScaleData", cache_dir)
        } else {
            log_info("Loading cached ScaleData ...")
            sobj@assays$RNA <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- "RNA"
        }
    } else {
        if (DefaultAssay(sobj) != "SCT") {
            stop("SCT assay not detected, but envs.SCTransform is specified. Use envs.ScaleData instead.")
        }
        cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
        # Name of the assay SCTransform writes to; "SCT" unless overridden.
        # This must be spelled `assay`: the cached branch below reads it, and a
        # misspelled assignment leaves that branch with an undefined variable.
        assay <- envs$SCTransform$new.assay.name %||% "SCT"
        if (is.null(cached$data)) {
            log_info("Running SCTransform ...")
            sobj <- do_call(SCTransform, c(list(object = sobj), envs$SCTransform))
            # Cache the assay under the same name the cached branch restores,
            # so a custom `new.assay.name` round-trips correctly.
            cached$data <- list(assay = sobj@assays[[assay]], commands = sobj@commands)
            save_to_cache(cached, "SCTransform", cache_dir)
        } else {
            log_info("Loading cached SCTransform ...")
            sobj@assays[[assay]] <- cached$data$assay
            sobj@commands <- cached$data$commands
            DefaultAssay(sobj) <- assay
        }
    }
    sobj
}
88
+
89
run_umap <- function(sobj) {
    # Compute the UMAP reduction for `sobj`, or restore it from the cache.
    #
    # Reads `envs$RunUMAP` (argument list) and `cache_dir` from the enclosing
    # script. The cache signature is built from the object and the RunUMAP
    # arguments together.
    #
    # Args:
    #   sobj: The Seurat object.
    #
    # Returns:
    #   `sobj` with the UMAP reduction and the command log updated.
    umap_cache <- get_cached(
        list(sobj = sobj, RunUMAP = envs$RunUMAP),
        "RunUMAP",
        cache_dir
    )
    reduction_key <- envs$RunUMAP$reduction.name %||% "umap"

    # Cache hit: restore the stored reduction and command log, then finish.
    if (!is.null(umap_cache$data)) {
        log_info("Loading cached RunUMAP ...")
        sobj@reductions[[reduction_key]] <- umap_cache$data$reduc
        sobj@commands <- umap_cache$data$commands
        return(sobj)
    }

    log_info("Running RunUMAP ...")
    call_args <- list_setdefault(
        envs$RunUMAP,
        object = sobj,
        dims = 1:30,
        reduction = sobj@misc$integrated_new_reduction %||% "pca"
    )
    n_cells <- ncol(sobj)
    # Cap the highest requested dimension at (number of cells - 1).
    highest_dim <- min(max(call_args$dims), n_cells - 1)
    call_args$dims <- 1:highest_dim

    method <- envs$RunUMAP$umap.method %||% "uwot"
    if (method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
        # https://github.com/satijalab/seurat/issues/4312
        call_args$n.neighbors <- min(n_cells - 1, 30)
    }

    sobj <- do_call(RunUMAP, call_args)
    umap_cache$data <- list(
        reduc = sobj@reductions[[reduction_key]],
        commands = sobj@commands
    )
    save_to_cache(umap_cache, "RunUMAP", cache_dir)

    sobj
}
122
+
123
run_findneighbors <- function(sobj) {
    # Build the neighbor graphs for `sobj`, or restore them from the cache.
    #
    # Reads `envs$FindNeighbors` (argument list) and `cache_dir` from the
    # enclosing script.
    #
    # Args:
    #   sobj: The Seurat object.
    #
    # Returns:
    #   `sobj` with its graphs and command log updated.
    nb_cache <- get_cached(
        list(sobj = sobj, FindNeighbors = envs$FindNeighbors),
        "FindNeighbors",
        cache_dir
    )

    # Cache hit: restore the stored graphs and command log, then finish.
    if (!is.null(nb_cache$data)) {
        log_info("Loading cached FindNeighbors ...")
        sobj@graphs <- nb_cache$data$graphs
        sobj@commands <- nb_cache$data$commands
        return(sobj)
    }

    log_info("Running FindNeighbors ...")
    # NOTE(review): presumably `envs` is a plain list, in which case these
    # assignments only touch a function-local copy — confirm against the caller.
    envs$FindNeighbors$object <- sobj
    envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
    sobj <- do_call(FindNeighbors, envs$FindNeighbors)

    nb_cache$data <- list(graphs = sobj@graphs, commands = sobj@commands)
    save_to_cache(nb_cache, "FindNeighbors", cache_dir)

    sobj
}
144
+
145
run_findclusters <- function(sobj) {
    # Cluster `sobj` at one or more resolutions, or restore the cluster
    # assignments from the cache.
    #
    # Reads `envs$FindClusters` (argument list) and `cache_dir` from the
    # enclosing script. One metadata column "seurat_clusters.<resolution>" is
    # requested per resolution, and all cluster columns are relabelled with
    # `recode_clusters()`. The identities are set to "seurat_clusters".
    #
    # Args:
    #   sobj: The Seurat object.
    #
    # Returns:
    #   `sobj` with cluster metadata, identities and command log updated.
    clu_cache <- get_cached(
        list(sobj = sobj, FindClusters = envs$FindClusters),
        "FindClusters",
        cache_dir
    )

    # Cache hit: re-attach the stored cluster columns and finish.
    if (!is.null(clu_cache$data)) {
        log_info("Loading cached FindClusters ...")

        sobj <- AddMetaData(sobj, metadata = clu_cache$data$clusters)
        Idents(sobj) <- "seurat_clusters"
        sobj@commands <- clu_cache$data$commands
        return(sobj)
    }

    fc_args <- envs$FindClusters
    fc_args$random.seed <- fc_args$random.seed %||% 8525
    # `resolution` keeps its name: the log templates below refer to it.
    resolution <- expand_resolution(fc_args$resolution %||% 0.8)
    fc_args$resolution <- resolution
    log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")

    fc_args$object <- sobj
    per_res_cols <- paste0("seurat_clusters.", resolution)
    fc_args$cluster.name <- per_res_cols
    sobj <- do_call(FindClusters, fc_args)

    # Relabel every per-resolution column, then the default cluster column.
    for (col in per_res_cols) {
        sobj@meta.data[[col]] <- recode_clusters(sobj@meta.data[[col]])
    }
    sobj@meta.data$seurat_clusters <- recode_clusters(sobj@meta.data$seurat_clusters)
    Idents(sobj) <- "seurat_clusters"

    # `ident_table` keeps its name: the log template below refers to it.
    ident_table <- table(Idents(sobj))
    log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
    print(ident_table)
    cat("\n")

    clu_cache$data <- list(
        clusters = sobj@meta.data[, c(per_res_cols, "seurat_clusters"), drop = FALSE],
        commands = sobj@commands
    )
    save_to_cache(clu_cache, "FindClusters", cache_dir)

    sobj
}
187
+
188
run_prepsctfindmarkers <- function(sobj) {
    # Run PrepSCTFindMarkers on `sobj` when its default assay is "SCT", or
    # restore the prepared object from the cache.
    #
    # Reads `cache_dir` from the enclosing script.
    #
    # Args:
    #   sobj: The Seurat object.
    #
    # Returns:
    #   The prepared object; `sobj` unchanged when the default assay is not
    #   "SCT".
    if (DefaultAssay(sobj) != "SCT") {
        return(sobj)
    }

    prep_cache <- get_cached(list(sobj = sobj), "PrepSCTFindMarkers", cache_dir)
    # Cache hit: the whole prepared object was stored, so return it directly.
    if (!is.null(prep_cache$data)) {
        log_info("Loading cached PrepSCTFindMarkers ...")
        return(prep_cache$data)
    }

    # https://github.com/satijalab/seurat/issues/6968
    log_info("Running PrepSCTFindMarkers ...")
    sobj <- PrepSCTFindMarkers(sobj)

    # Record the step in sobj@commands by copying the FindClusters command
    # entry and overwriting its fields.
    cmd <- sobj@commands$FindClusters
    cmd@name <- "PrepSCTFindMarkers"
    cmd@time.stamp <- Sys.time()
    cmd@assay.used <- "SCT"
    cmd@call.string <- "PrepSCTFindMarkers(object = sobj)"
    cmd@params <- list()
    sobj@commands$PrepSCTFindMarkers <- cmd

    prep_cache$data <- sobj
    save_to_cache(prep_cache, "PrepSCTFindMarkers", cache_dir)

    sobj
}
@@ -1,5 +1,5 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/caching.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "caching.R" | source_r }}
3
3
 
4
4
  library(Seurat)
5
5
  library(future)
@@ -7,7 +7,6 @@ library(rlang)
7
7
  library(tidyr)
8
8
  library(dplyr)
9
9
  library(digest)
10
- library(clustree)
11
10
 
12
11
  set.seed(8525)
13
12
 
@@ -24,16 +23,10 @@ options(str = strOptions(vec.len = 5, digits.d = 5))
24
23
  options(future.globals.maxSize = 80000 * 1024^2)
25
24
  plan(strategy = "multicore", workers = envs$ncores)
26
25
 
27
- .expand_dims <- function(args, name = "dims") {
28
- # Expand dims from 30 to 1:30
29
- if (is.numeric(args[[name]]) && length(args[[name]] == 1)) {
30
- args[[name]] <- 1:args[[name]]
31
- }
32
- args
33
- }
26
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "SeuratClustering-common.R" | source_r }}
34
27
 
35
- envs$RunUMAP <- .expand_dims(envs$RunUMAP)
36
- envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
28
+ envs$RunUMAP <- expand_dims(envs$RunUMAP)
29
+ envs$FindNeighbors <- expand_dims(envs$FindNeighbors)
37
30
 
38
31
  log_info("Reading Seurat object ...")
39
32
  sobj <- readRDS(srtfile)
@@ -53,164 +46,11 @@ if (is.character(envs$cache)) {
53
46
  writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
54
47
  }
55
48
 
56
- if (length(envs$ScaleData) > 0) {
57
- if (DefaultAssay(sobj) == "SCT") {
58
- stop("SCT assay detected, but ScaleData is specified. Use SCTransform instead.")
59
- }
60
- cached <- get_cached(envs$ScaleData, "ScaleData", cache_dir)
61
- if (is.null(cached$data)) {
62
- log_info("Running ScaleData ...")
63
- envs$ScaleData$object <- sobj
64
- sobj <- do_call(ScaleData, envs$ScaleData)
65
- cached$data <- list(assay = sobj@assays$RNA, commands = sobj@commands)
66
- save_to_cache(cached, "ScaleData", cache_dir)
67
- } else {
68
- log_info("Loading cached ScaleData ...")
69
- sobj@assays$RNA <- cached$data$assay
70
- sobj@commands <- cached$data$commands
71
- DefaultAssay(sobj) <- "RNA"
72
- }
73
- } else if (length(envs$SCTransform) > 0) {
74
- if (DefaultAssay(sobj) != "SCT") {
75
- stop("SCT assay not detected, but SCTransform is specified. Use ScaleData instead.")
76
- }
77
- cached <- get_cached(envs$SCTransform, "SCTransform", cache_dir)
78
- asssay <- envs$SCTransform$new.assay.name %||% "SCT"
79
- if (is.null(cached$data)) {
80
- log_info("Running SCTransform ...")
81
- envs$SCTransform$object <- sobj
82
- sobj <- do_call(SCTransform, envs$SCTransform)
83
- cached$data <- list(assay = sobj@assays$SCT, commands = sobj@commands)
84
- save_to_cache(cached, "SCTransform", cache_dir)
85
- } else {
86
- log_info("Loading cached SCTransform ...")
87
- sobj@assays[[assay]] <- cached$data$assay
88
- sobj@commands <- cached$data$commands
89
- DefaultAssay(sobj) <- assay
90
- }
91
- }
92
-
93
- cached <- get_cached(envs$RunUMAP, "RunUMAP", cache_dir)
94
- reduc_name <- envs$RunUMAP$reduction.name %||% "umap"
95
- if (is.null(cached$data)) {
96
- log_info("Running RunUMAP ...")
97
- umap_args <- list_setdefault(
98
- envs$RunUMAP,
99
- object = sobj,
100
- dims = 1:30,
101
- reduction = sobj@misc$integrated_new_reduction %||% "pca"
102
- )
103
- ncells <- ncol(sobj)
104
- umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
105
- umap_method <- envs$RunUMAP$umap.method %||% "uwot"
106
- if (umap_method == "uwot" && is.null(envs$RunUMAP$n.neighbors)) {
107
- # https://github.com/satijalab/seurat/issues/4312
108
- umap_args$n.neighbors <- min(ncells - 1, 30)
109
- }
110
- sobj <- do_call(RunUMAP, umap_args)
111
- cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
112
- save_to_cache(cached, "RunUMAP", cache_dir)
113
- } else {
114
- log_info("Loading cached RunUMAP ...")
115
- sobj@reductions[[reduc_name]] <- cached$data$reduc
116
- sobj@commands <- cached$data$commands
117
- }
118
-
119
- cached <- get_cached(envs$FindNeighbors, "FindNeighbors", cache_dir)
120
- if (is.null(cached$data)) {
121
- log_info("Running FindNeighbors ...")
122
- envs$FindNeighbors$object <- sobj
123
- envs$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
124
- sobj <- do_call(FindNeighbors, envs$FindNeighbors)
125
- cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
126
- save_to_cache(cached, "FindNeighbors", cache_dir)
127
- } else {
128
- log_info("Loading cached FindNeighbors ...")
129
- sobj@graphs <- cached$data$graphs
130
- sobj@commands <- cached$data$commands
131
- }
132
-
133
- envs$FindClusters$random.seed <- envs$FindClusters$random.seed %||% 8525
134
- expand_resolution <- function(resolution) {
135
- expanded_res <- c()
136
- for (res in resolution) {
137
- if (is.numeric(res)) {
138
- expanded_res <- c(expanded_res, res)
139
- } else {
140
- # is.character
141
- parts <- trimws(unlist(strsplit(res, ",")))
142
- for (part in parts) {
143
- if (grepl(":", part)) {
144
- parts <- trimws(unlist(strsplit(part, ":")))
145
- if (length(parts) == 2) { parts <- c(parts, 0.1) }
146
- if (length(parts) != 3) {
147
- stop("Invalid resolution format: {part}. Expected 2 or 3 parts separated by ':' for a range.")
148
- }
149
- parts <- as.numeric(parts)
150
- expanded_res <- c(expanded_res, seq(parts[1], parts[2], by = parts[3]))
151
- } else {
152
- expanded_res <- c(expanded_res, as.numeric(part))
153
- }
154
- }
155
- }
156
- }
157
- # keep the last resolution at last
158
- rev(unique(rev(expanded_res)))
159
- }
160
- resolution <- envs$FindClusters$resolution <- expand_resolution(envs$FindClusters$resolution %||% 0.8)
161
- log_info("Running FindClusters at resolution: {paste(resolution, collapse=',')} ...")
162
-
163
- envs$FindClusters$object <- sobj
164
- sobj <- do_call(FindClusters, envs$FindClusters)
165
-
166
- # recode clusters from 0, 1, 2, ... to c1, c2, c3, ...
167
- recode_clusters <- function(clusters) {
168
- recode <- function(x) paste0("c", as.integer(as.character(x)) + 1)
169
- clusters <- factor(recode(clusters), levels = recode(levels(clusters)))
170
- clusters
171
- }
172
-
173
- graph_name <- envs$FindClusters$graph.name %||% paste0(DefaultAssay(sobj), "_snn_res.")
174
- for (res in resolution) {
175
- cluster_name <- paste0(graph_name, res)
176
- new_cluster_name <- paste0("seurat_clusters.", res)
177
- sobj@meta.data[[new_cluster_name]] <- recode_clusters(sobj@meta.data[[cluster_name]])
178
- }
179
- sobj@meta.data$seurat_clusters <- recode_clusters(sobj@meta.data$seurat_clusters)
180
- Idents(sobj) <- "seurat_clusters"
181
-
182
- ident_table <- table(Idents(sobj))
183
- log_info("- Found {length(ident_table)} clusters at resolution {resolution[length(resolution)]}")
184
- print(ident_table)
185
- cat("\n")
186
-
187
- # plot the tree
188
- if (length(resolution) > 1) {
189
- log_info("Plotting clustree ...")
190
- png(
191
- file.path(joboutdir, "clustree.png"),
192
- res = envs$clustree_devpars$res,
193
- width = envs$clustree_devpars$width,
194
- height = envs$clustree_devpars$height
195
- )
196
- p <- clustree(sobj, prefix = "seurat_clusters.")
197
- print(p)
198
- dev.off()
199
- }
200
-
201
- if (DefaultAssay(sobj) == "SCT") {
202
- # https://github.com/satijalab/seurat/issues/6968
203
- log_info("Running PrepSCTFindMarkers ...")
204
- sobj <- PrepSCTFindMarkers(sobj)
205
- # compose a new SeuratCommand to record it to sobj@commands
206
- scommand <- sobj@commands$FindClusters
207
- scommand@name <- "PrepSCTFindMarkers"
208
- scommand@time.stamp <- Sys.time()
209
- scommand@assay.used <- "SCT"
210
- scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
211
- scommand@params <- list()
212
- sobj@commands$PrepSCTFindMarkers <- scommand
213
- }
49
+ sobj <- run_transformation(sobj)
50
+ sobj <- run_umap(sobj)
51
+ sobj <- run_findneighbors(sobj)
52
+ sobj <- run_findclusters(sobj)
53
+ sobj <- run_prepsctfindmarkers(sobj)
214
54
 
215
55
  log_info("Saving results ...")
216
56
  saveRDS(sobj, file = rdsfile)