biopipen 0.29.2__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (105) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +2 -0
  3. biopipen/core/filters.py +21 -0
  4. biopipen/ns/plot.py +55 -0
  5. biopipen/ns/scrna.py +49 -13
  6. biopipen/ns/web.py +87 -5
  7. biopipen/scripts/bam/CNAClinic.R +2 -1
  8. biopipen/scripts/cellranger/CellRangerCount.py +3 -3
  9. biopipen/scripts/cellranger/CellRangerSummary.R +2 -1
  10. biopipen/scripts/cnv/AneuploidyScore.R +1 -1
  11. biopipen/scripts/cnv/AneuploidyScoreSummary.R +2 -2
  12. biopipen/scripts/delim/RowsBinder.R +1 -1
  13. biopipen/scripts/delim/SampleInfo.R +3 -2
  14. biopipen/scripts/gene/GeneNameConversion.R +2 -2
  15. biopipen/scripts/gsea/Enrichr.R +3 -3
  16. biopipen/scripts/gsea/FGSEA.R +2 -2
  17. biopipen/scripts/gsea/GSEA.R +2 -2
  18. biopipen/scripts/gsea/PreRank.R +2 -2
  19. biopipen/scripts/plot/Heatmap.R +3 -3
  20. biopipen/scripts/plot/Manhattan.R +2 -1
  21. biopipen/scripts/plot/QQPlot.R +1 -1
  22. biopipen/scripts/plot/ROC.R +1 -1
  23. biopipen/scripts/plot/Scatter.R +112 -0
  24. biopipen/scripts/plot/VennDiagram.R +3 -3
  25. biopipen/scripts/regulatory/MotifAffinityTest.R +3 -7
  26. biopipen/scripts/rnaseq/Simulation.R +1 -1
  27. biopipen/scripts/rnaseq/UnitConversion.R +2 -1
  28. biopipen/scripts/scrna/AnnData2Seurat.R +1 -1
  29. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +24 -8
  30. biopipen/scripts/scrna/CellTypeAnnotation-common.R +10 -0
  31. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +9 -1
  32. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -8
  33. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +15 -2
  34. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +38 -15
  35. biopipen/scripts/scrna/CellTypeAnnotation.R +3 -0
  36. biopipen/scripts/scrna/CellsDistribution.R +3 -2
  37. biopipen/scripts/scrna/DimPlots.R +1 -1
  38. biopipen/scripts/scrna/ExprImputation-alra.R +1 -1
  39. biopipen/scripts/scrna/MarkersFinder.R +5 -5
  40. biopipen/scripts/scrna/MetaMarkers.R +4 -4
  41. biopipen/scripts/scrna/ModuleScoreCalculator.R +2 -1
  42. biopipen/scripts/scrna/RadarPlots.R +1 -1
  43. biopipen/scripts/scrna/ScFGSEA.R +4 -3
  44. biopipen/scripts/scrna/Seurat2AnnData.R +1 -1
  45. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +73 -0
  46. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +4 -3
  47. biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -5
  48. biopipen/scripts/scrna/SeuratClusterStats-hists.R +6 -5
  49. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +4 -3
  50. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -3
  51. biopipen/scripts/scrna/SeuratClusterStats.R +24 -8
  52. biopipen/scripts/scrna/SeuratClustering-common.R +213 -0
  53. biopipen/scripts/scrna/SeuratClustering.R +10 -170
  54. biopipen/scripts/scrna/SeuratMap2Ref.R +65 -31
  55. biopipen/scripts/scrna/SeuratMetadataMutater.R +2 -2
  56. biopipen/scripts/scrna/SeuratPreparing-common.R +452 -0
  57. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +201 -0
  58. biopipen/scripts/scrna/SeuratPreparing.R +22 -562
  59. biopipen/scripts/scrna/SeuratSubClustering.R +24 -39
  60. biopipen/scripts/scrna/TopExpressingGenes.R +1 -1
  61. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +2 -2
  62. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +2 -2
  63. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
  64. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +3 -3
  65. biopipen/scripts/snp/MatrixEQTL.R +1 -1
  66. biopipen/scripts/snp/PlinkCallRate.R +2 -2
  67. biopipen/scripts/snp/PlinkFreq.R +2 -2
  68. biopipen/scripts/snp/PlinkHWE.R +2 -2
  69. biopipen/scripts/snp/PlinkHet.R +2 -2
  70. biopipen/scripts/snp/PlinkIBD.R +2 -2
  71. biopipen/scripts/stats/ChowTest.R +1 -1
  72. biopipen/scripts/stats/DiffCoexpr.R +1 -1
  73. biopipen/scripts/stats/LiquidAssoc.R +1 -1
  74. biopipen/scripts/stats/Mediation.R +11 -9
  75. biopipen/scripts/stats/MetaPvalue.R +4 -1
  76. biopipen/scripts/stats/MetaPvalue1.R +4 -1
  77. biopipen/scripts/tcr/Attach2Seurat.R +1 -1
  78. biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
  79. biopipen/scripts/tcr/CloneResidency.R +2 -2
  80. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  81. biopipen/scripts/tcr/Immunarch-basic.R +0 -4
  82. biopipen/scripts/tcr/Immunarch-clonality.R +0 -4
  83. biopipen/scripts/tcr/Immunarch-diversity.R +2 -24
  84. biopipen/scripts/tcr/Immunarch-geneusage.R +0 -2
  85. biopipen/scripts/tcr/Immunarch-kmer.R +0 -2
  86. biopipen/scripts/tcr/Immunarch-overlap.R +0 -2
  87. biopipen/scripts/tcr/Immunarch-spectratyping.R +0 -2
  88. biopipen/scripts/tcr/Immunarch-tracking.R +0 -2
  89. biopipen/scripts/tcr/Immunarch-vjjunc.R +0 -2
  90. biopipen/scripts/tcr/Immunarch.R +43 -11
  91. biopipen/scripts/tcr/ImmunarchFilter.R +1 -1
  92. biopipen/scripts/tcr/ImmunarchLoading.R +2 -2
  93. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  94. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  95. biopipen/scripts/tcr/TCRClustering.R +2 -2
  96. biopipen/scripts/tcr/TESSA.R +2 -2
  97. biopipen/scripts/vcf/TruvariBenchSummary.R +2 -2
  98. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  99. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  100. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  101. biopipen/scripts/web/gcloud_common.py +49 -0
  102. {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/METADATA +1 -1
  103. {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/RECORD +105 -96
  104. {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/WHEEL +0 -0
  105. {biopipen-0.29.2.dist-info → biopipen-0.30.0.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  # Script for regulatory.MotifAffinityTest
2
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
3
 
3
- source("{{biopipen_dir}}/utils/misc.R")
4
4
  library(BiocParallel)
5
5
  library(BSgenome)
6
6
  library(universalmotif)
@@ -215,12 +215,8 @@ tool <- match.arg(tool, c("motifbreakr", "atsnp"))
215
215
 
216
216
  if (tool == "motifbreakr") {
217
217
  motifbreakr_args <- {{envs.motifbreakr_args | r}}
218
- {% set sourcefile = biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_MotifBreakR.R" %}
219
- # {{ sourcefile | getmtime }}
220
- source("{{sourcefile}}")
218
+ {{ biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_MotifBreakR.R" | source_r }}
221
219
  } else { # atsnp
222
220
  atsnp_args <- {{envs.atsnp_args | r}}
223
- {% set sourcefile = biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_AtSNP.R" %}
224
- # {{ sourcefile | getmtime }}
225
- source("{{sourcefile}}")
221
+ {{ biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_AtSNP.R" | source_r }}
226
222
  }
@@ -1,4 +1,4 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
2
 
3
3
  ngenes <- {{in.ngenes | r}}
4
4
  nsamples <- {{in.nsamples | r}}
@@ -1,4 +1,5 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+
2
3
  library(rlang)
3
4
  library(glue)
4
5
 
@@ -1,4 +1,4 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
2
 
3
3
  library(rlang)
4
4
  library(Seurat)
@@ -1,5 +1,3 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
-
3
1
  library(rlang)
4
2
  library(hdf5r)
5
3
  library(dplyr)
@@ -8,6 +6,7 @@ library(Seurat)
8
6
  sobjfile <- {{in.sobjfile | r}}
9
7
  outfile <- {{out.outfile | r}}
10
8
  newcol <- {{envs.newcol | r}}
9
+ merge_same_labels <- {{envs.merge | r}}
11
10
  celltypist_args <- {{envs.celltypist_args | r}}
12
11
 
13
12
  outdir <- dirname(outfile)
@@ -33,6 +32,7 @@ if (!file.exists(modelfile)) {
33
32
  sobj <- NULL
34
33
  outtype <- tolower(tools::file_ext(outfile)) # .rds, .h5ad, .h5seurat
35
34
  if (!endsWith(sobjfile, ".h5ad")) {
35
+ log_info("Convert input to H5AD ...")
36
36
  library(SeuratDisk)
37
37
 
38
38
  assay <- celltypist_args$assay
@@ -123,8 +123,7 @@ if (file.exists(celltypist_outfile) &&
123
123
  if (isTRUE(celltypist_args$majority_voting)) {
124
124
  command <- paste(command, "-v")
125
125
  }
126
- print("Running celltypist:")
127
- print(command)
126
+ log_info("Running celltypist:")
128
127
  log_debug("- {command}")
129
128
  rc <- system(command)
130
129
  if (rc != 0) {
@@ -135,11 +134,21 @@ if (file.exists(celltypist_outfile) &&
135
134
  if (outtype == "h5ad") {
136
135
  # log_info("Using H5AD from celltypist as output directly ...")
137
136
  # file.rename(paste0(out_prefix, ".h5ad"), outfile)
137
+ if (merge_same_labels) {
138
+ log_warn("- Merging clusters with the same labels is not supported for h5ad outfile ...")
139
+ }
138
140
  } else if (outtype == "h5seurat") {
139
141
  log_info("Converting H5AD from celltypist to H5Seurat ...")
140
142
  # outfile is cleaned by the pipeline anyway
141
143
  Convert(
142
- celltypist_outfile, assay = assay %||% 'RNA', dest = outfile, overwrite = TRUE)
144
+ celltypist_outfile,
145
+ assay = assay %||% 'RNA',
146
+ dest = outfile,
147
+ overwrite = TRUE
148
+ )
149
+ if (merge_same_labels) {
150
+ log_warn("- Merging clusters with the same labels is not supported for h5seurat outfile ...")
151
+ }
143
152
  } else if (outtype == "rds") {
144
153
  if (is.null(sobj)) {
145
154
  log_info("Converting H5AD from celltypist to RDS ...")
@@ -178,7 +187,10 @@ if (outtype == "h5ad") {
178
187
  # end
179
188
 
180
189
  sobj <- LoadH5Seurat(h5seurat_file)
181
- saveRDS(sobj, outfile)
190
+ if (merge_same_labels) {
191
+ log_info("Merging clusters with the same labels ...")
192
+ sobj <- merge_clusters_with_same_labels(sobj, newcol)
193
+ }
182
194
  } else {
183
195
  log_info("Attaching celltypist results to Seurat object ...")
184
196
 
@@ -228,9 +240,13 @@ if (outtype == "h5ad") {
228
240
  } else if (!is.null(newcol)) {
229
241
  sobj@meta.data[[newcol]] <- sobj@meta.data[["predicted_labels"]]
230
242
  }
231
- log_info("Saving Seurat object in RDS ...")
232
- saveRDS(sobj, outfile)
243
+ if (merge_same_labels) {
244
+ log_info("Merging clusters with the same labels ...")
245
+ sobj <- merge_clusters_with_same_labels(sobj, newcol)
246
+ }
233
247
  }
248
+ log_info("Saving Seurat object in RDS ...")
249
+ saveRDS(sobj, outfile)
234
250
  } else {
235
251
  stop(paste0("Unknown output type: ", outtype))
236
252
  }
@@ -0,0 +1,10 @@
1
+ merge_clusters_with_same_labels <- function(sobj, newcol) {
2
+ if (is.null(newcol)) {
3
+ sobj@meta.data$seurat_clusters <- sub("\\.\\d+$", "", sobj@meta.data$seurat_clusters)
4
+ Idents(sobj) <- "seurat_clusters"
5
+ } else {
6
+ sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
7
+ }
8
+
9
+ sobj
10
+ }
@@ -1,14 +1,17 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
1
  library(Seurat)
3
2
 
4
3
  sobjfile <- {{in.sobjfile | r}}
5
4
  outfile <- {{out.outfile | r}}
6
5
  celltypes <- {{envs.cell_types | r}}
7
6
  newcol <- {{envs.newcol | r}}
7
+ merge_same_labels <- {{envs.merge | r}}
8
8
 
9
9
  if (is.null(celltypes) || length(celltypes) == 0) {
10
10
  log_warn("No cell types are given!")
11
11
 
12
+ if (merge_same_labels) {
13
+ log_warn("Ignoring 'envs.merge' because no cell types are given!")
14
+ }
12
15
  # create a symbolic link to the input file
13
16
  file.symlink(sobjfile, outfile)
14
17
  } else {
@@ -55,5 +58,10 @@ if (is.null(celltypes) || length(celltypes) == 0) {
55
58
  Idents(sobj) <- "seurat_clusters"
56
59
  }
57
60
 
61
+ if (merge_same_labels) {
62
+ log_info("Merging clusters with the same labels ...")
63
+ sobj <- merge_clusters_with_same_labels(sobj, newcol)
64
+ }
65
+
58
66
  saveRDS(sobj, outfile)
59
67
  }
@@ -2,21 +2,20 @@ library(Seurat)
2
2
  library(dplyr)
3
3
  library(hitype)
4
4
 
5
- source("{{biopipen_dir}}/utils/misc.R")
6
-
7
5
  sobjfile = {{in.sobjfile | r}}
8
6
  outfile = {{out.outfile | r}}
9
7
  tissue = {{envs.hitype_tissue | r}}
10
8
  db = {{envs.hitype_db | r}}
11
9
  newcol = {{envs.newcol | r}}
10
+ merge_same_labels = {{envs.merge | r}}
12
11
 
13
12
  if (is.null(db)) { stop("`envs.hitype_db` is not set") }
14
13
 
15
- print("- Reading Seurat object...")
14
+ log_info("Reading Seurat object...")
16
15
  sobj = readRDS(sobjfile)
17
16
 
18
17
  # prepare gene sets
19
- print("- Preparing gene sets...")
18
+ log_info("Preparing gene sets...")
20
19
  if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
21
20
  gs_list = gs_prepare(eval(as.symbol(db)), tissue)
22
21
  } else {
@@ -24,10 +23,10 @@ if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
24
23
  }
25
24
 
26
25
  # run RunHitype
27
- print("- Running RunHitype...")
26
+ log_info("Running RunHitype...")
28
27
  sobj = RunHitype(sobj, gs_list, threshold = 0.0, make_unique = TRUE)
29
28
 
30
- print("- Renaming cell types...")
29
+ log_info("Renaming cell types...")
31
30
  hitype_levels = sobj@meta.data %>%
32
31
  select(seurat_clusters, hitype) %>%
33
32
  distinct(seurat_clusters, .keep_all = TRUE) %>%
@@ -42,10 +41,15 @@ if (is.null(newcol)) {
42
41
  sobj[[newcol]] = factor(sobj$hitype, levels = hitype_levels)
43
42
  }
44
43
 
45
- print("- Saving Seurat object...")
44
+ if (merge_same_labels) {
45
+ log_info("Merging clusters with the same labels...")
46
+ sobj = merge_clusters_with_same_labels(sobj, newcol)
47
+ }
48
+
49
+ log_info("Saving Seurat object...")
46
50
  saveRDS(sobj, outfile)
47
51
 
48
- print("- Saving the mappings ...")
52
+ log_info("Saving the mappings ...")
49
53
  if (is.null(newcol)) {
50
54
  celltypes = sobj@meta.data %>%
51
55
  group_by(seurat_clusters_id) %>%
@@ -1,4 +1,3 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
1
  library(scCATCH)
3
2
  library(Seurat)
4
3
 
@@ -6,6 +5,7 @@ sobjfile = {{in.sobjfile | r}}
6
5
  outfile = {{out.outfile | r}}
7
6
  sccatch_args = {{envs.sccatch_args | r}}
8
7
  newcol = {{envs.newcol | r}}
8
+ merge_same_labels = {{envs.merge | r}}
9
9
 
10
10
  if (!is.null(sccatch_args$marker)) {
11
11
  cellmatch = readRDS(sccatch_args$marker)
@@ -17,14 +17,20 @@ if (is.integer(sccatch_args$use_method)) {
17
17
  sccatch_args$use_method = as.character(sccatch_args$use_method)
18
18
  }
19
19
 
20
+ log_info("Reading Seurat object...")
20
21
  sobj = readRDS(sobjfile)
21
22
 
23
+ log_info("Running createscCATCH ...")
22
24
  obj = createscCATCH(data = GetAssayData(sobj), cluster = as.character(Idents(sobj)))
23
25
  sccatch_args$object = obj
24
26
 
27
+ log_info("Running findmarkergene ...")
25
28
  obj = do_call(findmarkergene, sccatch_args)
29
+
30
+ log_info("Running findcelltype ...")
26
31
  obj = findcelltype(object = obj)
27
32
 
33
+ log_info("Saving the mappings ...")
28
34
  write.table(
29
35
  obj@celltype,
30
36
  file = file.path(dirname(outfile), "cluster2celltype.tsv"),
@@ -36,7 +42,7 @@ celltypes = as.list(obj@celltype$cell_type)
36
42
  names(celltypes) = obj@celltype$cluster
37
43
 
38
44
  if (length(celltypes) == 0) {
39
- warning("No cell types annotated from the database!")
45
+ log_warn("- No cell types annotated from the database!")
40
46
  } else {
41
47
  if (is.null(newcol)) {
42
48
  sobj$seurat_clusters_id = Idents(sobj)
@@ -49,5 +55,12 @@ if (length(celltypes) == 0) {
49
55
  sobj[[newcol]] = Idents(sobj)
50
56
  Idents(sobj) = "seurat_clusters"
51
57
  }
58
+
59
+ if (merge_same_labels) {
60
+ log_info("Merging clusters with the same labels ...")
61
+ sobj = merge_clusters_with_same_labels(sobj, newcol)
62
+ }
52
63
  }
64
+
65
+ log_info("Saving Seurat object ...")
53
66
  saveRDS(sobj, outfile)
@@ -1,34 +1,37 @@
1
1
  library(dplyr)
2
2
  library(HGNChelper)
3
3
  library(Seurat)
4
+ library(rlang)
4
5
 
5
- source("{{biopipen_dir}}/utils/misc.R")
6
- source("{{biopipen_dir}}/scripts/scrna/sctype.R")
6
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "sctype.R" | source_r }}
7
7
 
8
8
  sobjfile = {{in.sobjfile | r}}
9
9
  outfile = {{out.outfile | r}}
10
10
  tissue = {{envs.sctype_tissue | r}}
11
11
  db = {{envs.sctype_db | r}}
12
12
  newcol = {{envs.newcol | r}}
13
+ merge_same_labels = {{envs.merge | r}}
13
14
 
14
15
  if (is.null(db)) { stop("`envs.sctype_args.db` is not set") }
15
16
 
16
- print("- Reading Seurat object...")
17
+ log_info("Reading Seurat object...")
17
18
  sobj = readRDS(sobjfile)
18
19
 
19
20
  # prepare gene sets
20
- print("- Preparing gene sets...")
21
+ log_info("Preparing gene sets...")
21
22
  gs_list = gene_sets_prepare(db, tissue)
22
23
 
23
24
  scRNAseqData = GetAssayData(sobj, layer = "scale.data")
24
25
  idents = as.character(unique(Idents(sobj)))
25
26
  idents = idents[order(as.numeric(idents))]
26
27
 
28
+ log_info("Working on different levels of cell type labels ...")
27
29
  cell_types_list = list()
28
30
  for (i in seq_along(gs_list)) {
31
+ log_info("- Working on level {i} ...")
29
32
  if (is.null(gs_list[[i]])) next
30
33
 
31
- print(paste0("- Calculating cell-type scores for level ", i, "..."))
34
+ log_info(" Calculating cell-type scores ...")
32
35
  es.max = sctype_score(
33
36
  scRNAseqData = scRNAseqData,
34
37
  scaled = TRUE,
@@ -36,7 +39,7 @@ for (i in seq_along(gs_list)) {
36
39
  gs2 = gs_list[[i]]$gs_negative
37
40
  )
38
41
 
39
- print(paste0("- Merging cell-type scores by cluster for level ", i, "..."))
42
+ log_info(" Merging cell-type scores by cluster ...")
40
43
  cl_resutls = do_call(
41
44
  "rbind",
42
45
  lapply(
@@ -59,12 +62,12 @@ for (i in seq_along(gs_list)) {
59
62
  write("\n####### sctype_scores_count ########", stderr())
60
63
  write(capture.output(sctype_scores_count), stderr())
61
64
  write("\n####################################", stderr())
62
- warning("Scores tied in the above clusters.", immediate. = TRUE)
65
+ log_info(" Scores tied in the above clusters.", immediate. = TRUE)
63
66
  }
64
67
 
65
68
  if (length(gs_list) == 1 || i > 1) {
66
69
  # set low-confident (low ScType score) clusters to "unknown"
67
- print("- Setting low-confident clusters to 'Unknown'...")
70
+ log_info(" Setting low-confident clusters to 'Unknown'...")
68
71
  sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
69
72
  }
70
73
 
@@ -82,7 +85,7 @@ for (i in seq_along(gs_list)) {
82
85
  if (length(cell_types_list) == 1) {
83
86
  celltypes = cell_types_list[[1]]
84
87
  } else {
85
- print("- Merging cell types at all levels ...")
88
+ log_info("Merging cell types at all levels ...")
86
89
  celltypes = list()
87
90
 
88
91
  for (i in idents) {
@@ -97,7 +100,18 @@ if (length(cell_types_list) == 1) {
97
100
  }
98
101
 
99
102
 
100
- print("- Renaming cell types...")
103
+ log_info("Renaming cell types...")
104
+ ct_numbering = list()
105
+ for (key in names(celltypes)) {
106
+ ct = celltypes[[key]]
107
+ ct_numbering[[ct]] = ct_numbering[[ct]] %||% 0
108
+ if (ct_numbering[[ct]] > 0) {
109
+ celltypes[[key]] = paste0(ct, ".", ct_numbering[[ct]])
110
+ }
111
+ ct_numbering[[ct]] = ct_numbering[[ct]] + 1
112
+ }
113
+
114
+ celltypes = as.list(celltypes)
101
115
  if (is.null(newcol)) {
102
116
  sobj$seurat_clusters_id = sobj$seurat_clusters
103
117
  celltypes$object = sobj
@@ -109,12 +123,18 @@ if (is.null(newcol)) {
109
123
  sobj[[newcol]] = Idents(sobj)
110
124
  Idents(sobj) = "seurat_clusters"
111
125
  }
112
-
113
- print("- Saving Seurat object...")
114
- saveRDS(sobj, outfile)
115
-
116
- print("- Saving the mappings ...")
117
126
  celltypes$object = NULL
127
+ gc()
128
+
129
+ if (merge_same_labels) {
130
+ log_info("Merging clusters with the same labels...")
131
+ sobj <- merge_clusters_with_same_labels(sobj, newcol)
132
+ celltypes <- lapply(celltypes, function(ct) {
133
+ sub("\\.\\d+$", "", ct)
134
+ })
135
+ }
136
+
137
+ log_info("Saving the mappings ...")
118
138
  write.table(
119
139
  data.frame(
120
140
  Cluster = names(celltypes),
@@ -126,3 +146,6 @@ write.table(
126
146
  quote = FALSE,
127
147
  row.names = FALSE
128
148
  )
149
+
150
+ log_info("Saving Seurat object...")
151
+ saveRDS(sobj, outfile)
@@ -1,5 +1,8 @@
1
1
  set.seed(8525)
2
2
 
3
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
4
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "CellTypeAnnotation-common.R" | source_r }}
5
+
3
6
  {% if envs.tool == "hitype" %}
4
7
  {% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-hitype.R" %}
5
8
  {% elif envs.tool == "sctype" %}
@@ -1,5 +1,6 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/mutate_helpers.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
3
+
3
4
  library(Seurat)
4
5
  library(rlang)
5
6
  library(tidyr)
@@ -1,7 +1,7 @@
1
1
  library(Seurat)
2
2
  library(dplyr)
3
3
 
4
- source("{{biopipen_dir}}/utils/misc.R")
4
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
5
5
 
6
6
  srtfile = {{in.srtobj | r}}
7
7
  {% if in.configfile %}
@@ -1,4 +1,4 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
2
 
3
3
  library(SeuratWrappers)
4
4
  library(Seurat)
@@ -1,6 +1,6 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/caching.R")
3
- source("{{biopipen_dir}}/utils/mutate_helpers.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "caching.R" | source_r }}
3
+ {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
4
4
 
5
5
  library(rlang)
6
6
  library(dplyr)
@@ -70,8 +70,8 @@ if (defassay == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@commands)) {
70
70
 
71
71
  srtobj <- PrepSCTFindMarkers(srtobj)
72
72
  # compose a new SeuratCommand to record it to srtobj@commands
73
- commands <- names(srtobj@commands)
74
- scommand <- srtobj@commands[[commands[length(commands)]]]
73
+ commands <- names(pbmc_small@commands)
74
+ scommand <- pbmc_small@commands[[commands[length(commands)]]]
75
75
  scommand@name <- "PrepSCTFindMarkers"
76
76
  scommand@time.stamp <- Sys.time()
77
77
  scommand@assay.used <- "SCT"
@@ -1,5 +1,5 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/mutate_helpers.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
3
3
 
4
4
  library(rlang)
5
5
  library(dplyr)
@@ -41,8 +41,8 @@ if (DefaultAssay(srtobj) == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@com
41
41
 
42
42
  srtobj <- PrepSCTFindMarkers(srtobj)
43
43
  # compose a new SeuratCommand to record it to srtobj@commands
44
- commands <- names(srtobj@commands)
45
- scommand <- srtobj@commands[[commands[length(commands)]]]
44
+ commands <- names(pbmc_small@commands)
45
+ scommand <- pbmc_small@commands[[commands[length(commands)]]]
46
46
  scommand@name <- "PrepSCTFindMarkers"
47
47
  scommand@time.stamp <- Sys.time()
48
48
  scommand@assay.used <- "SCT"
@@ -1,4 +1,5 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+
2
3
  library(Seurat)
3
4
  library(dplyr)
4
5
 
@@ -1,4 +1,4 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
2
 
3
3
  library(Seurat)
4
4
  library(rlang)
@@ -1,6 +1,7 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
- source("{{biopipen_dir}}/utils/gsea.R")
3
- source("{{biopipen_dir}}/utils/mutate_helpers.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ {{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
3
+ {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
4
+
4
5
  library(rlang)
5
6
  library(Seurat)
6
7
  library(tidyseurat)
@@ -1,4 +1,4 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
2
 
3
3
  library(rlang)
4
4
  library(Seurat)
@@ -0,0 +1,73 @@
1
+ # srtobj, clustrees_defaults, clustrees
2
+ log_info("clustrees:")
3
+ if (
4
+ (is.null(clustrees) || length(clustrees) == 0) &&
5
+ (is.null(clustrees_defaults$prefix) || clustrees_defaults$prefix == "")) {
6
+ log_warn("- no cases, skipping intentionally ...")
7
+ } else { # clustrees set or prefix is not empty
8
+ library(clustree)
9
+ odir = file.path(outdir, "clustrees")
10
+ dir.create(odir, recursive=TRUE, showWarnings=FALSE)
11
+
12
+ if ((is.null(clustrees) || length(clustrees) == 0) && clustrees_defaults$prefix == "_auto") {
13
+ clustrees <- list()
14
+ for (key in names(srtobj@commands)) {
15
+ if (startsWith(key, "FindClusters") && length(srtobj@commands[[key]]$resolution) > 1) {
16
+ pref <- substring(key, 14)
17
+ if (pref == "") {
18
+ pref <- "seurat_clusters"
19
+ }
20
+
21
+ clustrees[[pref]] <- list(prefix = pref)
22
+ }
23
+ }
24
+ }
25
+ if (length(clustrees) == 0) {
26
+ log_warn("- no cases found, skipping ...")
27
+ } else {
28
+ reports <- list()
29
+ for (name in names(clustrees)) {
30
+ if (is.null(clustrees[[name]]$prefix)) {
31
+ stop(paste0("clustrees: prefix is required for case: ", name))
32
+ }
33
+ case <- list_update(clustrees_defaults, clustrees[[name]])
34
+
35
+ devpars <- case$devpars
36
+ devpars$width <- devpars$width %||% clustrees_defaults$devpars$width %||% 800
37
+ devpars$height <- devpars$height %||% clustrees_defaults$devpars$height %||% 1000
38
+ devpars$res <- devpars$res %||% clustrees_defaults$devpars$res %||% 100
39
+ case$devpars <- NULL
40
+ prefix <- sub("\\.$", "", case$prefix)
41
+ log_info("- Case: {name} ...")
42
+ case$prefix <- paste0(prefix, ".")
43
+ case$x <- srtobj@meta.data %>% select(starts_with(case$prefix))
44
+ case$x <- case$x[complete.cases(case$x), , drop = FALSE]
45
+
46
+ command <- srtobj@commands[[paste0("FindClusters.", prefix)]] %||%
47
+ (if(prefix == "seurat_clusters") srtobj@commands$FindClusters else NULL)
48
+
49
+ clustree_file <- file.path(odir, paste0(prefix, ".clustree.png"))
50
+ png(clustree_file, width = devpars$width, height = devpars$height, res = devpars$res)
51
+ p <- do_call(clustree, case)
52
+ print(p)
53
+ dev.off()
54
+
55
+ if (is.null(command)) {
56
+ resolution <- substring(colnames(case$x), nchar(case$prefix) + 1)
57
+ } else {
58
+ resolution <- command$resolution
59
+ }
60
+ resolution_used <- resolution[length(resolution)]
61
+
62
+ reports[[length(reports) + 1]] <- list(
63
+ kind = "table_image",
64
+ src = clustree_file,
65
+ name = name,
66
+ descr = paste0("Resolutions: ", paste(resolution, collapse = ", "), "; resolution used: ", resolution_used)
67
+ )
68
+ }
69
+ reports$h1 <- "Clustree plots"
70
+ reports$ui <- "table_of_images"
71
+ do.call(add_report, reports)
72
+ }
73
+ }
@@ -1,13 +1,14 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
 
3
- dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
4
- dimplots = {{envs.dimplots | r: todot="-", skip=1}}
3
+ # dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
4
+ # dimplots = {{envs.dimplots | r: todot="-", skip=1}}
5
+ log_info("dimplots:")
5
6
 
6
7
  odir = file.path(outdir, "dimplots")
7
8
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
8
9
 
9
10
  do_one_dimplot = function(name) {
10
- log_info(paste0("Doing dimplots for: ", name))
11
+ log_info("- Case: {name}")
11
12
 
12
13
  case = list_update(dimplots_defaults, dimplots[[name]])
13
14
  case$devpars = list_update(dimplots_defaults$devpars, dimplots[[name]]$devpars)
@@ -1,7 +1,8 @@
1
1
  # Loaded variables: srtfile, outdir, srtobj
2
2
 
3
- features_defaults = {{envs.features_defaults | r: todot="-"}}
4
- features = {{envs.features | r: todot="-", skip=1}}
3
+ # features_defaults = {{envs.features_defaults | r: todot="-"}}
4
+ # features = {{envs.features | r: todot="-", skip=1}}
5
+ log_info("features:")
5
6
 
6
7
  odir = file.path(outdir, "features")
7
8
  dir.create(odir, recursive=TRUE, showWarnings=FALSE)
@@ -36,7 +37,7 @@ dir.create(odir, recursive=TRUE, showWarnings=FALSE)
36
37
  }
37
38
 
38
39
  do_one_features = function(name) {
39
- log_info("Doing features for: {name}")
40
+ log_info("- Case: {name}")
40
41
 
41
42
  case = list_update(features_defaults, features[[name]])
42
43
  case$devpars = list_update(features_defaults$devpars, features[[name]]$devpars)
@@ -105,7 +106,7 @@ do_one_features = function(name) {
105
106
  if (is.null(ncol)) { ncol = 1 }
106
107
  list(
107
108
  width = 400 * ncol,
108
- height = ceiling(length(features) / ncol + max_nchar_idents * .05) * 150,
109
+ height = ceiling(length(features) / ncol) * (max_nchar_idents * .1 + 275),
109
110
  res = 100
110
111
  )
111
112
  }
@@ -398,7 +399,7 @@ do_one_features = function(name) {
398
399
  devpars = list_update(default_devpars(case$features, case$ncol), devpars)
399
400
  if (kind == "heatmap") {
400
401
  if (!exists("downsample") || is.null(downsample)) {
401
- log_warn("- `downsample` is not specified for `heatmap`, using `downsample=1000`")
402
+ log_warn(" 'downsample' is not specified for `heatmap`, using `downsample=1000`")
402
403
  downsample = 1000
403
404
  }
404
405
  if (is.numeric(downsample)) {