biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -2,16 +2,23 @@ library(rlang)
2
2
  library(hdf5r)
3
3
  library(dplyr)
4
4
  library(Seurat)
5
+ library(biopipen.utils)
5
6
 
6
7
  sobjfile <- {{in.sobjfile | r}}
7
8
  outfile <- {{out.outfile | r}}
8
9
  newcol <- {{envs.newcol | r}}
9
10
  merge_same_labels <- {{envs.merge | r}}
10
11
  celltypist_args <- {{envs.celltypist_args | r}}
12
+ outtype <- {{envs.outtype | r }}
13
+ if (identical(outtype, "input")) {
14
+ outtype <- tolower(tools::file_ext(outfile)) # rds, h5ad, qs/qs2
15
+ }
11
16
 
12
17
  outdir <- dirname(outfile)
13
18
  outprefix <- file.path(outdir, tools::file_path_sans_ext(basename(outfile)))
14
19
 
20
+ log <- get_logger()
21
+
15
22
  if (is.null(celltypist_args$model)) {
16
23
  stop("Please specify a model for celltypist (envs.celltypist_args.model)")
17
24
  } else if (!file.exists(celltypist_args$model)) {
@@ -30,74 +37,61 @@ if (!file.exists(modelfile)) {
30
37
  }
31
38
 
32
39
  sobj <- NULL
33
- outtype <- tolower(tools::file_ext(outfile)) # .rds, .h5ad, .h5seurat
34
40
  if (!endsWith(sobjfile, ".h5ad")) {
35
- log_info("Convert input to H5AD ...")
36
- library(SeuratDisk)
37
-
38
- assay <- celltypist_args$assay
39
- if (endsWith(sobjfile, ".rds") || endsWith(sobjfile, ".RDS")) {
40
- h5s_file <- paste0(outprefix, ".h5seurat")
41
- if (file.exists(h5s_file) && (file.mtime(h5s_file) < file.mtime(sobjfile))) {
42
- file.remove(h5s_file)
43
- }
44
- if (!file.exists(h5s_file)) {
45
- log_info("Reading RDS file ...")
46
- sobj <- readRDS(sobjfile)
47
- assay <- assay %||% DefaultAssay(sobj)
48
- # In order to convert to h5ad
49
- # https://github.com/satijalab/seurat/issues/8220#issuecomment-1871874649
50
- sobj_v3 <- sobj
51
- sobj_v3$RNAv3 <- as(object = sobj[[assay]], Class = "Assay")
52
- DefaultAssay(sobj_v3) <- "RNAv3"
53
- sobj_v3$RNA <- NULL
54
- sobj_v3 <- RenameAssays(sobj_v3, RNAv3 = "RNA")
55
-
56
- log_info("Saving to H5Seurat file ...")
57
- SaveH5Seurat(sobj_v3, h5s_file)
58
- rm(sobj_v3)
59
- } else if (outtype == "rds") {
60
- log_info("Reading RDS file ...")
61
- sobj <- readRDS(sobjfile)
62
- assay <- assay %||% DefaultAssay(sobj)
63
- log_info("Using existing H5Seurat file ...")
64
- } else {
65
- log_info("Using existing H5Seurat file ...")
41
+ sobj <- read_obj(sobjfile)
42
+ if (is.null(celltypist_args$over_clustering)) {
43
+ # find the default ident name in meta.data
44
+ for (col in colnames(sobj@meta.data)) {
45
+ if (!is.factor(sobj@meta.data[[col]])) { next }
46
+ if (isTRUE(all.equal(Idents(sobj), sobj@meta.data[[col]]))) {
47
+ celltypist_args$over_clustering <- col
48
+ break
49
+ }
66
50
  }
67
- sobjfile <- h5s_file
68
51
  }
69
- if (!endsWith(sobjfile, ".h5seurat")) {
70
- stop(paste0("Unknown input file format: ",
71
- tools::file_ext(sobjfile),
72
- ". Supported formats: .rds, .RDS, .h5ad, .h5seurat"))
52
+ if (is.null(celltypist_args$over_clustering)) {
53
+ celltypist_args$over_clustering <- FALSE
73
54
  }
74
- if (!endsWith(sobjfile, ".h5ad")) { # .h5seurat
55
+ if (!isFALSE(celltypist_args$over_clustering)) {
56
+ destfile <- paste0(outprefix, ".", celltypist_args$over_clustering, ".h5ad")
57
+ } else {
75
58
  destfile <- paste0(outprefix, ".h5ad")
76
- if (file.exists(destfile) && (file.mtime(destfile) < file.mtime(sobjfile))) {
77
- file.remove(destfile)
78
- }
79
- if (file.exists(destfile)) {
80
- log_info("Using existing H5AD file ...")
81
- } else {
82
- log_info("Converting to H5AD file ...")
83
- Convert(sobjfile, dest = destfile, assay = assay %||% "RNA")
84
- }
85
- sobjfile <- destfile
86
59
  }
60
+
61
+ if (file.exists(destfile) && (file.mtime(destfile) < file.mtime(sobjfile))) {
62
+ file.remove(destfile)
63
+ }
64
+ if (file.exists(destfile)) {
65
+ log$warn("Using existing H5AD file: {destfile} ...")
66
+ } else {
67
+ log$info("Converting to H5AD file ...")
68
+ ConvertSeuratToAnnData(
69
+ sobj,
70
+ outfile = destfile,
71
+ assay = celltypist_args$assay %||% "RNA",
72
+ log = log
73
+ )
74
+ }
75
+ sobjfile <- destfile
87
76
  }
88
77
 
89
78
  # sobjfile h5ad ensured
90
79
  # use celltypist to annotate
91
- log_info("Annotating cell types using celltypist ...")
80
+ log$info("Annotating cell types using celltypist ...")
81
+ # celltypist_script <- file.path(
82
+ # "{ {biopipen_dir} }", "scripts", "scrna", "celltypist-wrapper.py"
83
+ # )
84
+ # In case this script is running in the cloud and <biopipen_dir> cannot be found there.
85
+ # Instead, we use the python command, which is associated with the cloud environment,
86
+ # to get the biopipen directory
87
+ biopipen_dir <- get_biopipen_dir(celltypist_args$python)
92
88
  celltypist_script <- file.path(
93
- "{{biopipen_dir}}", "scripts", "scrna", "celltypist-wrapper.py"
89
+ biopipen_dir, "scripts", "scrna", "celltypist-wrapper.py"
94
90
  )
95
91
 
96
92
  if (outtype == "h5ad") {
97
93
  celltypist_outfile <- outfile
98
- } else if (outtype == "h5seurat") {
99
- celltypist_outfile <- paste0(outprefix, ".celltypist.h5ad")
100
- } else if (outtype == "rds") {
94
+ } else if (outtype == "rds" || outtype == "qs" || outtype == "qs2") {
101
95
  ext <- if (is.null(sobj)) ".h5ad" else ".txt"
102
96
  celltypist_outfile <- paste0(outprefix, ".celltypist", ext)
103
97
  } else {
@@ -106,7 +100,7 @@ if (outtype == "h5ad") {
106
100
 
107
101
  if (file.exists(celltypist_outfile) &&
108
102
  (file.mtime(celltypist_outfile) > file.mtime(sobjfile))) {
109
- log_info("Using existing celltypist results ...")
103
+ log$warn("Using existing celltypist results: {celltypist_outfile} ...")
110
104
  } else {
111
105
  command <- paste(
112
106
  paste0("CELLTYPIST_FOLDER='", outdir, "'"),
@@ -123,76 +117,29 @@ if (file.exists(celltypist_outfile) &&
123
117
  if (isTRUE(celltypist_args$majority_voting)) {
124
118
  command <- paste(command, "-v")
125
119
  }
126
- log_info("Running celltypist:")
127
- log_debug("- {command}")
120
+ log$info("Running celltypist:")
121
+ log$debug("- {command}")
128
122
  rc <- system(command)
129
123
  if (rc != 0) {
130
- stop("Failed to run celltypist")
124
+ stop("Failed to run celltypist. Check the job.stderr file to see the error message.")
131
125
  }
132
126
  }
133
127
 
134
128
  if (outtype == "h5ad") {
135
- # log_info("Using H5AD from celltypist as output directly ...")
136
- # file.rename(paste0(out_prefix, ".h5ad"), outfile)
137
- if (merge_same_labels) {
138
- log_warn("- Merging clusters with the same labels is not supported for h5ad outfile ...")
139
- }
140
- } else if (outtype == "h5seurat") {
141
- log_info("Converting H5AD from celltypist to H5Seurat ...")
142
- # outfile is cleaned by the pipeline anyway
143
- Convert(
144
- celltypist_outfile,
145
- assay = assay %||% 'RNA',
146
- dest = outfile,
147
- overwrite = TRUE
148
- )
149
129
  if (merge_same_labels) {
150
- log_warn("- Merging clusters with the same labels is not supported for h5seurat outfile ...")
130
+ log$warn("- Merging clusters with the same labels is not supported and is ignored for h5ad outfile ...")
151
131
  }
152
- } else if (outtype == "rds") {
132
+ } else if (outtype == "rds" || outtype == "qs" || outtype == "qs2") {
153
133
  if (is.null(sobj)) {
154
- log_info("Converting H5AD from celltypist to RDS ...")
155
- h5seurat_file <- paste0(outprefix, ".celltypist.h5seurat")
156
- if (file.exists(h5seurat_file) &&
157
- (file.mtime(h5seurat_file) > file.mtime(celltypist_outfile))) {
158
- log_info("- Using existing H5Seurat file ...")
159
- } else {
160
- log_info("- Converting to h5seurat ...")
161
- Convert(
162
- celltypist_outfile,
163
- assay = assay %||% 'RNA', dest = h5seurat_file, overwrite = TRUE)
164
- }
165
- log_info("- Converting to RDS ...")
166
- # Fix Missing required datasets 'levels' and 'values'
167
- # https://github.com/mojaveazure/seurat-disk/issues/109#issuecomment-1722394184
168
- f <- H5File$new(h5seurat_file, "r+")
169
- groups <- f$ls(recursive = TRUE)
170
-
171
- for (name in groups$name[grepl("categories", groups$name)]) {
172
- names <- strsplit(name, "/")[[1]]
173
- names <- c(names[1:length(names) - 1], "levels")
174
- new_name <- paste(names, collapse = "/")
175
- f[[new_name]] <- f[[name]]
176
- }
177
-
178
- for (name in groups$name[grepl("codes", groups$name)]) {
179
- names <- strsplit(name, "/")[[1]]
180
- names <- c(names[1:length(names) - 1], "values")
181
- new_name <- paste(names, collapse = "/")
182
- f[[new_name]] <- f[[name]]
183
- grp <- f[[new_name]]
184
- grp$write(args = list(1:grp$dims), value = grp$read() + 1)
185
- }
186
- f$close_all()
187
- # end
188
-
189
- sobj <- LoadH5Seurat(h5seurat_file)
190
- if (merge_same_labels) {
191
- log_info("Merging clusters with the same labels ...")
192
- sobj <- merge_clusters_with_same_labels(sobj, newcol)
193
- }
134
+ log$info("Reading H5AD from celltypist ...")
135
+ sobj <- ConvertAnnDataToSeurat(
136
+ infile = celltypist_outfile,
137
+ outfile = NULL,
138
+ assay = celltypist_args$assay %||% "RNA",
139
+ log = log
140
+ )
194
141
  } else {
195
- log_info("Attaching celltypist results to Seurat object ...")
142
+ log$info("Attaching celltypist results to Seurat object ...")
196
143
 
197
144
  celltypist_out <- read.table(
198
145
  celltypist_outfile, sep = "\t", header = TRUE, row.names = 1)
@@ -205,48 +152,50 @@ if (outtype == "h5ad") {
205
152
  drop = FALSE
206
153
  ]
207
154
  )
155
+ }
208
156
 
209
- if (celltypist_args$majority_voting) {
210
- prediction <- "majority_voting"
157
+ if (celltypist_args$majority_voting) {
158
+ prediction <- "majority_voting"
211
159
 
212
- if (!is.null(newcol)) {
213
- sobj@meta.data[[newcol]] <- sobj@meta.data[[prediction]]
160
+ if (!is.null(newcol)) {
161
+ sobj@meta.data[[newcol]] <- sobj@meta.data[[prediction]]
162
+ } else {
163
+ over_clustering <- celltypist_args$over_clustering
164
+ if (over_clustering %in% colnames(sobj@meta.data)) {
165
+ sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
214
166
  } else {
215
- over_clustering <- celltypist_args$over_clustering
216
- if (over_clustering %in% colnames(sobj@meta.data)) {
217
- sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
218
- } else {
219
- over_clustering <- "over_clustering"
220
- }
221
-
222
- # make a map of original cluster id to new cluster id
223
- cluster_map <- data.frame(
224
- seurat_clusters_id = sobj@meta.data[[over_clustering]],
225
- seurat_clusters = sobj@meta.data[[prediction]]
226
- ) %>%
227
- group_by(seurat_clusters_id) %>%
228
- summarise(seurat_clusters = first(seurat_clusters), .groups = "drop") %>%
229
- mutate(seurat_clusters = make.unique(seurat_clusters))
230
- cluster_map <- split(cluster_map$seurat_clusters, cluster_map$seurat_clusters_id)
231
- if (over_clustering != "seurat_clusters") {
232
- sobj@meta.data$seurat_clusters <- sobj@meta.data[[over_clustering]]
233
- }
234
- Idents(sobj) <- "seurat_clusters"
235
- cluster_map$object <- sobj
236
- log_info("Renaming clusters ...")
237
- sobj <- do_call(RenameIdents, cluster_map)
238
- sobj@meta.data$seurat_clusters <- Idents(sobj)
167
+ over_clustering <- "over_clustering"
239
168
  }
240
- } else if (!is.null(newcol)) {
241
- sobj@meta.data[[newcol]] <- sobj@meta.data[["predicted_labels"]]
242
- }
243
- if (merge_same_labels) {
244
- log_info("Merging clusters with the same labels ...")
245
- sobj <- merge_clusters_with_same_labels(sobj, newcol)
169
+
170
+ # make a map of original cluster id to new cluster id
171
+ cluster_map <- data.frame(
172
+ seurat_clusters_id = sobj@meta.data[[over_clustering]],
173
+ seurat_clusters = sobj@meta.data[[prediction]]
174
+ ) %>%
175
+ group_by(seurat_clusters_id) %>%
176
+ summarise(seurat_clusters = first(seurat_clusters), .groups = "drop") %>%
177
+ mutate(seurat_clusters = make.unique(seurat_clusters))
178
+ cluster_map <- split(cluster_map$seurat_clusters, cluster_map$seurat_clusters_id)
179
+ if (over_clustering != "seurat_clusters") {
180
+ sobj@meta.data$seurat_clusters <- sobj@meta.data[[over_clustering]]
181
+ }
182
+ Idents(sobj) <- "seurat_clusters"
183
+ cluster_map$object <- sobj
184
+ log$info("Renaming clusters ...")
185
+ sobj <- do_call(RenameIdents, cluster_map)
186
+ sobj@meta.data$seurat_clusters <- Idents(sobj)
246
187
  }
188
+ } else if (!is.null(newcol)) {
189
+ sobj@meta.data[[newcol]] <- sobj@meta.data[["predicted_labels"]]
247
190
  }
248
- log_info("Saving Seurat object in RDS ...")
249
- saveRDS(sobj, outfile)
191
+
192
+ if (merge_same_labels) {
193
+ log$info("Merging clusters with the same labels ...")
194
+ sobj <- merge_clusters_with_same_labels(sobj, newcol)
195
+ }
196
+
197
+ log$info("Saving the object ...")
198
+ save_obj(sobj, outfile)
250
199
  } else {
251
200
  stop(paste0("Unknown output type: ", outtype))
252
201
  }
@@ -6,17 +6,19 @@ celltypes <- {{envs.cell_types | r}}
6
6
  newcol <- {{envs.newcol | r}}
7
7
  merge_same_labels <- {{envs.merge | r}}
8
8
 
9
+ log <- biopipen.utils::get_logger()
10
+
9
11
  if (is.null(celltypes) || length(celltypes) == 0) {
10
- log_warn("No cell types are given!")
12
+ log$warn("No cell types are given!")
11
13
 
12
14
  if (merge_same_labels) {
13
- log_warn("Ignoring 'envs.merge' because no cell types are given!")
15
+ log$warn("Ignoring 'envs.merge' because no cell types are given!")
14
16
  }
15
17
  # create a symbolic link to the input file
16
18
  file.symlink(sobjfile, outfile)
17
19
  } else {
18
- log_info("Loading Seurat object ...")
19
- sobj <- readRDS(sobjfile)
20
+ log$info("Loading Seurat object ...")
21
+ sobj <- biopipen.utils::read_obj(sobjfile)
20
22
  idents <- Idents(sobj)
21
23
  if (is.factor(idents)) {
22
24
  idents <- levels(idents)
@@ -28,7 +30,7 @@ if (is.null(celltypes) || length(celltypes) == 0) {
28
30
  celltypes <- c(celltypes, idents[(length(celltypes) + 1):length(idents)])
29
31
  } else if (length(celltypes) > length(idents)) {
30
32
  celltypes <- celltypes[1:length(idents)]
31
- log_warn("The length of cell types is longer than the number of clusters!")
33
+ log$warn("The length of cell types is longer than the number of clusters!")
32
34
  }
33
35
  for (i in seq_along(celltypes)) {
34
36
  if (celltypes[i] == "-" || celltypes[i] == "") {
@@ -37,7 +39,7 @@ if (is.null(celltypes) || length(celltypes) == 0) {
37
39
  }
38
40
  names(celltypes) <- idents
39
41
 
40
- log_info("Renaming cell types ...")
42
+ log$info("Renaming cell types ...")
41
43
  if (is.null(newcol)) {
42
44
  has_na <- "NA" %in% unlist(celltypes) || anyNA(unlist(celltypes))
43
45
  sobj$seurat_clusters_id <- Idents(sobj)
@@ -45,7 +47,7 @@ if (is.null(celltypes) || length(celltypes) == 0) {
45
47
  sobj <- do_call(RenameIdents, celltypes)
46
48
  sobj$seurat_clusters <- Idents(sobj)
47
49
  if (has_na) {
48
- log_info("Filtering clusters if NA ...")
50
+ log$info("Filtering clusters if NA ...")
49
51
  sobj <- subset(
50
52
  sobj,
51
53
  subset = seurat_clusters != "NA" & !is.na(seurat_clusters)
@@ -59,9 +61,9 @@ if (is.null(celltypes) || length(celltypes) == 0) {
59
61
  }
60
62
 
61
63
  if (merge_same_labels) {
62
- log_info("Merging clusters with the same labels ...")
64
+ log$info("Merging clusters with the same labels ...")
63
65
  sobj <- merge_clusters_with_same_labels(sobj, newcol)
64
66
  }
65
67
 
66
- saveRDS(sobj, outfile)
68
+ biopipen.utils::save_obj(sobj, outfile)
67
69
  }
@@ -1,6 +1,7 @@
1
1
  library(Seurat)
2
2
  library(dplyr)
3
3
  library(hitype)
4
+ library(biopipen.utils)
4
5
 
5
6
  sobjfile = {{in.sobjfile | r}}
6
7
  outfile = {{out.outfile | r}}
@@ -11,11 +12,13 @@ merge_same_labels = {{envs.merge | r}}
11
12
 
12
13
  if (is.null(db)) { stop("`envs.hitype_db` is not set") }
13
14
 
14
- log_info("Reading Seurat object...")
15
- sobj = readRDS(sobjfile)
15
+ log <- get_logger()
16
+
17
+ log$info("Reading Seurat object...")
18
+ sobj = biopipen.utils::read_obj(sobjfile)
16
19
 
17
20
  # prepare gene sets
18
- log_info("Preparing gene sets...")
21
+ log$info("Preparing gene sets...")
19
22
  if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
20
23
  gs_list = gs_prepare(eval(as.symbol(db)), tissue)
21
24
  } else {
@@ -23,10 +26,10 @@ if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
23
26
  }
24
27
 
25
28
  # run RunHitype
26
- log_info("Running RunHitype...")
29
+ log$info("Running RunHitype...")
27
30
  sobj = RunHitype(sobj, gs_list, threshold = 0.0, make_unique = TRUE)
28
31
 
29
- log_info("Renaming cell types...")
32
+ log$info("Renaming cell types...")
30
33
  hitype_levels = sobj@meta.data %>%
31
34
  select(seurat_clusters, hitype) %>%
32
35
  distinct(seurat_clusters, .keep_all = TRUE) %>%
@@ -42,14 +45,14 @@ if (is.null(newcol)) {
42
45
  }
43
46
 
44
47
  if (merge_same_labels) {
45
- log_info("Merging clusters with the same labels...")
48
+ log$info("Merging clusters with the same labels...")
46
49
  sobj = merge_clusters_with_same_labels(sobj, newcol)
47
50
  }
48
51
 
49
- log_info("Saving Seurat object...")
50
- saveRDS(sobj, outfile)
52
+ log$info("Saving Seurat object...")
53
+ biopipen.utils::save_obj(sobj, outfile)
51
54
 
52
- log_info("Saving the mappings ...")
55
+ log$info("Saving the mappings ...")
53
56
  if (is.null(newcol)) {
54
57
  celltypes = sobj@meta.data %>%
55
58
  group_by(seurat_clusters_id) %>%
@@ -1,5 +1,6 @@
1
1
  library(scCATCH)
2
2
  library(Seurat)
3
+ library(biopipen.utils)
3
4
 
4
5
  sobjfile = {{in.sobjfile | r}}
5
6
  outfile = {{out.outfile | r}}
@@ -7,8 +8,10 @@ sccatch_args = {{envs.sccatch_args | r}}
7
8
  newcol = {{envs.newcol | r}}
8
9
  merge_same_labels = {{envs.merge | r}}
9
10
 
11
+ log <- get_logger()
12
+
10
13
  if (!is.null(sccatch_args$marker)) {
11
- cellmatch = readRDS(sccatch_args$marker)
14
+ cellmatch = read_obj(sccatch_args$marker)
12
15
  sccatch_args$if_use_custom_marker = TRUE
13
16
  }
14
17
  sccatch_args$marker = cellmatch
@@ -17,20 +20,20 @@ if (is.integer(sccatch_args$use_method)) {
17
20
  sccatch_args$use_method = as.character(sccatch_args$use_method)
18
21
  }
19
22
 
20
- log_info("Reading Seurat object...")
21
- sobj = readRDS(sobjfile)
23
+ log$info("Reading Seurat object...")
24
+ sobj = read_obj(sobjfile)
22
25
 
23
- log_info("Running createscCATCH ...")
26
+ log$info("Running createscCATCH ...")
24
27
  obj = createscCATCH(data = GetAssayData(sobj), cluster = as.character(Idents(sobj)))
25
28
  sccatch_args$object = obj
26
29
 
27
- log_info("Running findmarkergene ...")
30
+ log$info("Running findmarkergene ...")
28
31
  obj = do_call(findmarkergene, sccatch_args)
29
32
 
30
- log_info("Running findcelltype ...")
33
+ log$info("Running findcelltype ...")
31
34
  obj = findcelltype(object = obj)
32
35
 
33
- log_info("Saving the mappings ...")
36
+ log$info("Saving the mappings ...")
34
37
  write.table(
35
38
  obj@celltype,
36
39
  file = file.path(dirname(outfile), "cluster2celltype.tsv"),
@@ -42,7 +45,7 @@ celltypes = as.list(obj@celltype$cell_type)
42
45
  names(celltypes) = obj@celltype$cluster
43
46
 
44
47
  if (length(celltypes) == 0) {
45
- log_warn("- No cell types annotated from the database!")
48
+ log$warn("- No cell types annotated from the database!")
46
49
  } else {
47
50
  if (is.null(newcol)) {
48
51
  sobj$seurat_clusters_id = Idents(sobj)
@@ -57,10 +60,10 @@ if (length(celltypes) == 0) {
57
60
  }
58
61
 
59
62
  if (merge_same_labels) {
60
- log_info("Merging clusters with the same labels ...")
63
+ log$info("Merging clusters with the same labels ...")
61
64
  sobj = merge_clusters_with_same_labels(sobj, newcol)
62
65
  }
63
66
  }
64
67
 
65
- log_info("Saving Seurat object ...")
66
- saveRDS(sobj, outfile)
68
+ log$info("Saving Seurat object ...")
69
+ save_obj(sobj, outfile)
@@ -2,8 +2,9 @@ library(dplyr)
2
2
  library(HGNChelper)
3
3
  library(Seurat)
4
4
  library(rlang)
5
+ library(biopipen.utils)
5
6
 
6
- {{ biopipen_dir | joinpaths: "scripts", "scrna", "sctype.R" | source_r }}
7
+ {% include biopipen_dir + "/scripts/scrna/sctype.R" %}
7
8
 
8
9
  sobjfile = {{in.sobjfile | r}}
9
10
  outfile = {{out.outfile | r}}
@@ -14,24 +15,26 @@ merge_same_labels = {{envs.merge | r}}
14
15
 
15
16
  if (is.null(db)) { stop("`envs.sctype_args.db` is not set") }
16
17
 
17
- log_info("Reading Seurat object...")
18
- sobj = readRDS(sobjfile)
18
+ log <- get_logger()
19
+
20
+ log$info("Reading Seurat object...")
21
+ sobj = biopipen.utils::read_obj(sobjfile)
19
22
 
20
23
  # prepare gene sets
21
- log_info("Preparing gene sets...")
24
+ log$info("Preparing gene sets...")
22
25
  gs_list = gene_sets_prepare(db, tissue)
23
26
 
24
27
  scRNAseqData = GetAssayData(sobj, layer = "scale.data")
25
28
  idents = as.character(unique(Idents(sobj)))
26
29
  idents = idents[order(as.numeric(idents))]
27
30
 
28
- log_info("Working on different levels of cell type labels ...")
31
+ log$info("Working on different levels of cell type labels ...")
29
32
  cell_types_list = list()
30
33
  for (i in seq_along(gs_list)) {
31
- log_info("- Working on level {i} ...")
34
+ log$info("- Working on level {i} ...")
32
35
  if (is.null(gs_list[[i]])) next
33
36
 
34
- log_info(" Calculating cell-type scores ...")
37
+ log$info(" Calculating cell-type scores ...")
35
38
  es.max = sctype_score(
36
39
  scRNAseqData = scRNAseqData,
37
40
  scaled = TRUE,
@@ -39,7 +42,7 @@ for (i in seq_along(gs_list)) {
39
42
  gs2 = gs_list[[i]]$gs_negative
40
43
  )
41
44
 
42
- log_info(" Merging cell-type scores by cluster ...")
45
+ log$info(" Merging cell-type scores by cluster ...")
43
46
  cl_resutls = do_call(
44
47
  "rbind",
45
48
  lapply(
@@ -62,12 +65,12 @@ for (i in seq_along(gs_list)) {
62
65
  write("\n####### sctype_scores_count ########", stderr())
63
66
  write(capture.output(sctype_scores_count), stderr())
64
67
  write("\n####################################", stderr())
65
- log_info(" Scores tied in the above clusters.", immediate. = TRUE)
68
+ log$info(" Scores tied in the above clusters.", immediate. = TRUE)
66
69
  }
67
70
 
68
71
  if (length(gs_list) == 1 || i > 1) {
69
72
  # set low-confident (low ScType score) clusters to "unknown"
70
- log_info(" Setting low-confident clusters to 'Unknown'...")
73
+ log$info(" Setting low-confident clusters to 'Unknown'...")
71
74
  sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
72
75
  }
73
76
 
@@ -85,7 +88,7 @@ for (i in seq_along(gs_list)) {
85
88
  if (length(cell_types_list) == 1) {
86
89
  celltypes = cell_types_list[[1]]
87
90
  } else {
88
- log_info("Merging cell types at all levels ...")
91
+ log$info("Merging cell types at all levels ...")
89
92
  celltypes = list()
90
93
 
91
94
  for (i in idents) {
@@ -100,7 +103,7 @@ if (length(cell_types_list) == 1) {
100
103
  }
101
104
 
102
105
 
103
- log_info("Renaming cell types...")
106
+ log$info("Renaming cell types...")
104
107
  ct_numbering = list()
105
108
  for (key in names(celltypes)) {
106
109
  ct = celltypes[[key]]
@@ -127,14 +130,14 @@ celltypes$object = NULL
127
130
  gc()
128
131
 
129
132
  if (merge_same_labels) {
130
- log_info("Merging clusters with the same labels...")
133
+ log$info("Merging clusters with the same labels...")
131
134
  sobj <- merge_clusters_with_same_labels(sobj, newcol)
132
135
  celltypes <- lapply(celltypes, function(ct) {
133
136
  sub("\\.\\d+$", "", ct)
134
137
  })
135
138
  }
136
139
 
137
- log_info("Saving the mappings ...")
140
+ log$info("Saving the mappings ...")
138
141
  write.table(
139
142
  data.frame(
140
143
  Cluster = names(celltypes),
@@ -147,5 +150,5 @@ write.table(
147
150
  row.names = FALSE
148
151
  )
149
152
 
150
- log_info("Saving Seurat object...")
151
- saveRDS(sobj, outfile)
153
+ log$info("Saving Seurat object...")
154
+ biopipen.utils::save_obj(sobj, outfile)
@@ -1,7 +1,15 @@
1
1
  set.seed(8525)
2
2
 
3
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
4
- {{ biopipen_dir | joinpaths: "scripts", "scrna", "CellTypeAnnotation-common.R" | source_r }}
3
+ merge_clusters_with_same_labels <- function(sobj, newcol) {
4
+ if (is.null(newcol)) {
5
+ sobj@meta.data$seurat_clusters <- sub("\\.\\d+$", "", sobj@meta.data$seurat_clusters)
6
+ Idents(sobj) <- "seurat_clusters"
7
+ } else {
8
+ sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
9
+ }
10
+
11
+ sobj
12
+ }
5
13
 
6
14
  {% if envs.tool == "hitype" %}
7
15
  {% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-hitype.R" %}
@@ -37,7 +37,7 @@ cases <- {{envs.cases | r}} # nolint
37
37
  overlap <- overlap %||% c()
38
38
  overlaps <- list()
39
39
  log_info("- Loading seurat object ...")
40
- srtobj <- readRDS(srtfile)
40
+ srtobj <- biopipen.utils::read_obj(srtfile)
41
41
 
42
42
  if (!is.null(mutaters) && length(mutaters) > 0) {
43
43
  log_info("- Mutating seurat object ...")