biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -1,48 +1,85 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
1
  library(Seurat)
2
+ library(rlang)
3
+ library(dplyr)
4
+ library(tidyseurat)
3
5
 
4
- sobjfile = {{in.sobjfile | r}}
5
- outfile = {{out.outfile | r}}
6
- celltypes = {{envs.cell_types | r}}
7
- newcol = {{envs.newcol | r}}
6
+ sobjfile <- {{in.sobjfile | r}}
7
+ outfile <- {{out.outfile | r}}
8
+ celltypes <- {{envs.cell_types | r}}
9
+ newcol <- {{envs.newcol | r}}
10
+ ident <- {{envs.ident | r }}
11
+ merge_same_labels <- {{envs.merge | r}}
12
+ more_cell_types <- {{envs.more_cell_types | r}}
13
+
14
+ log <- biopipen.utils::get_logger()
8
15
 
9
16
  if (is.null(celltypes) || length(celltypes) == 0) {
10
- warning("No cell types are given!")
17
+ log$warn("No cell types are given!")
18
+ if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
19
+ log$warn("`envs.celltypes` is not given, won't process `envs.more_cell_types`!")
20
+ }
11
21
 
22
+ if (merge_same_labels) {
23
+ log$warn("Ignoring 'envs.merge' because no cell types are given!")
24
+ }
12
25
  # create a symbolic link to the input file
13
26
  file.symlink(sobjfile, outfile)
14
27
  } else {
15
- sobj = readRDS(sobjfile)
16
- idents = as.character(unique(Idents(sobj)))
17
- idents = idents[order(as.numeric(idents))]
18
-
19
- if (length(celltypes) < length(idents)) {
20
- celltypes = c(celltypes, idents[(length(celltypes) + 1):length(idents)])
21
- } else if (length(celltypes) > length(idents)) {
22
- celltypes = celltypes[1:length(idents)]
23
- warning(
24
- "The length of cell types is longer than the number of clusters!",
25
- immediate. = TRUE
26
- )
28
+ log$info("Loading Seurat object ...")
29
+ sobj <- biopipen.utils::read_obj(sobjfile)
30
+ ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
31
+ Idents(sobj) <- ident
32
+ idents <- Idents(sobj)
33
+ if (is.factor(idents)) {
34
+ idents <- levels(idents)
35
+ } else {
36
+ idents <- as.character(unique(idents))
27
37
  }
28
- for (i in seq_along(celltypes)) {
29
- if (celltypes[i] == "-" || celltypes[i] == "") {
30
- celltypes[i] = idents[i]
38
+ process_celltypes <- function(ct, key = NULL) {
39
+ if (length(ct) < length(idents)) {
40
+ ct <- c(ct, idents[(length(ct) + 1):length(idents)])
41
+ } else if (length(ct) > length(idents)) {
42
+ ct <- ct[1:length(idents)]
43
+ if (is.null(key)) {
44
+ log$warn("The length of cell types is longer than the number of clusters!")
45
+ } else {
46
+ log$warn(paste0("The length of cell types for '", key, "' is longer than the number of clusters!"))
47
+ }
48
+ }
49
+ for (i in seq_along(ct)) {
50
+ if (ct[i] == "-" || ct[i] == "") {
51
+ ct[i] <- idents[i]
52
+ }
31
53
  }
54
+ names(ct) <- idents
55
+ return(ct)
32
56
  }
33
- names(celltypes) = idents
34
57
 
58
+ if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
59
+ for (key in names(more_cell_types)) {
60
+ ct <- more_cell_types[[key]]
61
+ ct <- process_celltypes(ct, key)
62
+ log$info(paste0("Adding additional cell type annotation: '", key, "' ..."))
63
+ sobj@meta.data[[key]] <- ct[as.character(Idents(sobj))]
64
+ }
65
+ }
66
+
67
+ celltypes <- process_celltypes(celltypes)
68
+
69
+ log$info("Renaming cell types ...")
35
70
  if (is.null(newcol)) {
36
- sobj$seurat_clusters_id = Idents(sobj)
37
- celltypes$object = sobj
38
- sobj = do_call(RenameIdents, celltypes)
39
- sobj$seurat_clusters = Idents(sobj)
71
+ sobj <- rename_idents(sobj, ident, celltypes)
72
+ log$info("Filtering clusters if NA ...")
73
+ sobj <- filter(sobj, !!sym(ident) != "NA" & !is.na(!!sym(ident)))
40
74
  } else {
41
- celltypes$object = sobj
42
- sobj = do_call(RenameIdents, celltypes)
43
- sobj[[newcol]] = Idents(sobj)
44
- Idents(sobj) = "seurat_clusters"
75
+ sobj[[newcol]] <- celltypes[as.character(Idents(sobj))]
76
+ }
77
+
78
+ if (merge_same_labels) {
79
+ log$info("Merging clusters with the same labels ...")
80
+ sobj <- merge_clusters_with_same_labels(sobj, newcol)
45
81
  }
46
82
 
47
- saveRDS(sobj, outfile)
83
+ log$info("Saving Seurat object ...")
84
+ biopipen.utils::save_obj(sobj, outfile)
48
85
  }
@@ -1,22 +1,26 @@
1
- library(Seurat)
1
+ library(rlang)
2
2
  library(dplyr)
3
3
  library(hitype)
4
4
 
5
- source("{{biopipen_dir}}/utils/misc.R")
6
-
7
5
  sobjfile = {{in.sobjfile | r}}
8
6
  outfile = {{out.outfile | r}}
9
7
  tissue = {{envs.hitype_tissue | r}}
10
8
  db = {{envs.hitype_db | r}}
11
9
  newcol = {{envs.newcol | r}}
10
+ ident = {{envs.ident | r }}
11
+ merge_same_labels = {{envs.merge | r}}
12
12
 
13
13
  if (is.null(db)) { stop("`envs.hitype_db` is not set") }
14
14
 
15
- print("- Reading Seurat object...")
16
- sobj = readRDS(sobjfile)
15
+ log <- get_logger()
16
+
17
+ log$info("Reading Seurat object...")
18
+ sobj = biopipen.utils::read_obj(sobjfile)
19
+ ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
20
+ Idents(sobj) <- ident
17
21
 
18
22
  # prepare gene sets
19
- print("- Preparing gene sets...")
23
+ log$info("Preparing gene sets...")
20
24
  if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
21
25
  gs_list = gs_prepare(eval(as.symbol(db)), tissue)
22
26
  } else {
@@ -24,33 +28,34 @@ if (startsWith(db, "hitypedb_") && !grepl(".", db, fixed = TRUE)) {
24
28
  }
25
29
 
26
30
  # run RunHitype
27
- print("- Running RunHitype...")
31
+ log$info("Running RunHitype...")
28
32
  sobj = RunHitype(sobj, gs_list, threshold = 0.0, make_unique = TRUE)
29
33
 
30
- print("- Renaming cell types...")
31
- hitype_levels = sobj@meta.data %>%
32
- select(seurat_clusters, hitype) %>%
33
- distinct(seurat_clusters, .keep_all = TRUE) %>%
34
- arrange(as.numeric(seurat_clusters)) %>%
35
- pull("hitype")
34
+ log$info("Renaming cell types...")
35
+ hitype_labels <- sobj@meta.data %>%
36
+ distinct(!!sym(ident), hitype)
37
+ hitype_labels <- split(hitype_labels$hitype, hitype_labels[[ident]])
36
38
 
37
39
  if (is.null(newcol)) {
38
- sobj$seurat_clusters_id = sobj$seurat_clusters
39
- sobj$seurat_clusters = factor(sobj$hitype, levels = hitype_levels)
40
- Idents(sobj) = "seurat_clusters"
40
+ sobj <- rename_idents(sobj, ident, hitype_labels)
41
41
  } else {
42
- sobj[[newcol]] = factor(sobj$hitype, levels = hitype_levels)
42
+ sobj[[newcol]] = sobj$hitype
43
+ }
44
+
45
+ if (merge_same_labels) {
46
+ log$info("Merging clusters with the same labels...")
47
+ sobj = merge_clusters_with_same_labels(sobj, newcol)
43
48
  }
44
49
 
45
- print("- Saving Seurat object...")
46
- saveRDS(sobj, outfile)
50
+ log$info("Saving Seurat object...")
51
+ biopipen.utils::save_obj(sobj, outfile)
47
52
 
48
- print("- Saving the mappings ...")
53
+ log$info("Saving the mappings ...")
49
54
  if (is.null(newcol)) {
50
55
  celltypes = sobj@meta.data %>%
51
- group_by(seurat_clusters_id) %>%
56
+ group_by(!!sym(backup_col)) %>%
52
57
  summarize(CellType = hitype[1]) %>%
53
- select(Cluster = seurat_clusters_id, CellType) %>%
58
+ select(Cluster = !!sym(backup_col), CellType) %>%
54
59
  ungroup()
55
60
  } else {
56
61
  celltypes = sobj@meta.data %>%
@@ -1,35 +1,42 @@
1
- source("{{biopipen_dir}}/utils/misc.R")
2
1
  library(scCATCH)
3
2
  library(Seurat)
3
+ library(biopipen.utils)
4
4
 
5
5
  sobjfile = {{in.sobjfile | r}}
6
6
  outfile = {{out.outfile | r}}
7
7
  sccatch_args = {{envs.sccatch_args | r}}
8
8
  newcol = {{envs.newcol | r}}
9
+ ident = {{envs.ident | r }}
10
+ merge_same_labels = {{envs.merge | r}}
11
+
12
+ log <- get_logger()
9
13
 
10
- if (is.null(sccatch_args$tissue)) { stop("`envs.sccatch_args.tissue` origin of cells must be defined.") }
11
- if (is.null(sccatch_args$species)) {
12
- sccatch_args$species = "Human"
13
- }
14
14
  if (!is.null(sccatch_args$marker)) {
15
- cellmatch = readRDS(sccatch_args$marker)
15
+ cellmatch = read_obj(sccatch_args$marker)
16
16
  sccatch_args$if_use_custom_marker = TRUE
17
17
  }
18
18
  sccatch_args$marker = cellmatch
19
19
 
20
- if (is.null(sccatch_args$cancer)) {
21
- sccatch_args$cancer = "Normal"
22
- }
23
20
  if (is.integer(sccatch_args$use_method)) {
24
21
  sccatch_args$use_method = as.character(sccatch_args$use_method)
25
22
  }
26
23
 
27
- sobj = readRDS(sobjfile)
24
+ log$info("Reading Seurat object...")
25
+ sobj = read_obj(sobjfile)
26
+ ident <- ident %||% GetIdentityColumn(sobj)
27
+ Idents(sobj) <- ident
28
28
 
29
+ log$info("Running createscCATCH ...")
29
30
  obj = createscCATCH(data = GetAssayData(sobj), cluster = as.character(Idents(sobj)))
30
31
  sccatch_args$object = obj
31
32
 
33
+ log$info("Running findmarkergene ...")
32
34
  obj = do_call(findmarkergene, sccatch_args)
35
+
36
+ log$info("Running findcelltype ...")
37
+ obj = findcelltype(object = obj)
38
+
39
+ log$info("Saving the mappings ...")
33
40
  write.table(
34
41
  obj@celltype,
35
42
  file = file.path(dirname(outfile), "cluster2celltype.tsv"),
@@ -41,18 +48,19 @@ celltypes = as.list(obj@celltype$cell_type)
41
48
  names(celltypes) = obj@celltype$cluster
42
49
 
43
50
  if (length(celltypes) == 0) {
44
- warning("No cell types annotated from the database!")
51
+ log$warn("- No cell types annotated from the database!")
45
52
  } else {
46
53
  if (is.null(newcol)) {
47
- sobj$seurat_clusters_id = Idents(sobj)
48
- celltypes$object = sobj
49
- sobj = do_call(RenameIdents, celltypes)
50
- sobj$seurat_clusters = Idents(sobj)
54
+ sobj <- rename_idents(sobj, ident, celltypes)
51
55
  } else {
52
- celltypes$object = sobj
53
- sobj = do_call(RenameIdents, celltypes)
54
- sobj[[newcol]] = Idents(sobj)
55
- Idents(sobj) = "seurat_clusters"
56
+ sobj@meta.data[[newcol]] = celltypes[as.character(Idents(sobj))]
57
+ }
58
+
59
+ if (merge_same_labels) {
60
+ log$info("Merging clusters with the same labels ...")
61
+ sobj = merge_clusters_with_same_labels(sobj, newcol)
56
62
  }
57
63
  }
58
- saveRDS(sobj, outfile)
64
+
65
+ log$info("Saving Seurat object ...")
66
+ save_obj(sobj, outfile)
@@ -1,34 +1,43 @@
1
1
  library(dplyr)
2
2
  library(HGNChelper)
3
3
  library(Seurat)
4
+ library(rlang)
5
+ library(biopipen.utils)
4
6
 
5
- source("{{biopipen_dir}}/utils/misc.R")
6
- source("{{biopipen_dir}}/scripts/scrna/sctype.R")
7
+ {% include biopipen_dir + "/scripts/scrna/sctype.R" %}
7
8
 
8
9
  sobjfile = {{in.sobjfile | r}}
9
10
  outfile = {{out.outfile | r}}
10
11
  tissue = {{envs.sctype_tissue | r}}
11
12
  db = {{envs.sctype_db | r}}
12
13
  newcol = {{envs.newcol | r}}
14
+ ident = {{envs.ident | r }}
15
+ merge_same_labels = {{envs.merge | r}}
13
16
 
14
17
  if (is.null(db)) { stop("`envs.sctype_args.db` is not set") }
15
18
 
16
- print("- Reading Seurat object...")
17
- sobj = readRDS(sobjfile)
19
+ log <- get_logger()
20
+
21
+ log$info("Reading Seurat object...")
22
+ sobj = biopipen.utils::read_obj(sobjfile)
23
+ ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
24
+ Idents(sobj) <- ident
18
25
 
19
26
  # prepare gene sets
20
- print("- Preparing gene sets...")
27
+ log$info("Preparing gene sets...")
21
28
  gs_list = gene_sets_prepare(db, tissue)
22
29
 
23
- scRNAseqData = GetAssayData(sobj, slot = "scale.data")
30
+ scRNAseqData = GetAssayData(sobj, layer = "scale.data")
24
31
  idents = as.character(unique(Idents(sobj)))
25
32
  idents = idents[order(as.numeric(idents))]
26
33
 
34
+ log$info("Working on different levels of cell type labels ...")
27
35
  cell_types_list = list()
28
36
  for (i in seq_along(gs_list)) {
37
+ log$info("- Working on level {i} ...")
29
38
  if (is.null(gs_list[[i]])) next
30
39
 
31
- print(paste0("- Calculating cell-type scores for level ", i, "..."))
40
+ log$info(" Calculating cell-type scores ...")
32
41
  es.max = sctype_score(
33
42
  scRNAseqData = scRNAseqData,
34
43
  scaled = TRUE,
@@ -36,7 +45,7 @@ for (i in seq_along(gs_list)) {
36
45
  gs2 = gs_list[[i]]$gs_negative
37
46
  )
38
47
 
39
- print(paste0("- Merging cell-type scores by cluster for level ", i, "..."))
48
+ log$info(" Merging cell-type scores by cluster ...")
40
49
  cl_resutls = do_call(
41
50
  "rbind",
42
51
  lapply(
@@ -59,12 +68,12 @@ for (i in seq_along(gs_list)) {
59
68
  write("\n####### sctype_scores_count ########", stderr())
60
69
  write(capture.output(sctype_scores_count), stderr())
61
70
  write("\n####################################", stderr())
62
- warning("Scores tied in the above clusters.", immediate. = TRUE)
71
+ log$info(" Scores tied in the above clusters.", immediate. = TRUE)
63
72
  }
64
73
 
65
74
  if (length(gs_list) == 1 || i > 1) {
66
75
  # set low-confident (low ScType score) clusters to "unknown"
67
- print("- Setting low-confident clusters to 'Unknown'...")
76
+ log$info(" Setting low-confident clusters to 'Unknown'...")
68
77
  sctype_scores$type[as.numeric(as.character(sctype_scores$scores)) < sctype_scores$ncells/4] = "Unknown"
69
78
  }
70
79
 
@@ -82,7 +91,7 @@ for (i in seq_along(gs_list)) {
82
91
  if (length(cell_types_list) == 1) {
83
92
  celltypes = cell_types_list[[1]]
84
93
  } else {
85
- print("- Merging cell types at all levels ...")
94
+ log$info("Merging cell types at all levels ...")
86
95
  celltypes = list()
87
96
 
88
97
  for (i in idents) {
@@ -97,24 +106,35 @@ if (length(cell_types_list) == 1) {
97
106
  }
98
107
 
99
108
 
100
- print("- Renaming cell types...")
109
+ log$info("Renaming cell types...")
110
+ ct_numbering = list()
111
+ for (key in names(celltypes)) {
112
+ ct = celltypes[[key]]
113
+ ct_numbering[[ct]] = ct_numbering[[ct]] %||% 0
114
+ if (ct_numbering[[ct]] > 0) {
115
+ celltypes[[key]] = paste0(ct, ".", ct_numbering[[ct]])
116
+ }
117
+ ct_numbering[[ct]] = ct_numbering[[ct]] + 1
118
+ }
119
+
120
+ celltypes = as.list(celltypes)
101
121
  if (is.null(newcol)) {
102
- sobj$seurat_clusters_id = sobj$seurat_clusters
103
- celltypes$object = sobj
104
- sobj = do_call(RenameIdents, celltypes)
105
- sobj$seurat_clusters = Idents(sobj)
122
+ sobj <- rename_idents(sobj, ident, celltypes)
106
123
  } else {
107
- celltypes$object = sobj
108
- sobj = do_call(RenameIdents, celltypes)
109
- sobj[[newcol]] = Idents(sobj)
110
- Idents(sobj) = "seurat_clusters"
124
+ sobj@meta.data[[newcol]] = celltypes[as.character(Idents(sobj))]
111
125
  }
112
-
113
- print("- Saving Seurat object...")
114
- saveRDS(sobj, outfile)
115
-
116
- print("- Saving the mappings ...")
117
126
  celltypes$object = NULL
127
+ gc()
128
+
129
+ if (merge_same_labels) {
130
+ log$info("Merging clusters with the same labels...")
131
+ sobj <- merge_clusters_with_same_labels(sobj, newcol)
132
+ celltypes <- lapply(celltypes, function(ct) {
133
+ sub("\\.\\d+$", "", ct)
134
+ })
135
+ }
136
+
137
+ log$info("Saving the mappings ...")
118
138
  write.table(
119
139
  data.frame(
120
140
  Cluster = names(celltypes),
@@ -126,3 +146,6 @@ write.table(
126
146
  quote = FALSE,
127
147
  row.names = FALSE
128
148
  )
149
+
150
+ log$info("Saving Seurat object...")
151
+ biopipen.utils::save_obj(sobj, outfile)
@@ -1,13 +1,49 @@
1
+ library(Seurat)
2
+ library(biopipen.utils)
1
3
  set.seed(8525)
2
4
 
5
+ backup_col <- {{envs.backup_col | r}}
6
+
7
+
8
+ merge_clusters_with_same_labels <- function(sobj, newcol = NULL) {
9
+ if (is.null(newcol)) {
10
+ newcol <- biopipen.utils::GetIdentityColumn(sobj)
11
+ sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
12
+ Idents(sobj) <- newcol
13
+ } else {
14
+ sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
15
+ }
16
+
17
+ sobj
18
+ }
19
+
20
+ rename_idents <- function(sobj, ident_col, mapping) {
21
+ orig_ident_col <- biopipen.utils::GetIdentityColumn(sobj)
22
+ if (!identical(ident_col, orig_ident_col)) {
23
+ Idents(sobj) <- ident_col
24
+ mapping$object <- sobj
25
+ sobj <- do_call(RenameIdents, mapping)
26
+ } else {
27
+ if (!is.null(backup_col)) {
28
+ sobj@meta.data[[backup_col]] <- Idents(sobj)
29
+ }
30
+ mapping$object <- sobj
31
+ sobj <- do_call(RenameIdents, mapping)
32
+ }
33
+ sobj@meta.data[[ident_col]] <- Idents(sobj)
34
+ sobj
35
+ }
36
+
3
37
  {% if envs.tool == "hitype" %}
4
38
  {% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-hitype.R" %}
5
39
  {% elif envs.tool == "sctype" %}
6
40
  {% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-sctype.R" %}
7
41
  {% elif envs.tool == "sccatch" %}
8
42
  {% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-sccatch.R" %}
43
+ {% elif envs.tool == "celltypist" %}
44
+ {% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-celltypist.R" %}
9
45
  {% elif envs.tool == "direct" %}
10
46
  {% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-direct.R" %}
11
47
  {% else %}
12
- stop(paste0("Unknown tool: ", {{envs.tool}}))
48
+ stop("Unknown tool: {{envs.tool}}")
13
49
  {% endif %}