biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -1,7 +1,7 @@
1
1
  library(Seurat)
2
2
  library(dplyr)
3
3
 
4
- source("{{biopipen_dir}}/utils/misc.R")
4
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
5
5
 
6
6
  srtfile = {{in.srtobj | r}}
7
7
  {% if in.configfile %}
@@ -0,0 +1,109 @@
1
+ library(SeuratWrappers)
2
+ library(Seurat)
3
+ library(purrr)
4
+ library(stringr)
5
+ library(biopipen.utils)
6
+
7
+ infile = {{in.infile | r}}
8
+ outfile = {{out.outfile | r}}
9
+ envs = {{envs.alra_args | r}}
10
+
11
+ log <- get_logger()
12
+
13
+ log$info("Loading Seurat object")
14
+ sobj <- read_obj(infile)
15
+ assay <- DefaultAssay(sobj)
16
+
17
+ # https://github.com/mojaveazure/seurat-disk/issues/102
18
+ # https://github.com/simoncmo/shared_seurat_scripts/blob/main/function_seurat_janitor.R
19
+ # Try to fix the issue with SCTModel
20
+ log$info("Trying to fix SCTModel issue (see mojaveazure/seurat-disk#102)")
21
+ # --------------------------------------------------------------------------
22
+ # Handle missing median_umi
23
# Repair an SCTModel whose validity check fails because of `median_umi`.
#
# @param SCTModel_obj An S4 object with a `median_umi` slot and a
#   `cell.attributes` slot containing a `umi` column.
# @return The same object; `median_umi` is recomputed only when the validity
#   error text mentions `median_umi`.
fix_median_umi = function(SCTModel_obj){
    # Run the S4 validity check and keep the error text ('' when it passes)
    validation_error = tryCatch(
        {
            methods::validObject(SCTModel_obj)
            ''
        },
        error = function(cond) as.character(cond)
    )

    # Guard clause: nothing to repair unless the failure is about median_umi
    if (!stringr::str_detect(validation_error, 'median_umi')) {
        return(SCTModel_obj)
    }

    message('Missing medium_umi, calculate again from cell.attributes$umi')
    slot(SCTModel_obj, 'median_umi') = median(SCTModel_obj@cell.attributes$umi)
    return(SCTModel_obj)
}
37
+
38
+ # Cleaning empty objects
39
+ # General purpose
40
# Drop "empty" objects from a list of S4 objects.
#
# An object is considered empty when the slot named by `attirbute_to_check`
# has zero rows.
#
# @param obj_list List of S4 objects that all expose the slot to check.
# @param attirbute_to_check Name of the slot whose row count decides emptiness
#   (required; the misspelled name is kept because callers pass it by name).
# @return `obj_list` without the empty objects. An empty input list is
#   returned unchanged.
clean_seurat_obj_list = function(obj_list, attirbute_to_check){
    if(missing(attirbute_to_check)) {stop("Need attributes to check for cleaning")}

    # An empty list has no first element to inspect and nothing to remove
    if (length(obj_list) == 0) {
        return(obj_list)
    }

    # Object type, only used for the progress messages
    obj_type = class(obj_list[[1]])[[1]]

    # Row count of the checked slot for every object
    obj_size = unlist(purrr::map(obj_list, function(object){
        nrow(slot(object, attirbute_to_check))
    }))

    # Count the empty objects; `length(obj_size == 0)` would be the length of
    # the whole logical vector, i.e. the number of objects, not of empty ones
    n_empty = sum(obj_size == 0)
    if (n_empty > 0) {
        message(str_glue('Removing {n_empty} empty object from the {obj_type} object list'))
        obj_list = obj_list[obj_size != 0]
        message(str_glue('{length(obj_list)} {obj_type} object(s) left'))
    }
    obj_list
}
58
+
59
+ # for SCTModel.list slot
60
# Remove SCTModel entries whose `cell.attributes` slot has no rows.
#
# @param sct_model_list List of SCTModel objects.
# @return The list as filtered by clean_seurat_obj_list().
clean_seurat_SCTModel_list = function(sct_model_list){
    clean_seurat_obj_list(sct_model_list, 'cell.attributes')
}
63
+
64
# Clean up the SCT model list of a Seurat object.
#
# @param obj Object queried with Assays() and holding an `SCT` assay with an
#   `SCTModel.list` slot.
# @return `obj` unchanged when it has no `SCT` assay; otherwise `obj` with
#   empty models removed and `median_umi` repaired on the remaining ones.
fix_seurat_SCT = function(obj){
    # Guard clause: only objects carrying an SCT assay need fixing
    has_sct = 'SCT' %in% Assays(obj)
    if (!has_sct) {
        message('SCT assay not found. Nothing to fix')
        return(obj)
    }

    # 1. drop empty models, 2. repair a missing median_umi on each survivor
    models = clean_seurat_SCTModel_list(obj$SCT@SCTModel.list)
    models = map(models, fix_median_umi)

    # Write the repaired list back and return the object
    obj$SCT@SCTModel.list = models
    return(obj)
}
86
+ # --------------------------------------------------------------------------
87
+ sobj = fix_seurat_SCT(sobj)
88
+
89
+ log$info("Imputing expression values, using ALRA")
90
+ envs$object <- sobj
91
+ sobj = do_call(RunALRA, envs)
92
+ envs$object <- NULL
93
+ gc()
94
+
95
+ log$info("Renaming assays")
96
+ sobj = RenameAssays(sobj, assay.name = assay, new.assay.name = "RAW")
97
+ sobj = RenameAssays(sobj, assay.name = "alra", new.assay.name = assay)
98
+ DefaultAssay(sobj) <- assay
99
+
100
+ sobj@misc$impute_method = "alra"
101
+
102
+ log$info("Saving Seurat object")
103
+ save_obj(sobj, outfile)
104
+
105
+ # choosek_plot_file = file.path(dirname(outfile), "choosek.png")
106
+ # png(choosek_plot_file, width = 1200, height = 1000, res = 100)
107
+ # p = ALRAChooseKPlot(sobj)
108
+ # print(p)
109
+ # dev.off()
@@ -0,0 +1,256 @@
1
+ tryCatch(
2
+ {
3
+ # in order to load Rmagic
4
+ workdir <- {{ job.outdir | r }}
5
+ conda_prefix <- Sys.getenv("CONDA_PREFIX")
6
+ setwd(workdir)
7
+ if (!dir.exists("miniconda3")) {
8
+ file.symlink(conda_prefix, "miniconda3")
9
+ }
10
+ },
11
+ error = function(e) {}
12
+ )
13
+
14
+ python <- {{ envs.rmagic_args.python | r }}
15
+ Sys.setenv(RETICULATE_PYTHON = ifelse(grepl("/", python, fixed = TRUE), python, Sys.which(python)))
16
+ # reticulate::use_python(python, require = TRUE)
17
+
18
+ library(Rmagic)
19
+ Rmagic:::load_pymagic()
20
+ pymagic <- tryCatch({
21
+ Rmagic:::pymagic
22
+ }, error = function(e) {
23
+ NULL
24
+ })
25
+ if (is.null(pymagic)) {
26
+ stop("Failed to load pymagic module. Please check your Python environment.\n ",
27
+ "Current python used by reticulate: ", reticulate::py_config()$python)
28
+ }
29
+
30
+ library(Matrix)
31
+ library(Seurat)
32
+ library(biopipen.utils)
33
+
34
+ log <- get_logger()
35
+
36
+ infile <- {{ in.infile | r }}
37
+ outfile <- {{ out.outfile | r }}
38
+ threshold <- {{ envs.rmagic_args.threshold | r }}
39
+
40
+ log$info("Loading Seurat object ...")
41
+ sobj <- read_obj(infile)
42
+
43
+ if (threshold > 0) {
44
+ # only use the genes with expression in number of cells greater than threshold
45
+ log$info("Fetching genes with expression great than threshold ({threshold}) ...")
46
+ # get the expression matrix
47
+ layers <- Layers(sobj)
48
+ layer <- ifelse(!"counts" %in% layers, "data", "counts")
49
+ counts <- GetAssayData(sobj, layer = layer)
50
+ # Percent of cells expressing each gene
51
+ dropout_rates <- Matrix::rowSums(counts == 0) / ncol(counts)
52
+
53
+ # Genes to impute
54
+ genes_to_impute <- names(dropout_rates[dropout_rates > threshold])
55
+
56
+ log$info("- Will impute for {length(genes_to_impute)}/{length(dropout_rates)} genes ...")
57
+ rm(counts)
58
+ rm(dropout_rates)
59
+ gc()
60
+ } else {
61
+ genes_to_impute <- NULL
62
+ }
63
+
64
+ # get the expression matrix
65
+ data_impute <- t(GetAssayData(sobj, layer = "data"))
66
+
67
+ log$info("Running MAGIC ...")
68
# Normalise an "integer or NULL" argument.
#
# @param x Value to normalise.
# @return `as.integer(x)` when `x` is numeric; NULL when `x` is a non-NULL,
#   non-numeric NA; otherwise `x` unchanged.
check.int.or.null <- function(x) {
  if (is.numeric(x)) {
    return(as.integer(x))
  }
  # Anything that is NULL already, or is not NA, passes through untouched
  if (is.null(x) || !is.na(x)) {
    return(x)
  }
  NULL
}
76
+
77
# Normalise a "double or NULL" argument.
#
# @param x Value to normalise.
# @return `as.double(x)` when `x` is numeric; NULL when `x` is a non-NULL,
#   non-numeric NA; otherwise `x` unchanged.
check.double.or.null <- function(x) {
  if (is.numeric(x = x)) {
    # Coerce to double, not integer: as.integer() would silently truncate
    # fractional values such as 1.5
    x <- as.double(x = x)
  } else if (!is.null(x = x) && is.na(x = x)) {
    x <- NULL
  }
  x
}
85
+
86
# Normalise an "integer or keyword" argument.
#
# @param x Value to normalise.
# @param str Fallback used when `x` is NULL or NA.
# @return `as.integer(x)` when `x` is numeric; `str` when `x` is NULL or a
#   non-numeric NA; otherwise `x` unchanged.
check.int.or.string <- function(x, str) {
  if (is.numeric(x)) {
    return(as.integer(x))
  }
  use_fallback <- is.null(x) || is.na(x)
  if (use_fallback) str else x
}
94
+ # the magic function is defined in the Rmagic package
95
+ # it has a bug at line 138 when genes are given as a character vector
96
+ # See also https://github.com/KrishnaswamyLab/MAGIC/issues/227
97
# Patched copy of the Rmagic `magic()` entry point.
#
# The patch guards the `is.na(genes)` test so that a character vector with
# more than one gene no longer trips the NULL/NA check
# (see https://github.com/KrishnaswamyLab/MAGIC/issues/227).
#
# Relies on `pymagic`, `check.int.or.null`, `check.double.or.null` and
# `check.int.or.string` being defined in the enclosing environment.
#
# @param data Matrix-like input; columns are treated as genes (column names
#   are used as gene names) and row names are copied onto the result.
# @param genes NULL/NA for the default, numeric column indices, the keywords
#   "all_genes" / "pca_only", or a character vector of column names.
# @param knn,knn.max,decay,t,npca,solver,knn.dist.method,verbose,n.jobs,seed
#   Forwarded to the Python `MAGIC` operator after type normalisation.
# @param init Optional object of class "magic" whose operator is reused.
# @param t.max Forwarded to `fit_transform()` as `t_max`.
# @param k,alpha Deprecated aliases for `knn` and `decay`.
# @param ... Extra arguments forwarded to the operator.
# @return A list of class c("magic", "list") with elements `result`
#   (data.frame), `operator` and `params`.
magic_patched <- function(
    data,
    genes = NULL,
    knn = 5,
    knn.max = NULL,
    decay = 1,
    t = 3,
    npca = 100,
    solver = "exact",
    init = NULL,
    t.max = 20,
    knn.dist.method = "euclidean",
    verbose = 1,
    n.jobs = 1,
    seed = NULL,
    # deprecated args
    k = NULL, alpha = NULL,
    ...) {
  # check installation
  # if (!reticulate::py_module_available(module = "magic") ||
  #     !exists("pymagic") || is.null(pymagic)) {
  #     Rmagic:::load_pymagic()
  # }
  # check for deprecated arguments
  if (!is.null(k)) {
    message("Argument k is deprecated. Using knn instead.")
    knn <- k
  }
  if (!is.null(alpha)) {
    message("Argument alpha is deprecated. Using decay instead.")
    decay <- alpha
  }
  # validate parameters
  knn <- as.integer(x = knn)
  t.max <- as.integer(x = t.max)
  n.jobs <- as.integer(x = n.jobs)
  npca <- check.int.or.null(npca)
  knn.max <- check.int.or.null(knn.max)
  seed <- check.int.or.null(seed)
  verbose <- check.int.or.null(verbose)
  decay <- check.double.or.null(decay)
  t <- check.int.or.string(t, "auto")
  if (!methods::is(object = data, "Matrix")) {
    data <- as.matrix(x = data)
  }
  # if (length(genes) <= 1 && (is.null(x = genes) || is.na(x = genes))) {
  #                                                  ^^^^^^^^^^^^^^^^ bug here
  if (length(genes) <= 1 && (is.null(x = genes) || (length(genes) == 1 && is.na(x = genes)))) {
    genes <- NULL
    gene_names <- colnames(x = data)
  } else if (is.numeric(x = genes)) {
    gene_names <- colnames(x = data)[genes]
    # Python side is 0-based
    genes <- as.integer(x = genes - 1)
  } else if (length(x = genes) == 1 && genes == "all_genes") {
    gene_names <- colnames(x = data)
  } else if (length(x = genes) == 1 && genes == "pca_only") {
    gene_names <- paste0("PC", 1:npca)
  } else {
    # character vector
    missing_genes <- genes[!(genes %in% colnames(x = data))]
    if (length(missing_genes) > 0) {
      # Emit one readable message listing every missing gene
      warning(paste0(
        "Genes ",
        paste(missing_genes, collapse = ", "),
        " not found."
      ))
    }
    genes <- which(x = colnames(x = data) %in% genes)
    gene_names <- colnames(x = data)[genes]
    # Python side is 0-based
    genes <- as.integer(x = genes - 1)
  }
  # store parameters
  params <- list(
    "data" = data,
    "knn" = knn,
    "knn.max" = knn.max,
    "decay" = decay,
    "t" = t,
    "npca" = npca,
    "solver" = solver,
    "knn.dist.method" = knn.dist.method
  )
  # use pre-initialized values if given
  operator <- NULL
  if (!is.null(x = init)) {
    if (!methods::is(init, "magic")) {
      warning("object passed to init is not a magic object")
    } else {
      operator <- init$operator
      operator$set_params(
        knn = knn,
        knn_max = knn.max,
        decay = decay,
        t = t,
        n_pca = npca,
        solver = solver,
        knn_dist = knn.dist.method,
        n_jobs = n.jobs,
        random_state = seed,
        verbose = verbose,
        ...
      )
    }
  }
  # No reusable operator: build a fresh one from the Python module
  if (is.null(x = operator)) {
    operator <- pymagic$MAGIC(
      knn = knn,
      knn_max = knn.max,
      decay = decay,
      t = t,
      n_pca = npca,
      solver = solver,
      knn_dist = knn.dist.method,
      n_jobs = n.jobs,
      random_state = seed,
      verbose = verbose,
      ...
    )
  }
  result <- operator$fit_transform(
    data,
    genes = genes,
    t_max = t.max
  )
  colnames(x = result) <- gene_names
  rownames(x = result) <- rownames(data)
  result <- as.data.frame(x = result)
  result <- list(
    "result" = result,
    "operator" = operator,
    "params" = params
  )
  class(x = result) <- c("magic", "list")
  return(result)
}
232
+
233
# Run MAGIC on the cells x genes matrix built above.
data_impute <- magic_patched(data_impute, genes = genes_to_impute)

# magic_patched() names the result columns after genes and copies the input
# row names, so `$result` is cells x genes. Both branches keep that
# orientation, so the single t() in SetAssayData() below yields genes x cells.
if (threshold > 0) {
    # Only a subset of genes was imputed: stitch the untouched genes back on
    data <- t(GetAssayData(sobj, layer = "data"))
    data_impute <- cbind(
        # drop = FALSE keeps a matrix (and its column name) when only one
        # gene was left un-imputed
        data[, setdiff(colnames(data), genes_to_impute), drop = FALSE],
        Matrix::as.matrix(data_impute$result)
    )
    rm(data)
    gc()
} else {
    # All genes were imputed. Do not transpose here: transposing in this
    # branch and again below would hand cells x genes to SetAssayData().
    data_impute <- Matrix::as.matrix(data_impute$result)
}

log$info("Adding imputed data to Seurat object ...")
# Add imputed data to the Seurat object
sobj <- SetAssayData(
    sobj,
    layer = "data",
    new.data = t(data_impute)
)

sobj@misc$impute_method <- "rmagic"

log$info("Saving Seurat object ...")
save_obj(sobj, outfile)
@@ -3,7 +3,7 @@ library(Seurat)
3
3
 
4
4
  infile = {{in.infile | r}}
5
5
  outfile = {{out.outfile | r}}
6
- joboutdir = "{{job.outdir}}/"
6
+ joboutdir = {{job.outdir | append: "/" | r}}
7
7
  drop_thre = {{envs.scimpute_args.drop_thre | r}}
8
8
  kcluster = {{(envs.scimpute_args.kcluster | default: None | r}}
9
9
  ncores = {{envs.scimpute_args.ncores | r}}
@@ -12,7 +12,7 @@ refgene = {{envs.scimpute_args.refgene | r}}
12
12
  setwd(joboutdir)
13
13
 
14
14
  labels = NULL
15
- sobj = readRDS(infile)
15
+ sobj = read_obj(infile)
16
16
  counts = as.data.frame(sobj@assays$RNA@counts)
17
17
  kc = length(unique(Idents(sobj)))
18
18
  if (kc > 0) {
@@ -38,6 +38,9 @@ scimpute(
38
38
  imputed = readRDS(file.path(joboutdir, "scimpute_count.rds"))
39
39
  outobj = CreateSeuratObject(counts = imputed)
40
40
 
41
- outobj@meta.data = sobj@meta.data[rownames(outobj@meta.data),,drop=FALSE]
42
- attr(outobj, "impute") = "scimpute"
43
- saveRDS(outobj, outfile)
41
+ outobj@meta.data = sobj@meta.data[rownames(outobj@meta.data), , drop=FALSE]
42
+ # remember that it is the counts being imputed, we still need to
43
+ # normalize the data
44
+ outobj@misc$impute_method = "scimpute"
45
+
46
+ save_obj(outobj, outfile)
@@ -0,0 +1,7 @@
1
+ {% if envs.tool == "rmagic" %}
2
+ {% include biopipen_dir + "/scripts/scrna/ExprImputation-rmagic.R" %}
3
+ {% elif envs.tool == "scimpute" %}
4
+ {% include biopipen_dir + "/scripts/scrna/ExprImputation-scimpute.R" %}
5
+ {% elif envs.tool == "alra" %}
6
+ {% include biopipen_dir + "/scripts/scrna/ExprImputation-alra.R" %}
7
+ {% endif %}
@@ -0,0 +1,51 @@
1
+ library(loomR)
2
+ library(DropletUtils)
3
+ library(Matrix)
4
+
5
+ loomfile <- {{in.loomfile | r}}
6
+ outdir <- {{out.outdir | r}}
7
+
8
+ lfile <- connect(filename = loomfile, mode = "r")
9
+
10
+ # Extract the expression matrix (genes x cells)
11
+ expr_matrix <- t(lfile[["matrix"]][, ])
12
+ if (!inherits(expr_matrix, "dgCMatrix")) {
13
+ expr_matrix <- Matrix::Matrix(expr_matrix, sparse = TRUE)
14
+ }
15
+
16
+ # Extract gene names and IDs
17
+ gene_names <- lfile[["row_attrs/Gene"]][]
18
+
19
+ gene_ids <- tryCatch({
20
+ lfile[["row_attrs/GeneID"]][]
21
+ }, error = function(e) {
22
+ NULL
23
+ })
24
+
25
+ if (is.null(gene_ids)) {
26
+ gene_ids <- gene_names
27
+ }
28
+
29
+ # Extract cell barcodes
30
+ cell_barcodes <- lfile[["col_attrs/CellID"]][]
31
+
32
+ # Close the LOOM file connection
33
+ lfile$close_all()
34
+
35
+ # Create a data frame for gene information
36
+ gene_info <- data.frame(
37
+ gene_id = gene_ids,
38
+ gene_name = gene_names
39
+ )
40
+
41
+ # Write the data to 10X format
42
+
43
+ write10xCounts(
44
+ path = outdir,
45
+ x = expr_matrix,
46
+ gene.id = gene_info$gene_id,
47
+ gene.symbol = gene_info$gene_name,
48
+ barcodes = cell_barcodes,
49
+ version = "3",
50
+ overwrite = TRUE
51
+ )
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from contextlib import suppress
5
+ from biopipen.core.filters import dict_to_cli_args
6
+ from biopipen.utils.misc import run_command
7
+
8
+ cellsnpout = {{in.cellsnpout | quote}} # noqa: E999 # pyright: ignore
9
+ outdir = {{out.outdir | quote}} # pyright: ignore
10
+ envs: dict = {{envs | repr}} # pyright: ignore
11
+ mquad = envs.pop("mquad")
12
+ ncores = envs.pop("ncores")
13
+ seed = envs.pop("seed", 8525)
14
+
15
+ with suppress(RuntimeError):
16
+ run_command([mquad], fg=True)
17
+ print("")
18
+
19
+ envs["cellData"] = cellsnpout
20
+ envs["outDir"] = outdir
21
+ envs["randSeed"] = seed
22
+ envs["nproc"] = ncores
23
+
24
+ cmd = [mquad, *dict_to_cli_args(envs, sep="=")]
25
+ run_command(cmd, fg=True, bufsize=1)