biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.8.1
2
+ Generator: poetry-core 2.2.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,7 +1,8 @@
1
1
  [pipen_cli_run]
2
2
  bam=biopipen.ns.bam
3
- bcftools=biopipen.ns.bcftools
4
3
  bed=biopipen.ns.bed
4
+ cellranger=biopipen.ns.cellranger
5
+ cellranger_pipeline=biopipen.ns.cellranger_pipeline
5
6
  cnv=biopipen.ns.cnv
6
7
  cnvkit=biopipen.ns.cnvkit
7
8
  cnvkit_pipeline=biopipen.ns.cnvkit_pipeline
@@ -10,10 +11,13 @@ gene=biopipen.ns.gene
10
11
  gsea=biopipen.ns.gsea
11
12
  misc=biopipen.ns.misc
12
13
  plot=biopipen.ns.plot
14
+ protein=biopipen.ns.protein
15
+ regulatory=biopipen.ns.regulatory
13
16
  rnaseq=biopipen.ns.rnaseq
14
17
  scrna=biopipen.ns.scrna
15
- scrna_basic=biopipen.ns.scrna_basic
16
18
  scrna_metabolic_landscape=biopipen.ns.scrna_metabolic_landscape
19
+ snp=biopipen.ns.snp
20
+ stats=biopipen.ns.stats
17
21
  tcgamaf=biopipen.ns.tcgamaf
18
22
  tcr=biopipen.ns.tcr
19
23
  vcf=biopipen.ns.vcf
biopipen/ns/bcftools.py DELETED
@@ -1,111 +0,0 @@
1
- """handling VCF files using bcftools"""
2
- from ..core.proc import Proc
3
- from ..core.config import config
4
-
5
-
6
- class BcftoolsAnnotate(Proc):
7
- """Add or remove annotations from VCF files
8
-
9
- Input:
10
- infile: The input VCF file
11
- annfile: The annotation file
12
-
13
- Output:
14
- outfile: The annotated VCF file
15
-
16
- Envs:
17
- bcftools: Path to bcftools
18
- tabix: Path to tabix, used to index infile and annfile
19
- annfile: The annotation file. If `in.annfile` is provided,
20
- this is ignored
21
- ncores: Number of cores (`--nthread`) to use
22
- cols: Overwrite `-c/--columns`
23
- header: Headers to be added
24
- args: Other arguments for `bcftools annotate`
25
- """
26
- input = "infile:file, annfile:file"
27
- output = "outfile:file:{{in.infile | basename}}"
28
- lang = config.lang.python
29
- envs = {
30
- "bcftools": config.exe.bcftools,
31
- "tabix": config.exe.tabix,
32
- "annfile": "",
33
- "cols": [],
34
- "header": [],
35
- "args": {},
36
- "ncores": config.misc.ncores,
37
- }
38
- script = "file://../scripts/bcftools/BcftoolsAnnotate.py"
39
-
40
-
41
- class BcftoolsFilter(Proc):
42
- """Apply fixed threshold filters to VCF files
43
-
44
- Input:
45
- infile: The input VCF file
46
-
47
- Output:
48
- outfile: The filtered VCF file. If the `in.infile` is gzipped, this is
49
- gzipped as well.
50
-
51
- Envs:
52
- bcftools: Path to bcftools
53
- ncores: Number of cores (`--nthread`) to use
54
- keep: Whether we should keep the filtered variants or not.
55
- args: Other arguments for `bcftools annotate`
56
- ncores: `nthread`
57
- tmpdir: Path to save the intermediate files
58
- Since the filters need to be applied one by one by bcftools
59
- includes: and
60
- excludes: include/exclude only sites for which EXPRESSION is true.
61
- See: https://samtools.github.io/bcftools/bcftools.html#expressions
62
- If provided, `envs.args.include/exclude` will be ignored.
63
- If `str`/`list` used, The filter names will be `Filter%d`
64
- A dict is used when keys are filter names and values are expressions
65
- """
66
- input = "infile:file"
67
- output = "outfile:file:{{in.infile | basename}}"
68
- lang = config.lang.python
69
- envs = {
70
- "bcftools": config.exe.bcftools,
71
- "keep": True,
72
- "ncores": config.misc.ncores,
73
- "includes": None,
74
- "excludes": None,
75
- "tmpdir": config.path.tmpdir,
76
- "args": {},
77
- }
78
- script = "file://../scripts/bcftools/BcftoolsFilter.py"
79
-
80
-
81
- class BcftoolsSort(Proc):
82
- """Sort VCF files
83
-
84
- Input:
85
- infile: The input VCF file
86
-
87
- Output:
88
- outfile: The sorted VCF file.
89
-
90
- Envs:
91
- bcftools: Path to bcftools
92
- gz: Whether to gzip the output file
93
- index: Whether to index the output file (tbi) (`envs.gz` forced to True)
94
- tmpdir: Path to save the intermediate files
95
- args: Other arguments for `bcftools sort`. For example `max-mem`.
96
- See also https://samtools.github.io/bcftools/bcftools.html#sort
97
- """
98
- input = "infile:file"
99
- output = (
100
- "outfile:file:{{in.infile | stem0}}.vcf"
101
- "{% if envs.gz or envs.index %}.gz{% endif %}"
102
- )
103
- lang = config.lang.python
104
- envs = {
105
- "bcftools": config.exe.bcftools,
106
- "gz": True,
107
- "index": True,
108
- "tmpdir": config.path.tmpdir,
109
- "args": {},
110
- }
111
- script = "file://../scripts/bcftools/BcftoolsSort.py"
@@ -1,255 +0,0 @@
1
- """Basic analysis for single cell RNA-seq data
2
-
3
- - QC
4
- - Clustering
5
- - Marker genes
6
- - Enrichment analysis
7
- """
8
- from __future__ import annotations
9
- from pathlib import Path
10
- from typing import Type
11
-
12
- from pipen.utils import mark, is_loading_pipeline
13
- from pipen_annotate import annotate
14
- from pipen_args import ProcGroup
15
-
16
- from ..core.proc import Proc
17
-
18
-
19
- class ScrnaBasic(ProcGroup):
20
- """Basic analysis for single cell RNA-seq data
21
-
22
- Including QC, clustering, marker genes, and enrichment analysis.
23
-
24
- See also the docs for details
25
- <https://pwwang.github.io/biopipen/pipelines/scrna_basic/>
26
-
27
- Args:
28
- infile: The input file. Either a tab-delimited file containing
29
- the information of metadata and paths to results of cellranger
30
- or a seurat object has been saved as RDS file (with extension
31
- `.rds` or `.RDS`), which QC is assumed to be done.
32
- As for the tab-delimited file, it should have two columns:
33
- `Sample` and `RNAData`. `Sample` should be the first column with
34
- unique identifiers for the samples and `RNAData` indicates where the
35
- barcodes, genes, expression matrices are.
36
- is_seurat (flag): Whether the input file is a seurat object
37
- in RDS format.
38
- If this process group runs independently, this argument should
39
- not be set. It will be recognized automatically by the extension
40
- of `infile`. However, if this process group is run as a part of
41
- a pipeline, this argument should be set manually since `infile`
42
- should not be set in this case. It will be passed by other processes
43
- clustering (choice;required): Which clustering method to use.
44
- - supervised: Mapping the cells to given reference.
45
- Using Seurat Reference Mapping procedure.
46
- See: <https://satijalab.org/seurat/articles/multimodal_reference_mapping.html>
47
- - unsupervised: Clustering the cells without reference.
48
- Using Seurat FindClusters procedure.
49
- - both: Both supervised and unsupervised clustering.
50
- Performing both of the above procedures. The unsupervised
51
- clustering will be added as `seurat_clusters_unsupervised`
52
- to the metadata.
53
- ref: The reference file for supervised clustering. It should be an
54
- RDS file (with extension `.rds` or `.RDS`) containing a seurat
55
- object, or a h5 file (with extension `.h5` or `.h5seurat`) that
56
- can be loaded by `Seurat::LoadH5Seurat()`.
57
- """ # noqa: E501
58
-
59
- DEFAULTS = {
60
- "infile": None,
61
- "is_seurat": False,
62
- "clustering": None,
63
- "ref": None,
64
- }
65
-
66
- def post_init(self) -> None:
67
- if self.opts.infile:
68
- suffix = Path(self.opts.infile).suffix
69
- self.opts.is_seurat = suffix in (".rds", ".RDS")
70
-
71
- @ProcGroup.add_proc
72
- def p_input(self) -> Type[Proc]:
73
- """Build the input for the process group"""
74
- from .misc import File2Proc
75
-
76
- @mark(board_config_hidden=True)
77
- class ScrnaBasicInput(File2Proc):
78
- """Input file for scrna_basic process group
79
-
80
- To specify the input file, use the `infile` argument of the
81
- process group.
82
- """
83
-
84
- if self.opts.infile:
85
- input_data = [self.opts.infile]
86
-
87
- return ScrnaBasicInput
88
-
89
- @ProcGroup.add_proc
90
- def p_prepare(self) -> Type[Proc]:
91
- """Prepare the input data into a Seurat object and do QC"""
92
- if self.opts.is_seurat:
93
- return self.p_input
94
-
95
- from .scrna import SeuratPreparing
96
-
97
- class ScrnaBasicPrepareAndQC(SeuratPreparing):
98
- requires = self.p_input
99
-
100
- return ScrnaBasicPrepareAndQC
101
-
102
- @ProcGroup.add_proc
103
- def p_supervised(self) -> Type[Proc]:
104
- if (
105
- self.opts.clustering == "unsupervised"
106
- and not is_loading_pipeline()
107
- ):
108
- return None
109
-
110
- from .scrna import SeuratMap2Ref
111
-
112
- @annotate.format_doc(indent=3)
113
- class ScrnaBasicSupervised(SeuratMap2Ref):
114
- """{{Summary}}
115
-
116
- **Only available when the group argument `clustering` is set to
117
- `supervised` or `both`.**
118
-
119
- Envs:
120
- ref (pgarg): {{Envs.ref.help | indent(20)}}.
121
- Defaults to the `ref` argument of the process group.
122
- """
123
- requires = self.p_prepare
124
- envs = {
125
- "ref": self.opts.ref,
126
- }
127
-
128
- return ScrnaBasicSupervised
129
-
130
- @ProcGroup.add_proc
131
- def p_supervised_stats(self) -> Type[Proc]:
132
- if not self.p_supervised and not is_loading_pipeline():
133
- return None
134
-
135
- from .scrna import SeuratClusterStats
136
-
137
- @annotate.format_doc(indent=3)
138
- class ScrnaBasicSupervisedStats(SeuratClusterStats):
139
- """{{Summary}}
140
-
141
- **Only available when the group argument `clustering` is set to
142
- `supervised` or `both`.**
143
- """
144
- requires = self.p_supervised
145
-
146
- return ScrnaBasicSupervisedStats
147
-
148
- @ProcGroup.add_proc
149
- def p_unsupervised(self) -> Type[Proc]:
150
- if (
151
- self.opts.clustering == "supervised"
152
- and not is_loading_pipeline()
153
- ):
154
- return None
155
-
156
- from .scrna import SeuratClustering
157
-
158
- class ScrnaBasicUnsupervised(SeuratClustering):
159
- requires = self.p_prepare
160
-
161
- return ScrnaBasicUnsupervised
162
-
163
- @ProcGroup.add_proc
164
- def p_unsupervised_anno(self) -> Type[Proc]:
165
- if not self.p_unsupervised and not is_loading_pipeline():
166
- return None
167
-
168
- from .scrna import CellTypeAnnotation
169
-
170
- class ScrnaBasicAnnotation(CellTypeAnnotation):
171
- requires = self.p_unsupervised
172
-
173
- return ScrnaBasicAnnotation
174
-
175
- @ProcGroup.add_proc
176
- def p_unsupervised_stats(self) -> Type[Proc]:
177
- if not self.p_unsupervised_anno and not is_loading_pipeline():
178
- return None
179
-
180
- from .scrna import SeuratClusterStats
181
-
182
- class ScrnaBasicUnsupervisedStats(SeuratClusterStats):
183
- requires = self.p_unsupervised_anno
184
-
185
- return ScrnaBasicUnsupervisedStats
186
-
187
- @ProcGroup.add_proc
188
- def p_merge(self) -> Type[Proc]:
189
- if self.opts.clustering == "supervised" and not is_loading_pipeline():
190
- return self.p_supervised
191
-
192
- if self.opts.clustering == "unsupervised" and not is_loading_pipeline():
193
- return self.p_unsupervised_anno
194
-
195
- @mark(board_config_hidden=True)
196
- class ScrnaBasicMerge(Proc):
197
- """Merge the supervised and unsupervised clustering results
198
-
199
- Add unsupervised clustering as metadata to the seurat object
200
- with supervised clustering.
201
-
202
- The unsupervised clustering results are stored in the metadata
203
- `seurat_clusters_unsupervised`.
204
-
205
- **Only available when the group argument `clustering` is set to
206
- `both`.**
207
- """
208
- requires = [self.p_supervised, self.p_unsupervised_anno]
209
- lang = self.p_supervised.lang
210
- input = "sobjfile:file, uobjfile:file"
211
- output = "outfile:file:{{in.sobjfile | stem}}.rds"
212
- script = """
213
- library(Seurat)
214
- sobj <- readRDS({{in.sobjfile | quote}})
215
- uobj <- readRDS({{in.uobjfile | quote}})
216
- umeta <- as.list(uobj$seurat_clusters)
217
- names(umeta) <- rownames(uobj)
218
- sobj <- AddMetaData(
219
- sobj,
220
- metadata=umeta,
221
- col.name="seurat_clusters_unsupervised"
222
- )
223
- saveRDS(sobj, {{out.outfile | quote}})
224
- """
225
-
226
- return ScrnaBasicMerge
227
-
228
- @ProcGroup.add_proc
229
- def p_findmarkers(self) -> Type[Proc]:
230
- from .scrna import MarkersFinder
231
-
232
- @annotate.format_doc(indent=3)
233
- class ScrnaBasicMarkers(MarkersFinder):
234
- """{{Summary}}
235
-
236
- If the group argument `clustering` is set to `"both"`,
237
- you can set `group-by` to `"seurat_clusters_unsupervised"` in
238
- a different case to find the markers for the unsupervised clusters.
239
- """
240
- requires = self.p_merge
241
-
242
- return ScrnaBasicMarkers
243
-
244
- @ProcGroup.add_proc
245
- def p_scgsea(self) -> Type[Proc]:
246
- from .scrna import ScFGSEA
247
-
248
- class ScrnaBasicScGSEA(ScFGSEA):
249
- requires = self.p_merge
250
-
251
- return ScrnaBasicScGSEA
252
-
253
-
254
- if __name__ == "__main__":
255
- ScrnaBasic().as_pipen().run()
@@ -1,36 +0,0 @@
1
- {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
-
3
- <script>
4
- import { Image, DataTable } from "$libs";
5
- </script>
6
-
7
- {%- macro report_job(job, h=1) -%}
8
-
9
- {% if envs.stats %}
10
- <h{{h}}>Sample Information</h{{h}}>
11
- {% endif %}
12
- {% if envs.exclude_cols and isinstance(envs.exclude_cols, str) %}
13
- {% set excluded_cols = envs.exclude_cols | replace: " ", "" | split: "," %}
14
- {% else %}
15
- {% set excluded_cols = envs.exclude_cols %}
16
- {% endif %}
17
-
18
- <DataTable
19
- data={ {{ job.out.outfile | datatable: sep="\t", excluded=excluded_cols }} }
20
- pageSize={50}
21
- />
22
-
23
- {% if envs.stats %}
24
- <h{{h}}>Statistics</h{{h}}>
25
- {%- set stat_imgs = job.outdir | glob: "*.png" -%}
26
- {{- table_of_images(stat_imgs) -}}
27
- {% endif %}
28
-
29
- {%- endmacro -%}
30
-
31
- {%- macro head_job(job) -%}
32
- <h1>{{job.in.infile | stem | escape }}</h1>
33
- {%- endmacro -%}
34
-
35
- {{ report_jobs(jobs, head_job, report_job) }}
36
-
@@ -1,32 +0,0 @@
1
- {% from_ os import path %}
2
- {% from "utils/misc.liq" import report_jobs, table_of_images -%}
3
-
4
- <script>
5
- import { Image } from "$libs";
6
- </script>
7
-
8
- {%- macro report_job(job, h=1) -%}
9
-
10
- {% assign boxplotpng = job.out.outdir | joinpaths: "boxplot.png" %}
11
- {% assign heatmappng = job.out.outdir | joinpaths: "heatmap.png" %}
12
-
13
- {% if path.exists(boxplotpng) %}
14
- <Image src={{boxplotpng | quote}} />
15
- {% endif %}
16
-
17
- {% if path.exists(heatmappng) %}
18
- <Image src={{heatmappng | quote}} />
19
- {% endif %}
20
-
21
- {%- endmacro -%}
22
-
23
- {%- macro head_job(job) -%}
24
- {% if job.in.configfile %}
25
- {% assign name = job.in.configfile | toml_load | attr: "name" %}
26
- {% else %}
27
- {% assign name = job.ennvs.config | attr: "name" %}
28
- {% endif %}
29
- <h1>{{name | escape}}</h1>
30
- {%- endmacro -%}
31
-
32
- {{ report_jobs(jobs, head_job, report_job) }}
@@ -1,35 +0,0 @@
1
- {% from "utils/gsea.liq" import fgsea_report -%}
2
- {% from "utils/misc.liq" import report_jobs -%}
3
- <script>
4
- import { Image, DataTable } from "$libs";
5
- </script>
6
-
7
- {%- macro report_job(job, h=1) -%}
8
- {%- set secdirs = job.out.outdir | glob: "*" -%}
9
- {%- if len(secdirs) == 1 -%}
10
- {%- set secname = secdirs | first | basename -%}
11
- {%- for casedir in secdirs | first | glob: "*" -%}
12
- {%- if secname == "DEFAULT" -%}
13
- <h{{h}}>{{ casedir | basename | escape }}</h{{h}}>
14
- {%- else -%}
15
- <h{{h}}>{{secname | escape }} - {{ casedir | basename | escape }}</h{{h}}>
16
- {%- endif -%}
17
- {{ fgsea_report(casedir, h + 1) }}
18
- {%- endfor -%}
19
- {%- else -%}
20
- {%- for secdir in secdirs -%}
21
- {%- set sec = secdir | basename -%}
22
- <h{{h}}>{{sec | escape}}</h{{h}}>
23
- {%- for casedir in secdir | glob: "*" -%}
24
- <h{{h+1}}>{{casedir | basename | escape}}</h{{h+1}}>
25
- {{ fgsea_report(casedir, h + 2) }}
26
- {%- endfor -%}
27
- {%- endfor -%}
28
- {%- endif -%}
29
- {%- endmacro -%}
30
-
31
- {%- macro head_job(job) -%}
32
- <h1>{{job.in.srtobj | stem0 | escape}}</h1>
33
- {%- endmacro -%}
34
-
35
- {{ report_jobs(jobs, head_job, report_job) }}
@@ -1,82 +0,0 @@
1
- {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
- {% from_ os import path %}
3
- <script>
4
- import { DataTable, Image } from "$libs";
5
- import { Tabs, Tab, TabContent } from "$ccs";
6
- </script>
7
-
8
- {%- macro report_job(job, h=1) -%}
9
- {%- set stats_reports_file = job.out.outdir | joinpaths: "stats", "report_toc.json" -%}
10
- {%- set features_reports_file = job.out.outdir | joinpaths: "features", "report_toc.json" -%}
11
- {%- set dimplots_reports_file = job.out.outdir | joinpaths: "dimplots", "report_toc.json" -%}
12
-
13
- {%- if stats_reports_file | exists -%}
14
- {%- set stats = stats_reports_file | config: "json" -%}
15
- {% for key, value in stats.items() -%}
16
- <h{{h}}>{{key | escape}}</h{{h}}>
17
- <Tabs>
18
- {% if 'bar' in value -%}
19
- <Tab label="Bar plot" />
20
- {% endif -%}
21
- {% if 'pie' in value -%}
22
- <Tab label="Pie chart" />
23
- {% endif -%}
24
- {% if 'table' in value -%}
25
- <Tab label="Table" />
26
- {% endif -%}
27
- <svelte:fragment slot="content">
28
- {% if 'bar' in value -%}
29
- <TabContent>
30
- <Image src="{{job.out.outdir}}/stats/{{value.bar}}" />
31
- </TabContent>
32
- {% endif -%}
33
- {% if 'pie' in value -%}
34
- <TabContent>
35
- <Image src="{{job.out.outdir}}/stats/{{value.pie}}" />
36
- </TabContent>
37
- {% endif -%}
38
- {% if 'table' in value -%}
39
- <TabContent>
40
- <DataTable src="{{job.out.outdir}}/stats/{{value.table}}"
41
- data={ {{job.out.outdir | joinpaths: "stats", value.table | datatable: sep="\t", nrows=100 }} }
42
- />
43
- </TabContent>
44
- {% endif -%}
45
- </svelte:fragment>
46
- </Tabs>
47
- {%- endfor -%}
48
- {%- endif -%}
49
-
50
- {%- if features_reports_file | exists -%}
51
- {%- set features = features_reports_file | config: "json" %}
52
- {% for key, value in features.items() -%}
53
- <h{{h}}>{{key | escape}}</h{{h}}>
54
- {% for val in value -%}
55
- {% if "name" in val -%}
56
- <h{{h+1}}>{{val.name | escape}}</h{{h+1}}>
57
- {%- endif -%}
58
- {% if val.kind == "table" -%}
59
- <DataTable src="{{job.out.outdir}}/features/{{val.file}}"
60
- data={ {{job.out.outdir | joinpaths: "features", val.file | datatable: sep="\t", nrows=100 }} }
61
- />
62
- {% else -%}
63
- <Image src="{{job.out.outdir}}/features/{{val.file}}" />
64
- {% endif -%}
65
- {%- endfor -%}
66
- {%- endfor -%}
67
- {%- endif -%}
68
-
69
- {%- if dimplots_reports_file | exists -%}
70
- {%- set dimplots = dimplots_reports_file | config: "json" %}
71
- {% for key, value in dimplots.items() -%}
72
- <h{{h}}>{{key | escape}}</h{{h}}>
73
- <Image src="{{job.out.outdir}}/dimplots/{{value}}" />
74
- {%- endfor -%}
75
- {%- endif -%}
76
- {%- endmacro -%}
77
-
78
- {%- macro head_job(job) -%}
79
- <h1>{{job.in.srtobj | stem | escape}}</h1>
80
- {%- endmacro -%}
81
-
82
- {{ report_jobs(jobs, head_job, report_job) }}
@@ -1,20 +0,0 @@
1
- {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
- <script>
3
- import { Image } from "$libs";
4
- </script>
5
-
6
- {%- macro report_job(job, h=1) -%}
7
- <h{{h}}>Reference UMAP</h{{h}}>
8
- {% set imgs = job.outdir | glob: "Reference_UMAP_*.png" %}
9
- {{ table_of_images(imgs) }}
10
-
11
- <h{{h}}>Query UMAP</h{{h}}>
12
- {% set imgs = job.outdir | glob: "Query_UMAP_*.png" %}
13
- {{ table_of_images(imgs) }}
14
- {%- endmacro -%}
15
-
16
- {%- macro head_job(job) -%}
17
- <h1>{{job.in.sobjfile | stem0 | escape}}</h1>
18
- {%- endmacro -%}
19
-
20
- {{ report_jobs(jobs, head_job, report_job) }}
@@ -1,38 +0,0 @@
1
- {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
- {% from_ os import path %}
3
- <script>
4
- import { Image, DataTable } from "$libs";
5
- import { Tile } from "$ccs";
6
- </script>
7
-
8
- {%- macro report_job(job, h=1) -%}
9
- <h{{h}}>Applied filters</h{{h}}>
10
- <Tile>
11
- <p>Cell filters: {{envs.cell_qc | str | escape}}</p>
12
- <p>Gene filters: {{
13
- proc.envs.gene_qc
14
- | str
15
- | replace: "{", "&#123;"
16
- | replace: "}", "&#125;"
17
- }}</p>
18
-
19
- <DataTable
20
- src={{job.outdir | joinpaths: 'plots', 'dim.txt' | quote}}
21
- data={ {{job.outdir | joinpaths: 'plots', 'dim.txt' | datatable: sep="\t"}} } />
22
- </Tile>
23
-
24
- <h{{h}}>Violin plots</h{{h}}>
25
- {% set qcimgs = job.outdir | glob: "plots", "*.vln.png" %}
26
- {{ table_of_images(qcimgs) }}
27
-
28
- <h{{h}}>Scatter plots</h{{h}}>
29
- {% set qcimgs = job.outdir | glob: "plots", "*.scatter.png" %}
30
- {{ table_of_images(qcimgs) }}
31
-
32
- {%- endmacro -%}
33
-
34
- {%- macro head_job(job) -%}
35
- <h1>{{job.in.metafile | stem | escape}}</h1>
36
- {%- endmacro -%}
37
-
38
- {{ report_jobs(jobs, head_job, report_job) }}