biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
biopipen/ns/gsea.py CHANGED
@@ -1,8 +1,10 @@
1
1
  """Gene set enrichment analysis"""
2
+ from pipen.utils import mark
2
3
  from ..core.proc import Proc
3
4
  from ..core.config import config
4
5
 
5
6
 
7
+ @mark(deprecated='[{proc.name}] is deprecated, use `FGSEA` instead.')
6
8
  class GSEA(Proc):
7
9
  """Gene set enrichment analysis
8
10
 
@@ -51,6 +53,7 @@ class GSEA(Proc):
51
53
  plugin_opts = {"report": "file://../reports/gsea/GSEA.svelte"}
52
54
 
53
55
 
56
+ @mark(deprecated='[{proc.name}] is deprecated, use `FGSEA` directly.')
54
57
  class PreRank(Proc):
55
58
  """PreRank the genes for GSEA analysis
56
59
 
@@ -100,59 +103,82 @@ class PreRank(Proc):
100
103
  class FGSEA(Proc):
101
104
  """Gene set enrichment analysis using `fgsea`
102
105
 
103
- Need `devtools::install_github("ctlab/fgsea")`
104
-
105
106
  Input:
106
- infile: The expression file.
107
- Either a tab-delimited matrix or an RDS file (on envs.inopts)
107
+ infile: The expression file (genes x samples).
108
+ A tab-delimited file.
108
109
  metafile: The meta data file, determining the class of the samples
109
- Two columns are required
110
- Sample: The unique sample id for each sample
111
- `[Group]`: The groups/classes of the samples
112
- gmtfile: The GMT file of reference gene sets
113
- configfile: The configuration file in TOML format to specify some envs.
114
- `clscol`: If not provided, will use `envs.clscol`
115
- `classes`: Defines pos and neg labels. If not provided, use will
116
- `envs.classes`.
110
+ Two columns are required. If column `Sample` is found, it will be used
111
+ as the samples; otherwise the first column should be the samples.
112
+ The other column should be the group/class of the samples, whose
113
+ name is specified by `envs.clscol`.
117
114
 
118
115
  Output:
119
- outdir: The output directory
116
+ outdir: The output directory containing the results, including
117
+ the table and plots.
120
118
 
121
119
  Envs:
122
- inopts: The options for `read.table()` to read the input file
123
- If `rds` will use `readRDS()`
124
- metaopts: The options for `read.table()` to read the meta file
125
- method: The method to do the preranking.
126
- Supported: `s2n(signal_to_noise)`, `abs_s2n(abs_signal_to_noise)`,
127
- `t_test`, `ratio_of_classes`, `diff_of_classes` and
128
- `log2_ratio_of_classes`.
120
+ ncores (type=int): Number of cores for parallelization
121
+ Passed to `nproc` of `fgseaMultilevel()`.
122
+ case: The case label for the positive class.
123
+ control: The control label for the negative class.
124
+ When there are only two classes in `in.metafile` at column `envs.clscol`,
125
+ either `case` or `control` can be specified and the other will be
126
+ automatically set to the other class.
127
+ gmtfile: The pathways in GMT format, with the gene names/ids in the same format as in `in.infile`.
128
+ One could also use a URL to a GMT file. For example, from <https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/Pathways/>.
129
+ method (choice): The method to do the preranking.
130
+ - signal_to_noise: Signal to noise.
131
+ The larger the differences of the means (scaled by the standard deviations);
132
+ that is, the more distinct the gene expression is in each phenotype and the more the gene
133
+ acts as a "class marker".
134
+ - s2n: Alias of signal_to_noise.
135
+ - abs_signal_to_noise: The absolute value of signal_to_noise.
136
+ - abs_s2n: Alias of abs_signal_to_noise.
137
+ - t_test: T test.
138
+ Uses the difference of means scaled by the standard deviation and number of samples.
139
+ - ratio_of_classes: Also referred to as fold change.
140
+ Uses the ratio of class means to calculate fold change for natural scale data.
141
+ - diff_of_classes: Difference of class means.
142
+ Uses the difference of class means to calculate fold change for natural scale data.
143
+ - log2_ratio_of_classes: Log2 ratio of class means.
144
+ Uses the log2 ratio of class means to calculate fold change for natural scale data.
145
+ This is the recommended statistic for calculating fold change for log scale data.
129
146
  clscol: The column of metafile specifying the classes of the samples
130
- classes: The classes to specify the pos and neg labels.
131
- It could be a pair of labels (e.g. `["CASE", "CNTRL"]`), where
132
- the first one is pos and second is neg. Or you can have multiple
133
- pairs of labels (e.g. `[["CASE1", "CNTRL"], ["CASE2", "CNTRL"]]`)
134
- top: Do gsea table and enrich plot for top N pathways. If it is < 1,
135
- will apply it to `padj`
136
- `<rest>`: Rest arguments for `fgsea()`
147
+ When `in.metafile` is not specified, it can also be specified as a list of
148
+ classes, in the same order as the samples in `in.infile`.
149
+ top (type=auto): Do gsea table and enrich plot for top N pathways.
150
+ If it is < 1, will apply it to `padj`, selecting pathways with `padj` < `top`.
151
+ eps (type=float): This parameter sets the boundary for calculating the p value.
152
+ See <https://rdrr.io/bioc/fgsea/man/fgseaMultilevel.html>
153
+ minsize (type=int): Minimal size of a gene set to test. All pathways below the threshold are excluded.
154
+ maxsize (type=int): Maximal size of a gene set to test. All pathways above the threshold are excluded.
155
+ rest (type=json;order=98): Rest arguments for [`fgsea()`](https://rdrr.io/bioc/fgsea/man/fgsea.html)
156
+ See also <https://rdrr.io/bioc/fgsea/man/fgseaMultilevel.html>
157
+ cases (type=json;order=99): If you have multiple cases, you can specify them here.
158
+ The keys are the names of the cases and the values are the above options except `mutaters`.
159
+ If some options are not specified, the default values specified above will be used.
160
+ If no cases are specified, the default case will be added with the name `GSEA`.
137
161
 
138
162
  Requires:
139
163
  bioconductor-fgsea:
140
164
  - check: {{proc.lang}} -e "library(fgsea)"
141
- """
142
- input = "infile:file, metafile:file, gmtfile:file, configfile:file"
165
+ """ # noqa: E501
166
+ input = "infile:file, metafile:file"
143
167
  output = "outdir:dir:{{in.infile | stem}}.fgsea"
144
168
  lang = config.lang.rscript
145
169
  envs = {
146
- "inopts": {"header": True, "row.names": -1},
147
- "metaopts": {"header": True, "row.names": -1},
148
- "method": "s2n",
149
- "clscol": None,
150
- "classes": None,
151
- "top": 20,
152
170
  "ncores": config.misc.ncores,
153
- "minSize": 10,
154
- "maxSize": 100,
171
+ "case": None,
172
+ "control": None,
173
+ "gmtfile": None,
174
+ "method": "signal_to_noise",
175
+ "clscol": None,
176
+ "top": 10,
155
177
  "eps": 0,
178
+ "minsize": 10,
179
+ "maxsize": 100,
180
+ "rest": {},
181
+ "cases": {},
156
182
  }
157
183
  script = "file://../scripts/gsea/FGSEA.R"
158
184
  plugin_opts = {"report": "file://../reports/gsea/FGSEA.svelte"}
biopipen/ns/misc.py CHANGED
@@ -80,7 +80,7 @@ class Str2File(Proc):
80
80
  name: The name of the output file
81
81
  """
82
82
  input = "str, name"
83
- output = "outfile:file:{{in.name}}"
83
+ output = "outfile:file:{{in.name | default: 'unnamed.txt'}}"
84
84
  lang = config.lang.python
85
85
  envs = {"name": None}
86
86
  script = "file://../scripts/misc/Str2File.py"
@@ -105,17 +105,42 @@ class Shell(Proc):
105
105
  output = "outfile:file:{{in.infile | basename}}"
106
106
  envs = {"cmd": "", "outdir": False}
107
107
  lang = config.lang.bash
108
- script = """
109
- infile={{in.infile | quote}}
110
- outfile={{out.outfile | quote}}
111
- is_outdir={{envs.outdir | int}}
112
- cmd={{envs.cmd | quote}}
113
- if [[ -z "$cmd" ]]; then
114
- echo "No command given." 1>&2
115
- exit 1
116
- fi
117
- if [[ $is_outdir -eq 1 ]]; then
118
- mkdir -p "$outfile"
119
- fi
120
- eval "$cmd"
108
+ script = "file://../scripts/misc/Shell.sh"
109
+
110
+
111
+ class Plot(Proc):
112
+ """Plot given data using plotthis package in R
113
+
114
+ Input:
115
+ datafile: The input data file in RDS or qs/qs2 format.
116
+ If it is not in RDS nor qs/qs2 format, read.table will be used
117
+ to read the data file with the options provided by `envs.read_opts`.
118
+
119
+ Output:
120
+ plotfile: The output plot file in PNG format
121
+
122
+ Envs:
123
+ fn: The plot function to use. Required.
124
+ devpars (ns): The device parameters for the plot.
125
+ - width: The width of the plot in pixels.
126
+ - height: The height of the plot in pixels.
127
+ - res: The resolution of the plot in DPI.
128
+ more_formats: The additional formats to save the plot in other than PNG.
129
+ The file will be saved in the same directory as the plotfile.
130
+ save_code: Whether to save the R code used for plotting.
131
+ read_opts: Options to read the data file.
132
+ If the data file is not in RDS nor qs/qs2 format, these options
133
+ will be passed to `read.table`.
134
+ <more>: Additional parameters to the plot function.
121
135
  """
136
+ input = "datafile:file"
137
+ output = "plotfile:file:{{in.datafile | stem}}.png"
138
+ envs = {
139
+ "fn": None,
140
+ "devpars": {"res": 100},
141
+ "more_formats": [],
142
+ "save_code": False,
143
+ "read_opts": {},
144
+ }
145
+ lang = config.lang.rscript
146
+ script = "file://../scripts/misc/Plot.R"
biopipen/ns/plot.py CHANGED
@@ -1,8 +1,16 @@
1
1
  """Plotting data"""
2
2
 
3
+ import warnings
4
+
3
5
  from ..core.proc import Proc
4
6
  from ..core.config import config
5
7
 
8
+ warnings.warn(
9
+ "The `biopipen.ns.plot` module is deprecated and will be removed in the future. "
10
+ "Please use `biopipen.ns.misc.Plot` process instead.",
11
+ DeprecationWarning,
12
+ )
13
+
6
14
 
7
15
  class VennDiagram(Proc):
8
16
  """Plot Venn diagram
@@ -35,7 +43,7 @@ class VennDiagram(Proc):
35
43
  envs = {
36
44
  "inopts": {"row.names": -1, "header": False},
37
45
  "intype": "raw",
38
- "devpars": {"res": 100, "width": 1000, "height": 1000},
46
+ "devpars": {"res": 100, "width": 800, "height": 600},
39
47
  "args": {},
40
48
  "ggs": None,
41
49
  }
@@ -114,3 +122,298 @@ class Heatmap(Proc):
114
122
  "globals": "",
115
123
  }
116
124
  script = "file://../scripts/plot/Heatmap.R"
125
+
126
+
127
+ class ROC(Proc):
128
+ """Plot ROC curve using [`plotROC`](https://cran.r-project.org/web/packages/plotROC/vignettes/examples.html).
129
+
130
+ Input:
131
+ infile: The input file for data, tab-separated.
132
+ The first column should be ids of the records (this is optional if `envs.noids` is True).
133
+ The second column should be the labels of the records (1 for positive, 0 for negative).
134
+ If they are not binary, you can specify the positive label by `envs.pos_label`.
135
+ From the third column, it should be the scores of the different models.
136
+
137
+ Output:
138
+ outfile: The output figure file
139
+
140
+ Envs:
141
+ noids: Whether the input file has no ids, i.e. the first column is not an id column.
142
+ pos_label: The positive label.
143
+ ci: Whether to use `geom_rocci()` instead of `geom_roc()`.
144
+ devpars: The parameters for `png()`
145
+ args: Additional arguments for `geom_roc()` or `geom_rocci()` if `envs.ci` is True.
146
+ style_roc: Arguments for `style_roc()`
147
+ """ # noqa: E501
148
+ input = "infile:file"
149
+ output = "outfile:file:{{in.infile | stem}}.roc.png"
150
+ lang = config.lang.rscript
151
+ envs = {
152
+ "noids": False,
153
+ "pos_label": 1,
154
+ "ci": False,
155
+ "devpars": {"res": 100, "width": 750, "height": 600},
156
+ "args": {"labels": False},
157
+ "style_roc": {},
158
+ "show_auc": True,
159
+ }
160
+ script = "file://../scripts/plot/ROC.R"
161
+
162
+
163
+ class Manhattan(Proc):
164
+ """Plot Manhattan plot.
165
+
166
+ Using the [`ggmanh`](https://bioconductor.org/packages/devel/bioc/vignettes/ggmanh/inst/doc/ggmanh.html) package.
167
+ Requires `ggmanh` v1.9.6 or later.
168
+
169
+ Input:
170
+ infile: The input file for data
171
+ It should contain at least three columns, the chromosome, the position
172
+ and the p-value of the SNPs.
173
+ Header is required.
174
+
175
+ Output:
176
+ outfile: The output figure file
177
+
178
+ Envs:
179
+ chrom_col: The column for chromosome
180
+ An integer (1-based) or a string indicating the column name.
181
+ pos_col: The column for position
182
+ An integer (1-based) or a string indicating the column name.
183
+ pval_col: The column for p-value
184
+ An integer (1-based) or a string indicating the column name.
185
+ label_col: The column for label.
186
+ Once specified, the significant SNPs will be labeled on the plot.
187
+ devpars (ns): The parameters for `png()`
188
+ - res (type=int): The resolution
189
+ - width (type=int): The width
190
+ - height (type=int): The height
191
+ title: The title of the plot
192
+ ylabel: The y-axis label
193
+ rescale (flag): Whether to rescale the p-values
194
+ rescale_ratio_threshold (type=float): Threshold that triggers the rescale
195
+ signif (auto): A single value or a list of values to indicate the significance levels
196
+ Multiple values should be also separated by comma (`,`).
197
+ The minimum value will be used as the cutoff to determine if the SNPs are significant.
198
+ hicolors (auto): The colors for significant and non-significant SNPs
199
+ If a single color is given, the non-significant SNPs will be in grey.
200
+ Set it to None to disable the highlighting.
201
+ thin_n (type=int): Number of max points per horizontal partitions of the plot.
202
+ `0` or `None` to disable thinning.
203
+ thin_bins (type=int): Number of bins to partition the data.
204
+ zoom (auto): Chromosomes to zoom in
205
+ Each chromosome should be separated by comma (`,`) or in a list. Single chromosome is also accepted.
206
+ Ranges are also accepted, see `envs.chroms`.
207
+ Each chromosome will be saved in a separate file.
208
+ zoom_devpars (ns): The parameters for the zoomed plot
209
+ - width (type=int): The width
210
+ - height (type=int): The height, inherited from `devpars` by default
211
+ - res (type=int): The resolution, inherited from `devpars` by default
212
+ chroms (auto): The chromosomes and order to plot
213
+ A hyphen (`-`) can be used to indicate a range.
214
+ For example `chr1-22,chrX,chrY,chrM` will plot all autosomes, X, Y and M.
215
+ If `auto`, only the chromosomes in the data will be plotted in the order
216
+ they appear in the data.
217
+ args (ns): Additional arguments for `manhattan_plot()`.
218
+ See <https://rdrr.io/github/leejs-abv/ggmanh/man/manhattan_plot.html>.
219
+ Note that `-` will be replaced by `.` in the argument names.
220
+ - <more>: Additional arguments for `manhattan_plot()`
221
+ """ # noqa: E501
222
+ input = "infile:file"
223
+ output = "outfile:file:{{in.infile | stem0}}.manhattan.png"
224
+ lang = config.lang.rscript
225
+ envs = {
226
+ "chrom_col": 1,
227
+ "pos_col": 2,
228
+ "pval_col": 3,
229
+ "label_col": None,
230
+ "devpars": {"res": 100, "width": 1000, "height": 500},
231
+ "zoom_devpars": {"width": 500, "height": None, "res": None},
232
+ "title": None,
233
+ "ylabel": "-log10(p-value)",
234
+ "rescale": True,
235
+ "rescale_ratio_threshold": 5,
236
+ "signif": [5e-8, 1e-5],
237
+ "hicolors": None,
238
+ "thin_n": None,
239
+ "thin_bins": 200,
240
+ "zoom": None,
241
+ "chroms": "auto",
242
+ "args": {},
243
+ }
244
+ script = "file://../scripts/plot/Manhattan.R"
245
+
246
+
247
+ class QQPlot(Proc):
248
+ """Generate QQ-plot or PP-plot using qqplotr.
249
+
250
+ See <https://cran.r-project.org/web/packages/qqplotr/vignettes/introduction.html>.
251
+
252
+ Input:
253
+ infile: The input file for data
254
+ It should contain at least one column of p-values or the values to be
255
+ plotted. Header is required.
256
+ theorfile: The file for theoretical values (optional)
257
+ This file should contain at least one column of theoretical values.
258
+ The values will be passed to `envs.theor_funs` to calculate the theoretical
259
+ quantiles.
260
+ Header is required.
261
+
262
+ Output:
263
+ outfile: The output figure file
264
+
265
+ Envs:
266
+ val_col: The column for values to be plotted
267
+ An integer (1-based) or a string indicating the column name.
268
+ devpars (ns): The parameters for `png()`
269
+ - res (type=int): The resolution
270
+ - width (type=int): The width
271
+ - height (type=int): The height
272
+ xlabel: The x-axis label
273
+ ylabel: The y-axis label
274
+ title: The title of the plot
275
+ trans: The transformation of the values
276
+ You can use `-log10` to transform the values to `-log10(values)`.
277
+ Otherwise you can use a direct R function or a custom R function.
278
+ For example `function(x) -log10(x)`.
279
+ kind (choice): The kind of the plot, `qq` or `pp`
280
+ - qq: QQ-plot
281
+ - pp: PP-plot
282
+ theor_col: The column for theoretical values in `in.theorfile` if provided,
283
+ otherwise in `in.infile`.
284
+ An integer (1-based) or a string indicating the column name.
285
+ If `distribution` of `band`, `line`, or `point` is `custom`, this column
286
+ must be provided.
287
+ theor_trans: The transformation of the theoretical values.
288
+ The `theor_funs` have default functions to take the theoretical values.
289
+ This transformation will be applied to the theoretical values before
290
+ passing to the `theor_funs`.
291
+ theor_funs (ns): The R functions to generate density, quantile and deviates
292
+ of the theoretical distribution based on the theoretical values
293
+ if `distribution` of `band`, `line`, or `point` is `custom`.
294
+ - dcustom: The density function, used by band
295
+ - qcustom: The quantile function, used by point
296
+ - rcustom: The deviates function, used by line
297
+ args (ns): The common arguments for `envs.band`, `envs.line` and `envs.point`.
298
+ - distribution: The distribution of the theoretical quantiles
299
+ When `custom` is used, the `envs.theor_col` should be provided and
300
+ `values` will be added to `dparams` automatically.
301
+ - dparams (type=json): The parameters for the distribution
302
+ - <more>: Other shared arguments between `stat_*_band`, `stat_*_line`
303
+ and `stat_*_point`.
304
+ band (ns): The arguments for `stat_qq_band()` or `stat_pp_band()`.
305
+ See <https://rdrr.io/cran/qqplotr/man/stat_qq_band.html> and
306
+ <https://rdrr.io/cran/qqplotr/man/stat_pp_band.html>.
307
+ Set to `None` or `band.disabled` to True to disable the band.
308
+ - disabled (flag): Disable the band
309
+ - distribution: The distribution of the theoretical quantiles
310
+ When `custom` is used, the `envs.theor_col` should be provided and
311
+ `values` will be added to `dparams` automatically.
312
+ - dparams (type=json): The parameters for the distribution
313
+ - <more>: Additional arguments for `stat_qq_band()` or `stat_pp_band()`
314
+ line (ns): The arguments for `stat_qq_line()` or `stat_pp_line()`.
315
+ See <https://rdrr.io/cran/qqplotr/man/stat_qq_line.html> and
316
+ <https://rdrr.io/cran/qqplotr/man/stat_pp_line.html>.
317
+ Set to `None` or `line.disabled` to True to disable the line.
318
+ - disabled (flag): Disable the line
319
+ - distribution: The distribution of the theoretical quantiles
320
+ When `custom` is used, the `envs.theor_col` should be provided and
321
+ `values` will be added to `dparams` automatically.
322
+ - dparams (type=json): The parameters for the distribution
323
+ - <more>: Additional arguments for `stat_qq_line()` or `stat_pp_line()`
324
+ point (ns): The arguments for `geom_qq_point()` or `geom_pp_point()`.
325
+ See <https://rdrr.io/cran/qqplotr/man/stat_qq_point.html> and
326
+ <https://rdrr.io/cran/qqplotr/man/stat_pp_point.html>.
327
+ Set to `None` or `point.disabled` to True to disable the point.
328
+ - disabled (flag): Disable the point
329
+ - distribution: The distribution of the theoretical quantiles
330
+ When `custom` is used, the `envs.theor_col` should be provided and
331
+ `values` will be added to `dparams` automatically.
332
+ - dparams (type=json): The parameters for the distribution
333
+ - <more>: Additional arguments for `geom_qq_point()` or `geom_pp_point()`
334
+ ggs (list): Additional ggplot expression to adjust the plot.
335
+ """
336
+ input = "infile:file, theorfile:file"
337
+ output = "outfile:file:{{in.infile | stem}}.{{envs.kind}}.png"
338
+ lang = config.lang.rscript
339
+ envs = {
340
+ "val_col": 1,
341
+ "theor_col": None,
342
+ "theor_trans": None,
343
+ "theor_funs": {
344
+ "dcustom": """
345
+ function(x, values, ...) {
346
+ density(values, from = min(values), to = max(values), n = length(x))$y
347
+ }
348
+ """,
349
+ "qcustom": "function(p, values, ...) {quantile(values, probs = p)}",
350
+ "rcustom": "function(n, values, ...) { sample(values, n, replace = TRUE) }",
351
+ },
352
+ "args": {"distribution": "norm", "dparams": {}},
353
+ "devpars": {"res": 100, "width": 1000, "height": 1000},
354
+ "xlabel": "Theoretical Quantiles",
355
+ "ylabel": "Observed Quantiles",
356
+ "title": "QQ-plot",
357
+ "trans": None,
358
+ "kind": "qq",
359
+ "band": {"disabled": False, "distribution": None, "dparams": None},
360
+ "line": {"disabled": False, "distribution": None, "dparams": None},
361
+ "point": {"disabled": False, "distribution": None, "dparams": None},
362
+ "ggs": None,
363
+ }
364
+ script = "file://../scripts/plot/QQPlot.R"
365
+
366
+
367
+ class Scatter(Proc):
368
+ """Generate scatter plot using ggplot2.
369
+
370
+ [`ggpmisc`](https://cran.r-project.org/web/packages/ggpmisc/index.html) is used
371
+ for the stats and labels.
372
+ See also https://cran.r-project.org/web/packages/ggpmisc/vignettes/model-based-annotations.html
373
+
374
+ Input:
375
+ infile: The input file for data
376
+ It should contain at least two columns for x and y values.
377
+ Header is required.
378
+
379
+ Output:
380
+ outfile: The output figure file
381
+
382
+ Envs:
383
+ x_col: The column for x values
384
+ An integer (1-based) or a string indicating the column name.
385
+ y_col: The column for y values
386
+ An integer (1-based) or a string indicating the column name.
387
+ devpars (ns): The parameters for `png()`
388
+ - res (type=int): The resolution
389
+ - width (type=int): The width
390
+ - height (type=int): The height
391
+ args (ns): Additional arguments for `geom_point()`
392
+ See <https://ggplot2.tidyverse.org/reference/geom_point.html>.
393
+ - <more>: Additional arguments for `geom_point()`
394
+ mapping: Extra mapping for all geoms, including `stats`.
395
+ Normally `aes(color = group)`, but these forms are also valid: `color = group` or
396
+ `(color = group)`.
397
+ ggs (list): Additional ggplot expression to adjust the plot.
398
+ formula: The formula for the model
399
+ stats (type=json): The stats to add to the plot.
400
+ A dict whose keys are the stats available in `ggpmisc` (without the `stat_` prefix).
401
+ See <https://cran.r-project.org/web/packages/ggpmisc/vignettes/model-based-annotations.html#statistics>.
402
+ The values should be the arguments for the stats.
403
+ If you want a stat to be added multiple times, add a suffix `#x` to the key.
404
+ For example, `poly_line#1` and `poly_line#2` will add two polynomial lines.
405
+ """ # noqa: E501
406
+ input = "infile:file"
407
+ output = "outfile:file:{{in.infile | stem}}.scatter.png"
408
+ lang = config.lang.rscript
409
+ envs = {
410
+ "x_col": 1,
411
+ "y_col": 2,
412
+ "devpars": {"res": 100, "width": 1000, "height": 800},
413
+ "args": {},
414
+ "mapping": None,
415
+ "ggs": [],
416
+ "formula": "y ~ x",
417
+ "stats": {},
418
+ }
419
+ script = "file://../scripts/plot/Scatter.R"