biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +28 -0
  3. biopipen/core/filters.py +79 -4
  4. biopipen/core/proc.py +12 -3
  5. biopipen/core/testing.py +75 -3
  6. biopipen/ns/bam.py +148 -6
  7. biopipen/ns/bed.py +75 -0
  8. biopipen/ns/cellranger.py +186 -0
  9. biopipen/ns/cellranger_pipeline.py +126 -0
  10. biopipen/ns/cnv.py +19 -3
  11. biopipen/ns/cnvkit.py +1 -1
  12. biopipen/ns/cnvkit_pipeline.py +20 -12
  13. biopipen/ns/delim.py +34 -35
  14. biopipen/ns/gene.py +68 -23
  15. biopipen/ns/gsea.py +63 -37
  16. biopipen/ns/misc.py +39 -14
  17. biopipen/ns/plot.py +304 -1
  18. biopipen/ns/protein.py +183 -0
  19. biopipen/ns/regulatory.py +290 -0
  20. biopipen/ns/rnaseq.py +142 -5
  21. biopipen/ns/scrna.py +2053 -473
  22. biopipen/ns/scrna_metabolic_landscape.py +228 -382
  23. biopipen/ns/snp.py +659 -0
  24. biopipen/ns/stats.py +484 -0
  25. biopipen/ns/tcr.py +683 -98
  26. biopipen/ns/vcf.py +236 -2
  27. biopipen/ns/web.py +97 -6
  28. biopipen/reports/bam/CNVpytor.svelte +4 -9
  29. biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
  30. biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
  31. biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
  32. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  33. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  34. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  35. biopipen/reports/common.svelte +15 -0
  36. biopipen/reports/protein/ProdigySummary.svelte +16 -0
  37. biopipen/reports/scrna/CellsDistribution.svelte +4 -39
  38. biopipen/reports/scrna/DimPlots.svelte +1 -1
  39. biopipen/reports/scrna/MarkersFinder.svelte +6 -126
  40. biopipen/reports/scrna/MetaMarkers.svelte +3 -75
  41. biopipen/reports/scrna/RadarPlots.svelte +4 -20
  42. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
  43. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
  44. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
  45. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  46. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  47. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  48. biopipen/reports/snp/PlinkHet.svelte +18 -0
  49. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  50. biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
  51. biopipen/reports/tcr/ClonalStats.svelte +16 -0
  52. biopipen/reports/tcr/CloneResidency.svelte +3 -93
  53. biopipen/reports/tcr/Immunarch.svelte +4 -155
  54. biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
  55. biopipen/reports/tcr/TESSA.svelte +11 -28
  56. biopipen/reports/utils/misc.liq +22 -7
  57. biopipen/scripts/bam/BamMerge.py +11 -15
  58. biopipen/scripts/bam/BamSampling.py +90 -0
  59. biopipen/scripts/bam/BamSort.py +141 -0
  60. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  61. biopipen/scripts/bam/BamSubsetByBed.py +38 -0
  62. biopipen/scripts/bam/CNAClinic.R +41 -5
  63. biopipen/scripts/bam/CNVpytor.py +153 -54
  64. biopipen/scripts/bam/ControlFREEC.py +13 -14
  65. biopipen/scripts/bam/SamtoolsView.py +33 -0
  66. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  67. biopipen/scripts/bed/BedConsensus.py +5 -5
  68. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  69. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  70. biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
  71. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  72. biopipen/scripts/cellranger/CellRangerCount.py +138 -0
  73. biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
  74. biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
  75. biopipen/scripts/cnv/AneuploidyScore.R +55 -20
  76. biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
  77. biopipen/scripts/cnv/TMADScore.R +25 -9
  78. biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
  79. biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
  80. biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
  81. biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
  82. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  83. biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
  84. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  85. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  86. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
  87. biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
  88. biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
  89. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  90. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  91. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  92. biopipen/scripts/delim/RowsBinder.R +1 -1
  93. biopipen/scripts/delim/SampleInfo.R +116 -118
  94. biopipen/scripts/gene/GeneNameConversion.R +67 -0
  95. biopipen/scripts/gene/GenePromoters.R +61 -0
  96. biopipen/scripts/gsea/Enrichr.R +5 -5
  97. biopipen/scripts/gsea/FGSEA.R +184 -50
  98. biopipen/scripts/gsea/GSEA.R +2 -2
  99. biopipen/scripts/gsea/PreRank.R +5 -5
  100. biopipen/scripts/misc/Config2File.py +2 -2
  101. biopipen/scripts/misc/Plot.R +80 -0
  102. biopipen/scripts/misc/Shell.sh +15 -0
  103. biopipen/scripts/misc/Str2File.py +2 -2
  104. biopipen/scripts/plot/Heatmap.R +3 -3
  105. biopipen/scripts/plot/Manhattan.R +147 -0
  106. biopipen/scripts/plot/QQPlot.R +146 -0
  107. biopipen/scripts/plot/ROC.R +88 -0
  108. biopipen/scripts/plot/Scatter.R +112 -0
  109. biopipen/scripts/plot/VennDiagram.R +5 -9
  110. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  111. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  112. biopipen/scripts/protein/Prodigy.py +119 -0
  113. biopipen/scripts/protein/ProdigySummary.R +140 -0
  114. biopipen/scripts/protein/RMSD.py +178 -0
  115. biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
  116. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
  117. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
  118. biopipen/scripts/regulatory/MotifScan.py +159 -0
  119. biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
  120. biopipen/scripts/regulatory/motifs-common.R +324 -0
  121. biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
  122. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
  123. biopipen/scripts/rnaseq/Simulation.R +21 -0
  124. biopipen/scripts/rnaseq/UnitConversion.R +325 -54
  125. biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
  126. biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
  127. biopipen/scripts/scrna/CellCellCommunication.py +150 -0
  128. biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
  129. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  130. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
  131. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
  132. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
  133. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
  134. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
  135. biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
  136. biopipen/scripts/scrna/CellsDistribution.R +456 -167
  137. biopipen/scripts/scrna/DimPlots.R +1 -1
  138. biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
  139. biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
  140. biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
  141. biopipen/scripts/scrna/ExprImputation.R +7 -0
  142. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  143. biopipen/scripts/scrna/MQuad.py +25 -0
  144. biopipen/scripts/scrna/MarkersFinder.R +679 -400
  145. biopipen/scripts/scrna/MetaMarkers.R +265 -161
  146. biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
  147. biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
  148. biopipen/scripts/scrna/RadarPlots.R +355 -134
  149. biopipen/scripts/scrna/ScFGSEA.R +298 -100
  150. biopipen/scripts/scrna/ScSimulation.R +65 -0
  151. biopipen/scripts/scrna/ScVelo.py +617 -0
  152. biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
  153. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
  154. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
  155. biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
  156. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
  157. biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
  158. biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
  159. biopipen/scripts/scrna/SeuratClustering.R +36 -233
  160. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  161. biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
  162. biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
  163. biopipen/scripts/scrna/SeuratPreparing.R +223 -173
  164. biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
  165. biopipen/scripts/scrna/SeuratTo10X.R +27 -0
  166. biopipen/scripts/scrna/Slingshot.R +65 -0
  167. biopipen/scripts/scrna/Subset10X.R +2 -2
  168. biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
  169. biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
  170. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  171. biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
  172. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
  173. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
  174. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
  175. biopipen/scripts/snp/MatrixEQTL.R +217 -0
  176. biopipen/scripts/snp/Plink2GTMat.py +148 -0
  177. biopipen/scripts/snp/PlinkCallRate.R +199 -0
  178. biopipen/scripts/snp/PlinkFilter.py +100 -0
  179. biopipen/scripts/snp/PlinkFreq.R +291 -0
  180. biopipen/scripts/snp/PlinkFromVcf.py +81 -0
  181. biopipen/scripts/snp/PlinkHWE.R +85 -0
  182. biopipen/scripts/snp/PlinkHet.R +96 -0
  183. biopipen/scripts/snp/PlinkIBD.R +196 -0
  184. biopipen/scripts/snp/PlinkSimulation.py +124 -0
  185. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  186. biopipen/scripts/stats/ChowTest.R +146 -0
  187. biopipen/scripts/stats/DiffCoexpr.R +152 -0
  188. biopipen/scripts/stats/LiquidAssoc.R +135 -0
  189. biopipen/scripts/stats/Mediation.R +108 -0
  190. biopipen/scripts/stats/MetaPvalue.R +130 -0
  191. biopipen/scripts/stats/MetaPvalue1.R +74 -0
  192. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  193. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  194. biopipen/scripts/tcr/Attach2Seurat.R +3 -2
  195. biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
  196. biopipen/scripts/tcr/CDR3Clustering.R +343 -0
  197. biopipen/scripts/tcr/ClonalStats.R +526 -0
  198. biopipen/scripts/tcr/CloneResidency.R +255 -131
  199. biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
  200. biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
  201. biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
  202. biopipen/scripts/tcr/GIANA/query.py +164 -162
  203. biopipen/scripts/tcr/Immunarch-basic.R +31 -9
  204. biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
  205. biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
  206. biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
  207. biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
  208. biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
  209. biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
  210. biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
  211. biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
  212. biopipen/scripts/tcr/Immunarch.R +63 -11
  213. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  214. biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
  215. biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
  216. biopipen/scripts/tcr/SampleDiversity.R +1 -1
  217. biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
  218. biopipen/scripts/tcr/ScRepLoading.R +166 -0
  219. biopipen/scripts/tcr/TCRClusterStats.R +176 -22
  220. biopipen/scripts/tcr/TCRDock.py +110 -0
  221. biopipen/scripts/tcr/TESSA.R +102 -118
  222. biopipen/scripts/tcr/VJUsage.R +5 -5
  223. biopipen/scripts/tcr/immunarch-patched.R +142 -0
  224. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  225. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  226. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  227. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  228. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  229. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  230. biopipen/scripts/vcf/TruvariBench.sh +14 -7
  231. biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
  232. biopipen/scripts/vcf/TruvariConsistency.R +1 -1
  233. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  234. biopipen/scripts/vcf/VcfAnno.py +11 -11
  235. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  236. biopipen/scripts/vcf/VcfFilter.py +5 -5
  237. biopipen/scripts/vcf/VcfFix.py +7 -7
  238. biopipen/scripts/vcf/VcfFix_utils.py +13 -4
  239. biopipen/scripts/vcf/VcfIndex.py +3 -3
  240. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  241. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  242. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  243. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  244. biopipen/scripts/web/Download.py +8 -4
  245. biopipen/scripts/web/DownloadList.py +5 -5
  246. biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
  247. biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
  248. biopipen/scripts/web/gcloud_common.py +49 -0
  249. biopipen/utils/gene.py +108 -60
  250. biopipen/utils/misc.py +146 -20
  251. biopipen/utils/reference.py +64 -20
  252. biopipen/utils/reporter.py +177 -0
  253. biopipen/utils/vcf.py +1 -1
  254. biopipen-0.34.26.dist-info/METADATA +27 -0
  255. biopipen-0.34.26.dist-info/RECORD +292 -0
  256. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  257. {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
  258. biopipen/ns/bcftools.py +0 -111
  259. biopipen/ns/scrna_basic.py +0 -255
  260. biopipen/reports/delim/SampleInfo.svelte +0 -36
  261. biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
  262. biopipen/reports/scrna/ScFGSEA.svelte +0 -35
  263. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
  264. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
  265. biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
  266. biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
  267. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
  268. biopipen/reports/utils/gsea.liq +0 -110
  269. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  270. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  271. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  272. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  273. biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
  274. biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
  275. biopipen/scripts/scrna/ExprImpution.R +0 -7
  276. biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
  277. biopipen/scripts/scrna/Write10X.R +0 -11
  278. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
  279. biopipen/scripts/tcr/TCRClustering.R +0 -280
  280. biopipen/utils/common_docstrs.py +0 -61
  281. biopipen/utils/gene.R +0 -49
  282. biopipen/utils/gsea.R +0 -193
  283. biopipen/utils/io.R +0 -20
  284. biopipen/utils/misc.R +0 -114
  285. biopipen/utils/mutate_helpers.R +0 -433
  286. biopipen/utils/plot.R +0 -173
  287. biopipen/utils/rnaseq.R +0 -48
  288. biopipen/utils/single_cell.R +0 -115
  289. biopipen-0.21.0.dist-info/METADATA +0 -22
  290. biopipen-0.21.0.dist-info/RECORD +0 -218
@@ -0,0 +1,119 @@
1
+ import json
2
+ import logging
3
+ import sys
4
+ from pathlib import Path
5
+ from prodigy_prot.predict_IC import ( # type: ignore
6
+ Prodigy,
7
+ check_path,
8
+ parse_structure,
9
+ )
10
+
11
+ infile: str = {{in.infile | quote}} # pyright: ignore # noqa
12
+ outfile: str = {{out.outfile | quote}} # pyright: ignore
13
+ outdir: str = {{out.outdir | quote}} # pyright: ignore
14
+ distance_cutoff = {{envs.distance_cutoff | float}} # pyright: ignore
15
+ acc_threshold = {{envs.acc_threshold | float}} # pyright: ignore
16
+ temperature = {{envs.temperature | float}} # pyright: ignore
17
+ contact_list = {{envs.contact_list | repr}} # pyright: ignore
18
+ pymol_selection = {{envs.pymol_selection | repr}} # pyright: ignore
19
+ selection = {{envs.selection | repr}} # pyright: ignore
20
+ outtype = {{envs.outtype | repr}} # pyright: ignore
21
+
22
+ raw_outfile = Path(outdir) / "_prodigy_raw.txt"
23
+ json_outfile = Path(outdir) / "_prodigy.json"
24
+ tsv_outfile = Path(outdir) / "_prodigy.tsv"
25
+
26
+ # log to the raw_outfile
27
+ logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
28
+ logger = logging.getLogger("Prodigy")
29
+
30
+ if isinstance(selection, str):
31
+ selection = [selection]
32
+
33
+ struct_path = check_path(infile)
34
+
35
+ # parse structure
36
+ structure, n_chains, n_res = parse_structure(struct_path)
37
+ logger.info(
38
+ "[+] Parsed structure file {0} ({1} chains, {2} residues)".format(
39
+ structure.id, n_chains, n_res
40
+ )
41
+ )
42
+ prodigy = Prodigy(structure, selection, temperature)
43
+ prodigy.predict(distance_cutoff=distance_cutoff, acc_threshold=acc_threshold)
44
+ prodigy.print_prediction(outfile=raw_outfile, quiet=False)
45
+
46
+ # Print out interaction network
47
+ if contact_list:
48
+ prodigy.print_contacts(f"{outdir}/prodigy.ic")
49
+
50
+ # Print out interaction network
51
+ if pymol_selection:
52
+ prodigy.print_pymol_script(f"{outdir}/prodigy.pml")
53
+
54
+ # [+] Reading structure file: <path/to/structure.cif>
55
+ # [+] Parsed structure file <structure> (4 chains, 411 residues)
56
+ # [+] No. of intermolecular contacts: 191
57
+ # [+] No. of charged-charged contacts: 17
58
+ # [+] No. of charged-polar contacts: 18
59
+ # [+] No. of charged-apolar contacts: 60
60
+ # [+] No. of polar-polar contacts: 5
61
+ # [+] No. of apolar-polar contacts: 41
62
+ # [+] No. of apolar-apolar contacts: 50
63
+ # [+] Percentage of apolar NIS residues: 33.90
64
+ # [+] Percentage of charged NIS residues: 30.48
65
+ # [++] Predicted binding affinity (kcal.mol-1): -21.3
66
+ # [++] Predicted dissociation constant (M) at 25.0˚C: 2.3e-16
67
+
68
+ output = {}
69
+ with open(raw_outfile, "r") as f:
70
+ for line in f:
71
+ if line.startswith("[+"):
72
+ line = line.lstrip("[").lstrip("+").lstrip("]").lstrip()
73
+ if line.startswith("Reading structure file"):
74
+ continue
75
+ if line.startswith("Parsed structure file"):
76
+ continue
77
+
78
+ key, value = line.split(":", 1)
79
+ key = key.strip()
80
+ value = value.strip()
81
+ if key == "No. of intermolecular contacts":
82
+ output["nIC"] = int(value)
83
+ elif key == "No. of charged-charged contacts":
84
+ output["nCCC"] = int(value)
85
+ elif key == "No. of charged-polar contacts":
86
+ output["nCPC"] = int(value)
87
+ elif key == "No. of charged-apolar contacts":
88
+ output["nCAPC"] = int(value)
89
+ elif key == "No. of polar-polar contacts":
90
+ output["nPPC"] = int(value)
91
+ elif key == "No. of apolar-polar contacts":
92
+ output["nAPPC"] = int(value)
93
+ elif key == "No. of apolar-apolar contacts":
94
+ output["nAPAPC"] = int(value)
95
+ elif key.startswith("Percentage of apolar NIS residues"):
96
+ output["pANISR"] = float(value)
97
+ elif key.startswith("Percentage of charged NIS residues"):
98
+ output["pCNISR"] = float(value)
99
+ elif key.startswith("Predicted binding affinity"):
100
+ output["BindingAffinity"] = float(value)
101
+ elif key.startswith("Predicted dissociation constant"):
102
+ output["DissociationConstant"] = float(value)
103
+
104
+ with open(json_outfile, "w") as f:
105
+ json.dump(output, f, indent=2)
106
+
107
+ with open(tsv_outfile, "w") as f:
108
+ f.write("\t".join(output.keys()) + "\n")
109
+ f.write("\t".join(map(str, output.values())) + "\n")
110
+
111
+ if outtype == "json":
112
+ json_outfile.rename(outfile)
113
+ json_outfile.symlink_to(outfile)
114
+ elif outtype == "tsv":
115
+ tsv_outfile.rename(outfile)
116
+ tsv_outfile.symlink_to(outfile)
117
+ else:
118
+ raw_outfile.rename(outfile)
119
+ raw_outfile.symlink_to(outfile)
@@ -0,0 +1,140 @@
1
+ library(rlang)
2
+ library(dplyr)
3
+ library(biopipen.utils)
4
+ library(plotthis)
5
+
6
# Values filled in by the pipeline's template engine
infiles <- {{in.infiles | r}}
outdir <- {{out.outdir | r}}
joboutdir <- {{job.outdir | r}}
group <- {{envs.group | r}}

# Normalize `group` into a data frame with columns Sample and Group:
#  - a character value is read as a header-less CSV file whose columns are
#    then named Sample and Group;
#  - a list is expanded with each list name as the Group and its values as
#    the Samples;
#  - NULL is left as is (no grouping); anything else is an error.
if (is.character(group)) {
    group <- read.csv(group, header = FALSE, row.names = NULL)
    colnames(group) <- c("Sample", "Group")
} else if (is.list(group)) {
    group <- do_call(
        rbind,
        lapply(names(group), function(n) data.frame(Sample = group[[n]], Group = n))
    )
} else if (!is.null(group)) {
    stop(paste0("Invalid group: ", paste0(group, collapse = ", ")))
}
22
+
23
log <- get_logger()
reporter <- get_reporter()

log$info("Reading and merging metrics for each sample ...")
# Accumulator for the per-sample tables; stays NULL until the first file is read
metrics <- NULL

for (infile in infiles) {
    # Sample name = name of the directory holding the file, minus a trailing "_prodigy"
    sample <- sub("_prodigy$", "", basename(dirname(infile)))
    log$debug("- Reading metrics from {sample}")
    metric <- read.table(
        infile,
        header = TRUE,
        sep = "\t",
        stringsAsFactors = FALSE,
        check.names = FALSE,
        row.names = NULL)
    metric$Sample <- sample
    # Put the Sample column first
    metric <- metric %>% select(Sample, everything())
    if (is.null(metrics)) {
        metrics <- metric
    } else {
        metrics <- rbind(metrics, metric)
    }
}

# Save metrics
write.table(
    metrics,
    file.path(outdir, "metrics.txt"),
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
)

# First report section: the merged table itself
reporter$add(
    list(kind = "descr", content = "Metrics for all samples"),
    list(kind = "table", src = file.path(outdir, "metrics.txt")),
    h1 = "Metrics of all samples"
)

# Column name in the metrics table -> human-readable description.
# The names of this list also define which metrics get plotted below.
METRIC_DESCR = list(
    nIC = "No. of intermolecular contacts",
    nCCC = "No. of charged-charged contacts",
    nCPC = "No. of charged-polar contacts",
    nCAPC = "No. of charged-apolar contacts",
    nPPC = "No. of polar-polar contacts",
    nAPPC = "No. of apolar-polar contacts",
    nAPAPC = "No. of apolar-apolar contacts",
    pANISR = "Percentage of apolar NIS residues",
    pCNISR = "Percentage of charged NIS residues",
    BindingAffinity = "Predicted binding affinity (kcal.mol^-1)",
    DissociationConstant = "Predicted dissociation constant (M)"
)

if (!is.null(group)) {
    log$info("Merging group information ...")
    # The join starts from `group`, so the result has one row per row of the
    # group table; samples listed there without metrics get NA metric values.
    # Group becomes a factor whose levels follow first appearance.
    metrics <- group %>%
        left_join(metrics, by = "Sample") %>%
        mutate(Group = factor(Group, levels = unique(Group)))
}
83
+
84
log$info("Plotting Prodigy metrics ...")
# One report section (h1 = metric name) per metric: a description, a
# per-sample bar plot and, when grouping is available, a per-group plot.
for (metric in names(METRIC_DESCR)) {
    log$info("- {metric}: {METRIC_DESCR[[metric]]}")

    reporter$add(
        list(
            kind = "descr",
            content = METRIC_DESCR[[metric]] %||% paste0("Metric: ", metric)
        ),
        h1 = metric
    )

    # NOTE(review): fill = "Group" is passed even when no grouping was given,
    # in which case `metrics` has no Group column - confirm plotthis::BarPlot
    # tolerates that.
    p <- plotthis::BarPlot(
        x = "Sample",
        y = metric,
        x_text_angle = 90,
        fill = "Group",
        data = metrics
    )

    figfile <- file.path(outdir, paste0(slugify(metric), ".barplot.png"))
    # Use the size attached to the plot if any; otherwise 6 high and a width
    # that grows with the number of rows. Sizes are multiplied by 100 px.
    height <- attr(p, "height") %||% 6
    width <- attr(p, "width") %||% (nrow(metrics) * .3 + 2)
    png(figfile, height = height * 100, res = 100, width = width * 100)
    print(p)
    dev.off()

    reporter$add(
        list(src = figfile, name = "By Sample"),
        ui = "table_of_images",
        h1 = metric
    )

    # Without grouping there is nothing more to plot for this metric
    if (is.null(group)) { next }
    # group: Sample, Group
    # NOTE(review): the file is named ".boxplot.png" but the plot is drawn
    # with BarPlot - confirm which plot type is intended.
    p <- plotthis::BarPlot(
        data = metrics,
        x = "Group",
        y = metric,
        x_text_angle = 90
    )

    figfile <- file.path(outdir, paste0(slugify(metric), ".boxplot.png"))
    height <- attr(p, "height") %||% 6
    width <- attr(p, "width") %||% (length(unique(metrics$Group)) * 0.3 + 2)
    png(figfile, height = height * 100, res = 100, width = width * 100)
    print(p)
    dev.off()

    reporter$add(
        list(src = figfile, name = "By Group"),
        ui = "table_of_images",
        h1 = metric
    )
}

# Write the collected report content to the job output directory
reporter$save(joboutdir)
@@ -0,0 +1,178 @@
1
+ from pathlib import Path
2
+ from shutil import which
3
+ from diot import Diot # noqa: F401
4
+ from biopipen.utils.misc import run_command, dict_to_cli_args
5
+
6
# Inputs, outputs and options filled in by the pipeline's template engine.
infile1: str = {{in.infile1 | quote}}  # pyright: ignore # noqa
infile2: str = {{in.infile2 | quote}}  # pyright: ignore # noqa
outfile: str = {{out.outfile | quote}}  # pyright: ignore # noqa
outdir: str = {{job.outdir | quote}}  # pyright: ignore # noqa
envs: dict = {{envs | repr}}  # pyright: ignore # noqa
# Options handled by this script are popped off `envs`; whatever is left is
# turned into command-line arguments for calculate_rmsd further down.
# conv_tool: "maxit" selects maxit for CIF -> PDB conversion, any other value
# selects the BeEM executable (see cif_to_pdb).
conv_tool = envs.pop("conv_tool", "maxit")
# Executable names/paths of the two converters
maxit = envs.pop("maxit", "maxit")
beem = envs.pop("beem", "BeEM")
# When truthy, only C-alpha ATOM records are compared
ca_only = envs.pop("ca_only", False)
# aa20_only = envs.pop("aa20_only", False)
# How atoms with alternate locations are handled (see deduel_pdb);
# "keep" skips that step entirely.
duel = envs.pop("duel", "keep")
# Executable name/path of the RMSD calculator
calculate_rmsd = envs.pop("calculate_rmsd", "calculate_rmsd")
18
+
19
+
20
def cif_to_pdb(cif_file, pdb_file: Path):
    """Convert an mmCIF file to a PDB file with the configured converter.

    With ``conv_tool == "maxit"`` the maxit executable is used; it is run
    with the ``RCSBROOT`` environment variable set to the directory two
    levels above the resolved binary, and its log goes next to the output
    (same name, ``.log`` suffix). Any other ``conv_tool`` value runs the
    BeEM executable, which receives the output path without its extension
    as the ``p`` option.

    Args:
        cif_file: Path to the input mmCIF file.
        pdb_file: Path of the PDB file to create.

    Raises:
        FileNotFoundError: If maxit is selected but its executable cannot
            be found on PATH.
    """
    if conv_tool == "maxit":
        # shutil.which returns None for a missing executable; fail with a
        # clear message instead of a TypeError from Path(None).
        located = which(maxit)
        if located is None:
            raise FileNotFoundError(
                f"maxit executable not found: {maxit!r} (check envs.maxit)"
            )
        rcsbroot = Path(located).resolve().parent.parent
        args = {
            "input": cif_file,
            "output": pdb_file,
            "o": 2,
            "log": pdb_file.with_suffix(".log"),
        }
        run_command(
            [maxit, *dict_to_cli_args(args, prefix="-")],
            fg=True,
            env={"RCSBROOT": rcsbroot},
        )
    else:
        args = {"_": cif_file, "p": pdb_file.parent.joinpath(pdb_file.stem)}
        args = dict_to_cli_args(args, prefix="-", sep="=")
        run_command([beem, *args], fg=True)
30
+
31
+
32
def pdb_to_ca_pdb(pdb_file: Path, ca_pdb_file: Path):
    """Write the C-alpha ATOM records of a PDB file to a new file.

    Records keep their original order. Only lines starting with ``ATOM``
    whose atom-name field (columns 13-16) is ``CA`` are copied; every other
    line of the input is left out.
    """
    with open(pdb_file, "r") as src, open(ca_pdb_file, "w") as dst:
        dst.writelines(
            record
            for record in src
            if record[:4] == "ATOM" and record[12:16].strip() == "CA"
        )
38
+
39
+
40
+ # def pdb_to_aa20_pdb(pdb_file: Path, aa20_pdb_file: Path):
41
+ # """Extract the 20 amino acids from a PDB file and still keep the original order and metadata."""
42
+ # with open(pdb_file, "r") as f, open(aa20_pdb_file, "w") as fw:
43
+ # for line in f:
44
+ # if line.startswith("ATOM") and line[17:20].strip() in (
45
+ # "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY",
46
+ # "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER",
47
+ # "THR", "TRP", "TYR", "VAL",
48
+ # ):
49
+ # fw.write(line)
50
+
51
+
52
def deduel_pdb(pdb_file: Path, deduel_pdb_file: Path, mode=None):
    """Remove/handle the duel (alternate-location) atoms in a PDB file.

    Consecutive ``ATOM`` records that describe the same atom (same atom
    name, residue name, chain and residue number) but carry different
    alternate-location indicators (column 17) form one "duel" run. Each run
    is resolved according to ``mode`` and every ATOM record written has its
    alternate-location column blanked. All other lines are copied through
    unchanged, and the output keeps the order of the input.

    Only alternates listed on consecutive lines are detected.

    Args:
        pdb_file: Input PDB file.
        deduel_pdb_file: Output PDB file.
        mode: How to resolve a run of alternates: ``"keep"`` writes all of
            them, ``"keep_first"`` / ``"keep_last"`` write one of them, and
            ``"average"`` writes the first record with x/y/z replaced by
            the mean over the run. Any other value behaves like
            ``"keep_first"``. Defaults to the script-level ``duel`` option.
    """
    if mode is None:
        mode = duel

    def is_duel(atom1, atom2):
        #           1         2
        # 01234567890123456789012345
        # ATOM    913  CA ATYR A 113
        # ATOM    914  CA BTYR A 113
        # The key should be "ATOM|CA  |TYR| A| 113"
        return (
            atom1[:4] == atom2[:4]
            and atom1[12:16] == atom2[12:16]
            and atom1[17:20] == atom2[17:20]
            and atom1[21] == atom2[21]
            and atom1[22:26] == atom2[22:26]
            and atom1[16] != atom2[16]
        )

    def clean_atom(atom):
        # Blank the alternate-location indicator
        return atom[:16] + " " + atom[17:]

    def resolve(run):
        """Pick the records to write for one run of alternates."""
        if len(run) == 1 or mode == "keep":
            return run
        if mode == "keep_last":
            return run[-1:]
        if mode == "average":
            count = len(run)
            x = sum(float(atom[30:38]) for atom in run) / count
            y = sum(float(atom[38:46]) for atom in run) / count
            z = sum(float(atom[46:54]) for atom in run) / count
            first = run[0]
            return [first[:30] + f"{x:8.3f}{y:8.3f}{z:8.3f}" + first[54:]]
        # "keep_first" (also the fallback for unrecognized modes)
        return run[:1]

    pending = []  # the current run of ATOM records not yet written
    with open(pdb_file, "r") as f, open(deduel_pdb_file, "w") as fw:

        def flush():
            if pending:
                fw.writelines(clean_atom(atom) for atom in resolve(pending))
                pending.clear()

        for line in f:
            is_atom = line.startswith("ATOM")
            if is_atom and pending and is_duel(pending[-1], line):
                pending.append(line)
                continue
            # Write what is pending *before* anything that follows it, so
            # records such as TER/END never overtake an ATOM record.
            flush()
            if is_atom:
                pending.append(line)
            else:
                fw.write(line)

        flush()
109
+
110
+
111
def index_of(lst, item) -> int:
    """Return the position of the first occurrence of item in lst, or -1 if absent."""
    return lst.index(item) if item in lst else -1
116
+
117
+
118
def _derived_pair(src1, src2, suffix):
    """Name one intermediate file in outdir for each of the two inputs.

    The names are built from the input stems. When both inputs share a stem
    (e.g. ``a/model.pdb`` and ``b/model.pdb``) they would otherwise map to
    the same file, the second overwriting the first; in that case the stems
    are tagged ``.1`` / ``.2`` to keep the two files apart.
    """
    stem1, stem2 = Path(src1).stem, Path(src2).stem
    if stem1 == stem2:
        stem1, stem2 = f"{stem1}.1", f"{stem2}.2"
    return Path(outdir) / f"{stem1}{suffix}", Path(outdir) / f"{stem2}{suffix}"


# Step 1: convert mmCIF inputs to PDB
pdb1, pdb2 = _derived_pair(infile1, infile2, ".pdb")
if infile1.endswith(".cif"):
    cif_to_pdb(infile1, pdb1)
    infile1 = pdb1  # type: ignore

if infile2.endswith(".cif"):
    cif_to_pdb(infile2, pdb2)
    infile2 = pdb2  # type: ignore

# Step 2 (optional): restrict both structures to their C-alpha atoms
if ca_only:
    ca_pdb1, ca_pdb2 = _derived_pair(infile1, infile2, ".ca.pdb")
    pdb_to_ca_pdb(infile1, ca_pdb1)  # type: ignore
    pdb_to_ca_pdb(infile2, ca_pdb2)  # type: ignore
    infile1, infile2 = ca_pdb1, ca_pdb2  # type: ignore

# if aa20_only:
#     aa20_pdb1 = Path(outdir) / f"{Path(infile1).stem}.aa20.pdb"
#     pdb_to_aa20_pdb(infile1, aa20_pdb1)  # type: ignore
#     infile1 = aa20_pdb1  # type: ignore

#     aa20_pdb2 = Path(outdir) / f"{Path(infile2).stem}.aa20.pdb"
#     pdb_to_aa20_pdb(infile2, aa20_pdb2)  # type: ignore
#     infile2 = aa20_pdb2  # type: ignore

# Step 3 (optional): resolve atoms with alternate locations
if duel != "keep":
    deduel_pdb1, deduel_pdb2 = _derived_pair(infile1, infile2, ".deduel.pdb")
    deduel_pdb(infile1, deduel_pdb1)  # type: ignore
    deduel_pdb(infile2, deduel_pdb2)  # type: ignore
    infile1, infile2 = deduel_pdb1, deduel_pdb2  # type: ignore

# Step 4: run calculate_rmsd with the remaining options
envs["_"] = [infile1, infile2]
envs = dict_to_cli_args(envs, dashify=True)

# These options are passed to calculate_rmsd with a single dash, so the
# double-dash form produced above is rewritten.
for single_dash_opt in ("ur", "urks", "nh"):
    position = index_of(envs, f"--{single_dash_opt}")
    if position != -1:
        envs[position] = f"-{single_dash_opt}"

out: str = run_command([calculate_rmsd, *envs], stdout="return")  # type: ignore
out = out.strip()

# The tool is expected to print a single number; anything else is reported
# as an error together with what was printed.
try:
    float(out)
except (ValueError, TypeError):
    raise ValueError(
        f"Unexpected output from {calculate_rmsd} (not a number): {out}"
    ) from None

Path(outfile).write_text(out)
@@ -0,0 +1,102 @@
1
# Script for regulatory.MotifAffinityTest
# Shared helpers are pulled in from the motifs-common.R template below.
{% include biopipen_dir + "/scripts/regulatory/motifs-common.R" %}

library(BiocParallel)
library(BSgenome)
library(biopipen.utils)

# Job inputs and output location
motiffile <- {{in.motiffile | r}}
varfile   <- {{in.varfile | r}}
outdir    <- {{out.outdir | r}}

# Execution settings
ncores   <- {{envs.ncores | r}}
tool     <- {{envs.tool | r}}
bcftools <- {{envs.bcftools | r}}
genome   <- {{envs.genome | r}}

# Column names used in the motif file and handling of missing entries
motif_col     <- {{envs.motif_col | r}}
regulator_col <- {{envs.regulator_col | r}}
var_col       <- {{envs.var_col | r}}
notfound      <- {{envs.notfound | r}}

# Motif resources
motifdb   <- {{envs.motifdb | r}}
regmotifs <- {{envs.regmotifs | r}}

# Plotting and significance settings
devpars    <- {{envs.devpars | r}}
plot_nvars <- {{envs.plot_nvars | r}}
plots      <- {{envs.plots | r}}
cutoff     <- {{envs.cutoff | r}}

# Fixed seed for the random number generator
set.seed(8525)
26
+
27
# Fail early on missing or non-existent required settings and inputs.

if (is.null(motifdb) || !file.exists(motifdb)) {
    stop("Motif database (envs.motifdb) is required and must exist")
}

# The genome value is read from envs.genome and only checked for presence
# here, so the message names that option and does not claim an existence check.
if (is.null(genome)) {
    stop("Reference genome (envs.genome) is required")
}

if (is.null(motiffile) || !file.exists(motiffile)) {
    stop("Motif file (in.motiffile) is required and must exist")
}

if (is.null(varfile) || !file.exists(varfile)) {
    stop("Variant file (in.varfile) is required and must exist")
}

# At least one of the two identifying columns has to be configured.
if (is.null(motif_col) && is.null(regulator_col)) {
    stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
}
46
+
47
log <- get_logger()

# The motif/regulator table is tab-separated with a header row. Column names
# are kept exactly as written in the file.
log$info("Reading input regulator/motif file ...")
in_motifs <- read.table(
    motiffile,
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE,
    check.names = FALSE
)


log$info("Ensuring motifs and regulators in the input data ...")
in_motifs <- ensure_regulator_motifs(
    in_motifs, outdir, motif_col, regulator_col, var_col, regmotifs,
    notfound = notfound
)
genome_pkg <- get_genome_pkg(genome)

# When a variant column is configured, collect the distinct
# "<motif> // <variant>" keys. Stays NULL otherwise.
motif_var_pairs <- NULL
if (!is.null(var_col)) {
    log$info("Obtaining motif-variant pairs to test ...")
    if (!(var_col %in% colnames(in_motifs))) {
        stop("Variant column (envs.var_col) not found in the input motif file")
    }

    pair_keys <- paste0(in_motifs[[motif_col]], " // ", in_motifs[[var_col]])
    motif_var_pairs <- unique(pair_keys)
}
66
+
67
log$info("Reading variant file ...")
# VCF input (plain or gzipped) is first flattened to an 8-column BED-like file
# with bcftools. Only records whose FILTER is PASS, "." or empty are kept,
# and only the first ALT allele is exported.
if (grepl("\\.vcf(\\.gz)?$", varfile)) {
    log$info("Converting VCF file to BED file ...")
    varfile_bed <- file.path(outdir, gsub("\\.vcf(\\.gz)?$", ".bed", basename(varfile)))
    cmd <- c(
        bcftools, "query",
        "-f", "%CHROM\\t%POS0\\t%END\\t%ID\\t0\\t+\\t%REF\\t%ALT{0}\\n",
        "-i", 'FILTER="PASS" || FILTER="." || FILTER=""',
        "-o", varfile_bed,
        varfile
    )
    run_command(cmd, fg = TRUE)

    varfile <- varfile_bed
}

# `chrom`, `start`, `end`, `name`, `score`, `strand`, `ref`, `alt`.
# Every column is read as text: with automatic type conversion a column made
# up solely of "T" (e.g. all reference alleles are thymine) is turned into
# logical TRUE, which corrupts the alleles and breaks length checks on them.
snpinfo <- read.table(
    varfile,
    header = FALSE,
    stringsAsFactors = FALSE,
    colClasses = "character"
)
colnames(snpinfo) <- c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
# Restore numeric types for the columns that are genuinely numeric.
snpinfo$start <- as.integer(snpinfo$start)
snpinfo$end <- as.integer(snpinfo$end)
snpinfo$score <- type.convert(snpinfo$score, as.is = TRUE)
86
+
87
log$info("Reading motif database ...")
mdb <- read_meme_to_motifdb(
    motifdb, in_motifs, motif_col, regulator_col, notfound, outdir
)

# Accept the tool name case-insensitively and restrict it to the two
# supported back-ends.
tool <- match.arg(tolower(tool), c("motifbreakr", "atsnp"))

# NOTE(review): the template branch below compares envs.tool case-sensitively,
# while the R-side check above lower-cases it first - confirm that mixed-case
# values are not expected, otherwise the atSNP branch is rendered for them.
{% if envs.tool == "motifbreakr" %}
motifbreakr_args <- {{envs.motifbreakr_args | r}}
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
{% else %}
# User-supplied atSNP arguments are layered over these defaults.
atsnp_args <- list_update(
    list(padj_cutoff = TRUE, padj = "BH", p = "Pval_diff"),
    {{envs.atsnp_args | r}}
)
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
{% endif %}
@@ -0,0 +1,127 @@
1
library(atSNP)
library(rtracklayer)

log$info("Converting snpinfo to atSNP object ...")

# snpinfo arrives with: chrom, start, end, name, score, strand, ref, alt.
# Both alleles must be exactly one character long for this branch.
multi_base <- nchar(snpinfo$ref) != 1 | nchar(snpinfo$alt) != 1
if (any(multi_base)) {
    stop("Only SNVs are supported by atSNP. Consider using motifbreakR instead if you have indels.")
}

# Name of the intermediate table written for atSNP, derived from the variant
# file name.
atsnp_bed <- file.path(
    outdir,
    gsub("\\.vcf(\\.gz)?$|\\.bed$", ".atsnp.txt", basename(varfile))
)

# Variants without a usable name fall back to "<chrom>:<end>".
unnamed <- snpinfo$name == "." | is.na(snpinfo$name) | nchar(snpinfo$name) == 0
snpinfo$name <- ifelse(
    unnamed,
    sprintf("%s:%s", snpinfo$chrom, snpinfo$end),
    snpinfo$name
)

# Add the columns written to the atSNP input table. They stay on snpinfo
# because later steps look variants up by snpid.
snpinfo[["a1"]] <- snpinfo[["ref"]]
snpinfo[["a2"]] <- snpinfo[["alt"]]
snpinfo[["chr"]] <- snpinfo[["chrom"]]
snpinfo[["snp"]] <- snpinfo[["end"]]
snpinfo[["snpid"]] <- snpinfo[["name"]]
write.table(
    snpinfo[, c("snpid", "a1", "a2", "chr", "snp")],
    file = atsnp_bed,
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
)

motif_lib <- motifdb_to_motiflib(mdb)
# Largest row count among the motif matrices, used as the half window size.
k <- max(sapply(motif_lib, nrow))
snps <- LoadSNPData(
    atsnp_bed,
    genome.lib = genome_pkg,
    mutation = TRUE, # force using given ref and alt
    default.par = nrow(snpinfo) < 1000,
    half.window.size = k
)
36
+
37
log$info("Running atSNP ...")
atsnp_scores <- ComputeMotifScore(motif_lib, snps, ncores = ncores)

log$info("Calculating p values ...")
atsnp_result <- ComputePValues(
    motif.lib = motif_lib,
    snp.info = snps,
    motif.scores = atsnp_scores$motif.scores,
    ncores = ncores,
    testing.mc = TRUE
)

# When specific motif-variant pairs were requested, keep only the rows whose
# "<motif> // <snpid>" key is among them.
if (!is.null(motif_var_pairs)) {
    log$info("Filtering motif-variant pairs ...")
    pair_keys <- paste0(atsnp_result$motif, " // ", atsnp_result$snpid)
    atsnp_result <- atsnp_result[pair_keys %in% motif_var_pairs, , drop = FALSE]
}
55
+
56
# Adjust the configured p-value column and filter on either the adjusted or
# the raw p-value, depending on atsnp_args$padj_cutoff.
padj_col <- paste0(atsnp_args$p, "_adj")
atsnp_result[[padj_col]] <- p.adjust(atsnp_result[[atsnp_args$p]], method = atsnp_args$padj)
cutoff_col <- if (atsnp_args$padj_cutoff) padj_col else atsnp_args$p
atsnp_result <- atsnp_result[atsnp_result[[cutoff_col]] < cutoff, , drop = FALSE]
# order by p value
atsnp_result <- atsnp_result[order(atsnp_result[[cutoff_col]]), , drop = FALSE]

# Line the variant table up row-by-row with the surviving results.
snpinfo <- snpinfo[match(atsnp_result$snpid, snpinfo$snpid), , drop = FALSE]
n_hits <- nrow(atsnp_result)

atsnp_result$chr <- snpinfo$chr
atsnp_result$start <- snpinfo$start
atsnp_result$end <- snpinfo$end
atsnp_result$SNP_id <- snpinfo$snpid
atsnp_result$snpid <- NULL
atsnp_result$REF <- snpinfo$ref
atsnp_result$ALT <- snpinfo$alt
# The motif name serves as both provider name and provider id.
atsnp_result$providerId <- atsnp_result$providerName <- atsnp_result$motif
atsnp_result$motif <- NULL
atsnp_result$strand <- snpinfo$strand
atsnp_result$score <- snpinfo$score
atsnp_result$snpbase <- NULL
# Constants are expanded to the row count: assigning a bare scalar fails when
# no result passes the cutoff and the table is a zero-row data.frame.
atsnp_result$altPos <- rep(1, n_hits)
atsnp_result$varType <- rep("SNV", n_hits)
# "<start>,<end>" of the motif match, shifted by the half window size k.
# Vectorised, so zero rows yield zero values (1:nrow() would visit 1 and 0).
atsnp_result$motifPos <- paste(
    atsnp_result$ref_start - k,
    atsnp_result$ref_end - k,
    sep = ","
)
if (!is.null(regulator_col)) {
    atsnp_result$geneSymbol <- atsnp_result$Regulator <- in_motifs[
        match(atsnp_result$providerId, in_motifs[[motif_col]]),
        regulator_col,
        drop = TRUE
    ]
}

write.table(
    atsnp_result,
    file = file.path(outdir, "atsnp.txt"),
    sep = "\t", quote = FALSE, row.names = FALSE
)
94
+
95
log$info("Plotting variants ...")
# Convert result to GRanges object
atsnp_result$alleleDiff <- -log10(atsnp_result[[cutoff_col]])
atsnp_result <- atsnp_result[order(-atsnp_result$alleleDiff), , drop = FALSE]
# Expanded to the row count so the assignment also works on an empty table.
atsnp_result$effect <- rep("strong", nrow(atsnp_result))
# motifPos is stored as "start,end" text. Turn it into integer pairs.
atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
genome(atsnp_result) <- genome
attributes(atsnp_result)$genome.package <- genome_pkg
attributes(atsnp_result)$motifs <- mdb

if (is.null(plots) || length(plots) == 0) {
    # No explicit plot configuration: plot the variants found in the top
    # plot_nvars rows. seq_len() keeps the selection empty when there are no
    # results (1:0 would select index 1 and 0).
    n_plot <- min(plot_nvars, length(atsnp_result))
    atsnp_result <- atsnp_result[seq_len(n_plot), , drop = FALSE]
    variants <- unique(atsnp_result$SNP_id)
} else {
    variants <- names(plots)
}
for (variant in variants) {
    log$info("- Variant: {variant}")
    # Fill in per-variant defaults: plot every row with the global devpars.
    if (is.null(plots[[variant]])) {
        plots[[variant]] <- list(devpars = devpars, which = "TRUE")
    }
    if (is.null(plots[[variant]]$which)) {
        plots[[variant]]$which <- "TRUE"
    }
    if (is.null(plots[[variant]]$devpars)) {
        plots[[variant]]$devpars <- devpars
    }
    res <- atsnp_result[atsnp_result$SNP_id == variant, , drop = FALSE]
    # NOTE(review): `which` is evaluated as R code taken from the plot
    # configuration - it must only ever come from a trusted source.
    res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))

    plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
}