biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +28 -0
- biopipen/core/filters.py +79 -4
- biopipen/core/proc.py +12 -3
- biopipen/core/testing.py +75 -3
- biopipen/ns/bam.py +148 -6
- biopipen/ns/bed.py +75 -0
- biopipen/ns/cellranger.py +186 -0
- biopipen/ns/cellranger_pipeline.py +126 -0
- biopipen/ns/cnv.py +19 -3
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/cnvkit_pipeline.py +20 -12
- biopipen/ns/delim.py +34 -35
- biopipen/ns/gene.py +68 -23
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +39 -14
- biopipen/ns/plot.py +304 -1
- biopipen/ns/protein.py +183 -0
- biopipen/ns/regulatory.py +290 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +2053 -473
- biopipen/ns/scrna_metabolic_landscape.py +228 -382
- biopipen/ns/snp.py +659 -0
- biopipen/ns/stats.py +484 -0
- biopipen/ns/tcr.py +683 -98
- biopipen/ns/vcf.py +236 -2
- biopipen/ns/web.py +97 -6
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
- biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/common.svelte +15 -0
- biopipen/reports/protein/ProdigySummary.svelte +16 -0
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
- biopipen/reports/tcr/ClonalStats.svelte +16 -0
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/reports/utils/misc.liq +22 -7
- biopipen/scripts/bam/BamMerge.py +11 -15
- biopipen/scripts/bam/BamSampling.py +90 -0
- biopipen/scripts/bam/BamSort.py +141 -0
- biopipen/scripts/bam/BamSplitChroms.py +10 -10
- biopipen/scripts/bam/BamSubsetByBed.py +38 -0
- biopipen/scripts/bam/CNAClinic.R +41 -5
- biopipen/scripts/bam/CNVpytor.py +153 -54
- biopipen/scripts/bam/ControlFREEC.py +13 -14
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/bed/Bed2Vcf.py +5 -5
- biopipen/scripts/bed/BedConsensus.py +5 -5
- biopipen/scripts/bed/BedLiftOver.sh +6 -4
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
- biopipen/scripts/bed/BedtoolsMerge.py +4 -4
- biopipen/scripts/cellranger/CellRangerCount.py +138 -0
- biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
- biopipen/scripts/cnv/AneuploidyScore.R +55 -20
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
- biopipen/scripts/cnv/TMADScore.R +25 -9
- biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
- biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
- biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
- biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
- biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
- biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
- biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
- biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
- biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
- biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
- biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
- biopipen/scripts/cnvkit/guess_baits.py +166 -93
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +116 -118
- biopipen/scripts/gene/GeneNameConversion.R +67 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/gsea/Enrichr.R +5 -5
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/GSEA.R +2 -2
- biopipen/scripts/gsea/PreRank.R +5 -5
- biopipen/scripts/misc/Config2File.py +2 -2
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/misc/Str2File.py +2 -2
- biopipen/scripts/plot/Heatmap.R +3 -3
- biopipen/scripts/plot/Manhattan.R +147 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/plot/ROC.R +88 -0
- biopipen/scripts/plot/Scatter.R +112 -0
- biopipen/scripts/plot/VennDiagram.R +5 -9
- biopipen/scripts/protein/MMCIF2PDB.py +33 -0
- biopipen/scripts/protein/PDB2Fasta.py +60 -0
- biopipen/scripts/protein/Prodigy.py +119 -0
- biopipen/scripts/protein/ProdigySummary.R +140 -0
- biopipen/scripts/protein/RMSD.py +178 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
- biopipen/scripts/regulatory/motifs-common.R +324 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
- biopipen/scripts/rnaseq/Simulation.R +21 -0
- biopipen/scripts/rnaseq/UnitConversion.R +325 -54
- biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +150 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
- biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
- biopipen/scripts/scrna/CellsDistribution.R +456 -167
- biopipen/scripts/scrna/DimPlots.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
- biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
- biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
- biopipen/scripts/scrna/ExprImputation.R +7 -0
- biopipen/scripts/scrna/LoomTo10X.R +51 -0
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +679 -400
- biopipen/scripts/scrna/MetaMarkers.R +265 -161
- biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
- biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
- biopipen/scripts/scrna/RadarPlots.R +355 -134
- biopipen/scripts/scrna/ScFGSEA.R +298 -100
- biopipen/scripts/scrna/ScSimulation.R +65 -0
- biopipen/scripts/scrna/ScVelo.py +617 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
- biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
- biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
- biopipen/scripts/scrna/SeuratClustering.R +36 -233
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
- biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
- biopipen/scripts/scrna/SeuratPreparing.R +223 -173
- biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
- biopipen/scripts/scrna/SeuratTo10X.R +27 -0
- biopipen/scripts/scrna/Slingshot.R +65 -0
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
- biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
- biopipen/scripts/snp/MatrixEQTL.R +217 -0
- biopipen/scripts/snp/Plink2GTMat.py +148 -0
- biopipen/scripts/snp/PlinkCallRate.R +199 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +291 -0
- biopipen/scripts/snp/PlinkFromVcf.py +81 -0
- biopipen/scripts/snp/PlinkHWE.R +85 -0
- biopipen/scripts/snp/PlinkHet.R +96 -0
- biopipen/scripts/snp/PlinkIBD.R +196 -0
- biopipen/scripts/snp/PlinkSimulation.py +124 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/ChowTest.R +146 -0
- biopipen/scripts/stats/DiffCoexpr.R +152 -0
- biopipen/scripts/stats/LiquidAssoc.R +135 -0
- biopipen/scripts/stats/Mediation.R +108 -0
- biopipen/scripts/stats/MetaPvalue.R +130 -0
- biopipen/scripts/stats/MetaPvalue1.R +74 -0
- biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
- biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
- biopipen/scripts/tcr/Attach2Seurat.R +3 -2
- biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
- biopipen/scripts/tcr/CDR3Clustering.R +343 -0
- biopipen/scripts/tcr/ClonalStats.R +526 -0
- biopipen/scripts/tcr/CloneResidency.R +255 -131
- biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/Immunarch-basic.R +31 -9
- biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
- biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
- biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
- biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
- biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
- biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
- biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
- biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
- biopipen/scripts/tcr/Immunarch.R +63 -11
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
- biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
- biopipen/scripts/tcr/SampleDiversity.R +1 -1
- biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
- biopipen/scripts/tcr/ScRepLoading.R +166 -0
- biopipen/scripts/tcr/TCRClusterStats.R +176 -22
- biopipen/scripts/tcr/TCRDock.py +110 -0
- biopipen/scripts/tcr/TESSA.R +102 -118
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/TruvariBench.sh +14 -7
- biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
- biopipen/scripts/vcf/TruvariConsistency.R +1 -1
- biopipen/scripts/vcf/Vcf2Bed.py +2 -2
- biopipen/scripts/vcf/VcfAnno.py +11 -11
- biopipen/scripts/vcf/VcfDownSample.sh +22 -10
- biopipen/scripts/vcf/VcfFilter.py +5 -5
- biopipen/scripts/vcf/VcfFix.py +7 -7
- biopipen/scripts/vcf/VcfFix_utils.py +13 -4
- biopipen/scripts/vcf/VcfIndex.py +3 -3
- biopipen/scripts/vcf/VcfIntersect.py +3 -3
- biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
- biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/scripts/web/Download.py +8 -4
- biopipen/scripts/web/DownloadList.py +5 -5
- biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
- biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
- biopipen/scripts/web/gcloud_common.py +49 -0
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.py +146 -20
- biopipen/utils/reference.py +64 -20
- biopipen/utils/reporter.py +177 -0
- biopipen/utils/vcf.py +1 -1
- biopipen-0.34.26.dist-info/METADATA +27 -0
- biopipen-0.34.26.dist-info/RECORD +292 -0
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- {biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
- biopipen/ns/bcftools.py +0 -111
- biopipen/ns/scrna_basic.py +0 -255
- biopipen/reports/delim/SampleInfo.svelte +0 -36
- biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
- biopipen/reports/scrna/ScFGSEA.svelte +0 -35
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
- biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
- biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
- biopipen/scripts/scrna/ExprImpution.R +0 -7
- biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
- biopipen/scripts/scrna/Write10X.R +0 -11
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
- biopipen/scripts/tcr/TCRClustering.R +0 -280
- biopipen/utils/common_docstrs.py +0 -61
- biopipen/utils/gene.R +0 -49
- biopipen/utils/gsea.R +0 -193
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -114
- biopipen/utils/mutate_helpers.R +0 -433
- biopipen/utils/plot.R +0 -173
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -115
- biopipen-0.21.0.dist-info/METADATA +0 -22
- biopipen-0.21.0.dist-info/RECORD +0 -218
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from prodigy_prot.predict_IC import ( # type: ignore
|
|
6
|
+
Prodigy,
|
|
7
|
+
check_path,
|
|
8
|
+
parse_structure,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
infile: str = {{in.infile | quote}} # pyright: ignore # noqa
|
|
12
|
+
outfile: str = {{out.outfile | quote}} # pyright: ignore
|
|
13
|
+
outdir: str = {{out.outdir | quote}} # pyright: ignore
|
|
14
|
+
distance_cutoff = {{envs.distance_cutoff | float}} # pyright: ignore
|
|
15
|
+
acc_threshold = {{envs.acc_threshold | float}} # pyright: ignore
|
|
16
|
+
temperature = {{envs.temperature | float}} # pyright: ignore
|
|
17
|
+
contact_list = {{envs.contact_list | repr}} # pyright: ignore
|
|
18
|
+
pymol_selection = {{envs.pymol_selection | repr}} # pyright: ignore
|
|
19
|
+
selection = {{envs.selection | repr}} # pyright: ignore
|
|
20
|
+
outtype = {{envs.outtype | repr}} # pyright: ignore
|
|
21
|
+
|
|
22
|
+
raw_outfile = Path(outdir) / "_prodigy_raw.txt"
|
|
23
|
+
json_outfile = Path(outdir) / "_prodigy.json"
|
|
24
|
+
tsv_outfile = Path(outdir) / "_prodigy.tsv"
|
|
25
|
+
|
|
26
|
+
# log to stdout (the raw prediction report is written to raw_outfile below)
|
|
27
|
+
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
|
|
28
|
+
logger = logging.getLogger("Prodigy")
|
|
29
|
+
|
|
30
|
+
if isinstance(selection, str):
|
|
31
|
+
selection = [selection]
|
|
32
|
+
|
|
33
|
+
struct_path = check_path(infile)
|
|
34
|
+
|
|
35
|
+
# parse structure
|
|
36
|
+
structure, n_chains, n_res = parse_structure(struct_path)
|
|
37
|
+
logger.info(
|
|
38
|
+
"[+] Parsed structure file {0} ({1} chains, {2} residues)".format(
|
|
39
|
+
structure.id, n_chains, n_res
|
|
40
|
+
)
|
|
41
|
+
)
|
|
42
|
+
prodigy = Prodigy(structure, selection, temperature)
|
|
43
|
+
prodigy.predict(distance_cutoff=distance_cutoff, acc_threshold=acc_threshold)
|
|
44
|
+
prodigy.print_prediction(outfile=raw_outfile, quiet=False)
|
|
45
|
+
|
|
46
|
+
# Print out interaction network
|
|
47
|
+
if contact_list:
|
|
48
|
+
prodigy.print_contacts(f"{outdir}/prodigy.ic")
|
|
49
|
+
|
|
50
|
+
# Print out PyMOL script
|
|
51
|
+
if pymol_selection:
|
|
52
|
+
prodigy.print_pymol_script(f"{outdir}/prodigy.pml")
|
|
53
|
+
|
|
54
|
+
# [+] Reading structure file: <path/to/structure.cif>
|
|
55
|
+
# [+] Parsed structure file <structure> (4 chains, 411 residues)
|
|
56
|
+
# [+] No. of intermolecular contacts: 191
|
|
57
|
+
# [+] No. of charged-charged contacts: 17
|
|
58
|
+
# [+] No. of charged-polar contacts: 18
|
|
59
|
+
# [+] No. of charged-apolar contacts: 60
|
|
60
|
+
# [+] No. of polar-polar contacts: 5
|
|
61
|
+
# [+] No. of apolar-polar contacts: 41
|
|
62
|
+
# [+] No. of apolar-apolar contacts: 50
|
|
63
|
+
# [+] Percentage of apolar NIS residues: 33.90
|
|
64
|
+
# [+] Percentage of charged NIS residues: 30.48
|
|
65
|
+
# [++] Predicted binding affinity (kcal.mol-1): -21.3
|
|
66
|
+
# [++] Predicted dissociation constant (M) at 25.0˚C: 2.3e-16
|
|
67
|
+
|
|
68
|
+
# Parse the raw Prodigy report (see the sample above) into a flat dict.
# Labels that must match exactly map to integer contact counts ...
_COUNT_FIELDS = {
    "No. of intermolecular contacts": "nIC",
    "No. of charged-charged contacts": "nCCC",
    "No. of charged-polar contacts": "nCPC",
    "No. of charged-apolar contacts": "nCAPC",
    "No. of polar-polar contacts": "nPPC",
    "No. of apolar-polar contacts": "nAPPC",
    "No. of apolar-apolar contacts": "nAPAPC",
}
# ... while these labels are matched by prefix (they may carry a unit or
# temperature suffix) and hold floating point values.
_FLOAT_FIELD_PREFIXES = (
    ("Percentage of apolar NIS residues", "pANISR"),
    ("Percentage of charged NIS residues", "pCNISR"),
    ("Predicted binding affinity", "BindingAffinity"),
    ("Predicted dissociation constant", "DissociationConstant"),
)
_SKIPPED_PREFIXES = ("Reading structure file", "Parsed structure file")

output = {}
with open(raw_outfile, "r") as f:
    for line in f:
        if not line.startswith("[+"):
            continue
        # Drop the "[+]" / "[++]" marker in front of the label
        line = line.lstrip("[").lstrip("+").lstrip("]").lstrip()
        if line.startswith(_SKIPPED_PREFIXES):
            continue

        key, colon, value = line.partition(":")
        if not colon:
            # Informational line without a "label: value" pair; nothing to
            # record (previously this raised on tuple unpacking).
            continue
        key = key.strip()
        value = value.strip()

        if key in _COUNT_FIELDS:
            output[_COUNT_FIELDS[key]] = int(value)
            continue
        for prefix, name in _FLOAT_FIELD_PREFIXES:
            if key.startswith(prefix):
                output[name] = float(value)
                break
|
|
103
|
+
|
|
104
|
+
# Always materialise both structured renderings next to the raw report.
json_outfile.write_text(json.dumps(output, indent=2))

header_row = "\t".join(output.keys())
value_row = "\t".join(str(item) for item in output.values())
tsv_outfile.write_text(header_row + "\n" + value_row + "\n")

# Pick the rendering requested via outtype; anything else falls back to the
# raw report.
if outtype == "json":
    selected = json_outfile
elif outtype == "tsv":
    selected = tsv_outfile
else:
    selected = raw_outfile

# Move the selected file to the declared output and leave a symlink behind
# under its original name in outdir.
selected.rename(outfile)
selected.symlink_to(outfile)
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
library(rlang)
|
|
2
|
+
library(dplyr)
|
|
3
|
+
library(biopipen.utils)
|
|
4
|
+
library(plotthis)
|
|
5
|
+
|
|
6
|
+
infiles <- {{in.infiles | r}}
|
|
7
|
+
outdir <- {{out.outdir | r}}
|
|
8
|
+
joboutdir <- {{job.outdir | r}}
|
|
9
|
+
group <- {{envs.group | r}}
|
|
10
|
+
|
|
11
|
+
if (is.character(group)) {
|
|
12
|
+
group <- read.csv(group, header = FALSE, row.names = NULL)
|
|
13
|
+
colnames(group) <- c("Sample", "Group")
|
|
14
|
+
} else if (is.list(group)) {
|
|
15
|
+
group <- do_call(
|
|
16
|
+
rbind,
|
|
17
|
+
lapply(names(group), function(n) data.frame(Sample = group[[n]], Group = n))
|
|
18
|
+
)
|
|
19
|
+
} else if (!is.null(group)) {
|
|
20
|
+
stop(paste0("Invalid group: ", paste0(group, collapse = ", ")))
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
log <- get_logger()
|
|
24
|
+
reporter <- get_reporter()
|
|
25
|
+
|
|
26
|
+
log$info("Reading and merging metrics for each sample ...")
|
|
27
|
+
# Collect one metrics table per sample, then bind them in a single step.
# An empty `infiles` leaves `metrics` as NULL, as before.
per_sample_metrics <- list()

for (infile in infiles) {
    # Sample name: parent directory of the file without a trailing "_prodigy"
    sample <- sub("_prodigy$", "", basename(dirname(infile)))
    log$debug("- Reading metrics from {sample}")
    metric <- read.table(
        infile,
        header = TRUE,
        sep = "\t",
        stringsAsFactors = FALSE,
        check.names = FALSE,
        row.names = NULL)
    metric$Sample <- sample
    # Put the Sample column first
    metric <- metric %>% select(Sample, everything())
    per_sample_metrics[[length(per_sample_metrics) + 1]] <- metric
}

metrics <- do.call(rbind, per_sample_metrics)
|
|
47
|
+
|
|
48
|
+
# Save metrics
|
|
49
|
+
write.table(
|
|
50
|
+
metrics,
|
|
51
|
+
file.path(outdir, "metrics.txt"),
|
|
52
|
+
sep = "\t",
|
|
53
|
+
quote = FALSE,
|
|
54
|
+
row.names = FALSE
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
reporter$add(
|
|
58
|
+
list(kind = "descr", content = "Metrics for all samples"),
|
|
59
|
+
list(kind = "table", src = file.path(outdir, "metrics.txt")),
|
|
60
|
+
h1 = "Metrics of all samples"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
METRIC_DESCR = list(
|
|
64
|
+
nIC = "No. of intermolecular contacts",
|
|
65
|
+
nCCC = "No. of charged-charged contacts",
|
|
66
|
+
nCPC = "No. of charged-polar contacts",
|
|
67
|
+
nCAPC = "No. of charged-apolar contacts",
|
|
68
|
+
nPPC = "No. of polar-polar contacts",
|
|
69
|
+
nAPPC = "No. of apolar-polar contacts",
|
|
70
|
+
nAPAPC = "No. of apolar-apolar contacts",
|
|
71
|
+
pANISR = "Percentage of apolar NIS residues",
|
|
72
|
+
pCNISR = "Percentage of charged NIS residues",
|
|
73
|
+
BindingAffinity = "Predicted binding affinity (kcal.mol^-1)",
|
|
74
|
+
DissociationConstant = "Predicted dissociation constant (M)"
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
if (!is.null(group)) {
|
|
78
|
+
log$info("Merging group information ...")
|
|
79
|
+
metrics <- group %>%
|
|
80
|
+
left_join(metrics, by = "Sample") %>%
|
|
81
|
+
mutate(Group = factor(Group, levels = unique(Group)))
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
log$info("Plotting Prodigy metrics ...")
|
|
85
|
+
for (metric in names(METRIC_DESCR)) {
|
|
86
|
+
log$info("- {metric}: {METRIC_DESCR[[metric]]}")
|
|
87
|
+
|
|
88
|
+
reporter$add(
|
|
89
|
+
list(
|
|
90
|
+
kind = "descr",
|
|
91
|
+
content = METRIC_DESCR[[metric]] %||% paste0("Metric: ", metric)
|
|
92
|
+
),
|
|
93
|
+
h1 = metric
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
p <- plotthis::BarPlot(
|
|
97
|
+
x = "Sample",
|
|
98
|
+
y = metric,
|
|
99
|
+
x_text_angle = 90,
|
|
100
|
+
fill = "Group",
|
|
101
|
+
data = metrics
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
figfile <- file.path(outdir, paste0(slugify(metric), ".barplot.png"))
|
|
105
|
+
height <- attr(p, "height") %||% 6
|
|
106
|
+
width <- attr(p, "width") %||% (nrow(metrics) * .3 + 2)
|
|
107
|
+
png(figfile, height = height * 100, res = 100, width = width * 100)
|
|
108
|
+
print(p)
|
|
109
|
+
dev.off()
|
|
110
|
+
|
|
111
|
+
reporter$add(
|
|
112
|
+
list(src = figfile, name = "By Sample"),
|
|
113
|
+
ui = "table_of_images",
|
|
114
|
+
h1 = metric
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
if (is.null(group)) { next }
|
|
118
|
+
# group: Sample, Group
|
|
119
|
+
p <- plotthis::BarPlot(
|
|
120
|
+
data = metrics,
|
|
121
|
+
x = "Group",
|
|
122
|
+
y = metric,
|
|
123
|
+
x_text_angle = 90
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
figfile <- file.path(outdir, paste0(slugify(metric), ".boxplot.png"))
|
|
127
|
+
height <- attr(p, "height") %||% 6
|
|
128
|
+
width <- attr(p, "width") %||% (length(unique(metrics$Group)) * 0.3 + 2)
|
|
129
|
+
png(figfile, height = height * 100, res = 100, width = width * 100)
|
|
130
|
+
print(p)
|
|
131
|
+
dev.off()
|
|
132
|
+
|
|
133
|
+
reporter$add(
|
|
134
|
+
list(src = figfile, name = "By Group"),
|
|
135
|
+
ui = "table_of_images",
|
|
136
|
+
h1 = metric
|
|
137
|
+
)
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
reporter$save(joboutdir)
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from shutil import which
|
|
3
|
+
from diot import Diot # noqa: F401
|
|
4
|
+
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
5
|
+
|
|
6
|
+
infile1: str = {{in.infile1 | quote}} # pyright: ignore # noqa
|
|
7
|
+
infile2: str = {{in.infile2 | quote}} # pyright: ignore # noqa
|
|
8
|
+
outfile: str = {{out.outfile | quote}} # pyright: ignore # noqa
|
|
9
|
+
outdir: str = {{job.outdir | quote}} # pyright: ignore # noqa
|
|
10
|
+
envs: dict = {{envs | repr}} # pyright: ignore # noqa
|
|
11
|
+
conv_tool = envs.pop("conv_tool", "maxit")
|
|
12
|
+
maxit = envs.pop("maxit", "maxit")
|
|
13
|
+
beem = envs.pop("beem", "BeEM")
|
|
14
|
+
ca_only = envs.pop("ca_only", False)
|
|
15
|
+
# aa20_only = envs.pop("aa20_only", False)
|
|
16
|
+
duel = envs.pop("duel", "keep")
|
|
17
|
+
calculate_rmsd = envs.pop("calculate_rmsd", "calculate_rmsd")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def cif_to_pdb(cif_file, pdb_file: Path):
    """Convert an mmCIF file to a PDB file with the configured tool.

    When the module-level ``conv_tool`` is ``"maxit"``, the ``maxit``
    executable is run with ``RCSBROOT`` set to the grandparent directory of
    the resolved executable, and its log goes to ``pdb_file`` with a ``.log``
    suffix. For any other ``conv_tool`` value the ``beem`` executable is run
    instead, given the output path without its suffix.

    Args:
        cif_file: Path of the mmCIF file to convert.
        pdb_file: Path of the PDB file to produce.

    Raises:
        FileNotFoundError: If ``conv_tool`` is ``"maxit"`` and the ``maxit``
            executable cannot be found on ``PATH``.
    """
    if conv_tool == "maxit":
        maxit_path = which(maxit)
        if maxit_path is None:
            # Path(None) would raise an opaque TypeError; say what is missing.
            raise FileNotFoundError(
                f"maxit executable not found (envs.maxit): {maxit}"
            )
        # NOTE(review): presumably maxit is installed at <RCSBROOT>/bin/maxit
        rcsbroot = Path(maxit_path).resolve().parent.parent
        args = {
            "input": cif_file,
            "output": pdb_file,
            "o": 2,
            "log": pdb_file.with_suffix(".log"),
        }
        run_command(
            [maxit, *dict_to_cli_args(args, prefix="-")],
            fg=True,
            env={"RCSBROOT": rcsbroot},
        )
    else:
        args = {"_": cif_file, "p": pdb_file.parent.joinpath(pdb_file.stem)}
        args = dict_to_cli_args(args, prefix="-", sep="=")
        run_command([beem, *args], fg=True)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def pdb_to_ca_pdb(pdb_file: Path, ca_pdb_file: Path):
    """Write only the C-alpha ATOM records of a PDB file to a new file.

    Matching records are copied unchanged and in their original order.
    Every other line is dropped: non-ATOM records (headers, HETATM, TER,
    END, ...) as well as ATOM records whose atom name is not ``CA``.

    Args:
        pdb_file: PDB file to read.
        ca_pdb_file: File to write the C-alpha records to (overwritten).
    """
    with open(pdb_file, "r") as src, open(ca_pdb_file, "w") as dst:
        dst.writelines(
            record
            for record in src
            # Columns 13-16 (0-based 12:16) hold the atom name
            if record.startswith("ATOM") and record[12:16].strip() == "CA"
        )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# def pdb_to_aa20_pdb(pdb_file: Path, aa20_pdb_file: Path):
|
|
41
|
+
# """Extract the 20 amino acids from a PDB file and still keep the original order and metadata."""
|
|
42
|
+
# with open(pdb_file, "r") as f, open(aa20_pdb_file, "w") as fw:
|
|
43
|
+
# for line in f:
|
|
44
|
+
# if line.startswith("ATOM") and line[17:20].strip() in (
|
|
45
|
+
# "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY",
|
|
46
|
+
# "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER",
|
|
47
|
+
# "THR", "TRP", "TYR", "VAL",
|
|
48
|
+
# ):
|
|
49
|
+
# fw.write(line)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def deduel_pdb(pdb_file: Path, deduel_pdb_file: Path, mode=None):
    """Resolve "duel" (alternate-location) atoms in a PDB file.

    Consecutive ATOM records that agree on record name, atom name, residue
    name, chain and residue number but differ in the alternate-location
    column (index 16) are treated as one group, e.g.::

        ATOM    913  CA ATYR A 113
        ATOM    914  CA BTYR A 113

    Each group is resolved according to ``mode``. Every ATOM record that is
    written has its alternate-location column blanked; non-ATOM lines are
    copied unchanged and the original record order is preserved.

    Args:
        pdb_file: PDB file to read.
        deduel_pdb_file: File to write the result to (overwritten).
        mode: One of ``"keep"`` (write every member), ``"keep_first"``,
            ``"keep_last"`` or ``"average"`` (first member with the mean
            x/y/z of the group). Defaults to the module-level ``duel``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    mode = duel if mode is None else mode
    if mode not in ("keep", "keep_first", "keep_last", "average"):
        raise ValueError(f"Unknown duel mode: {mode!r}")

    def atom_key(atom):
        # Everything that identifies the atom except serial number and altloc
        return atom[:4], atom[12:16], atom[17:20], atom[21:22], atom[22:26]

    def is_duel(atom1, atom2):
        return (
            atom_key(atom1) == atom_key(atom2)
            and atom1[16:17] != atom2[16:17]
        )

    def clean_atom(atom):
        # Blank the alternate-location indicator
        return atom[:16] + " " + atom[17:]

    def resolve(group):
        """Return the records to write for one group of duel atoms."""
        if mode == "keep" or len(group) < 2:
            return [clean_atom(atom) for atom in group]
        if mode == "keep_first":
            return [clean_atom(group[0])]
        if mode == "keep_last":
            return [clean_atom(group[-1])]
        # average: mean of the x, y, z fields over all members
        coords = [
            [float(atom[start:start + 8]) for start in (30, 38, 46)]
            for atom in group
        ]
        x, y, z = (sum(axis) / len(group) for axis in zip(*coords))
        first = group[0]
        return [
            clean_atom(first[:30] + f"{x:8.3f}{y:8.3f}{z:8.3f}" + first[54:])
        ]

    pending = []  # current run of ATOM records describing the same atom
    with open(pdb_file, "r") as f, open(deduel_pdb_file, "w") as fw:
        for line in f:
            is_atom = line.startswith("ATOM")
            if is_atom and pending and is_duel(pending[-1], line):
                pending.append(line)
                continue
            # Flush the buffered atom(s) before anything that follows them,
            # so TER/HETATM/END lines keep their position.
            fw.writelines(resolve(pending))
            pending = [line] if is_atom else []
            if not is_atom:
                fw.write(line)
        fw.writelines(resolve(pending))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def index_of(lst, item) -> int:
    """Return the position of the first occurrence of item in lst, or -1.

    Args:
        lst: Sequence providing an ``index`` method.
        item: Value to look for.

    Returns:
        The index reported by ``lst.index(item)``, or -1 when that call
        raises ``ValueError``.
    """
    position = -1
    try:
        position = lst.index(item)
    except ValueError:
        # Not present: keep the -1 sentinel
        pass
    return position
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
if infile1.endswith(".cif"):
|
|
119
|
+
pdb1 = Path(outdir) / f"{Path(infile1).stem}.pdb"
|
|
120
|
+
cif_to_pdb(infile1, pdb1)
|
|
121
|
+
infile1 = pdb1 # type: ignore
|
|
122
|
+
|
|
123
|
+
if infile2.endswith(".cif"):
|
|
124
|
+
pdb2 = Path(outdir) / f"{Path(infile2).stem}.pdb"
|
|
125
|
+
cif_to_pdb(infile2, pdb2)
|
|
126
|
+
infile2 = pdb2 # type: ignore
|
|
127
|
+
|
|
128
|
+
if ca_only:
|
|
129
|
+
ca_pdb1 = Path(outdir) / f"{Path(infile1).stem}.ca.pdb"
|
|
130
|
+
pdb_to_ca_pdb(infile1, ca_pdb1) # type: ignore
|
|
131
|
+
infile1 = ca_pdb1 # type: ignore
|
|
132
|
+
|
|
133
|
+
ca_pdb2 = Path(outdir) / f"{Path(infile2).stem}.ca.pdb"
|
|
134
|
+
pdb_to_ca_pdb(infile2, ca_pdb2) # type: ignore
|
|
135
|
+
infile2 = ca_pdb2 # type: ignore
|
|
136
|
+
|
|
137
|
+
# if aa20_only:
|
|
138
|
+
# aa20_pdb1 = Path(outdir) / f"{Path(infile1).stem}.aa20.pdb"
|
|
139
|
+
# pdb_to_aa20_pdb(infile1, aa20_pdb1) # type: ignore
|
|
140
|
+
# infile1 = aa20_pdb1 # type: ignore
|
|
141
|
+
|
|
142
|
+
# aa20_pdb2 = Path(outdir) / f"{Path(infile2).stem}.aa20.pdb"
|
|
143
|
+
# pdb_to_aa20_pdb(infile2, aa20_pdb2) # type: ignore
|
|
144
|
+
# infile2 = aa20_pdb2 # type: ignore
|
|
145
|
+
|
|
146
|
+
if duel != "keep":
|
|
147
|
+
deduel_pdb1 = Path(outdir) / f"{Path(infile1).stem}.deduel.pdb"
|
|
148
|
+
deduel_pdb(infile1, deduel_pdb1) # type: ignore
|
|
149
|
+
infile1 = deduel_pdb1 # type: ignore
|
|
150
|
+
|
|
151
|
+
deduel_pdb2 = Path(outdir) / f"{Path(infile2).stem}.deduel.pdb"
|
|
152
|
+
deduel_pdb(infile2, deduel_pdb2) # type: ignore
|
|
153
|
+
infile2 = deduel_pdb2 # type: ignore
|
|
154
|
+
|
|
155
|
+
envs["_"] = [infile1, infile2]
|
|
156
|
+
envs = dict_to_cli_args(envs, dashify=True)
|
|
157
|
+
|
|
158
|
+
# These options are rewritten from the generated double-dash form to a
# single dash before being passed to calculate_rmsd.
for flag in ("ur", "urks", "nh"):
    flag_pos = index_of(envs, f"--{flag}")
    if flag_pos != -1:
        envs[flag_pos] = f"-{flag}"
|
|
169
|
+
|
|
170
|
+
out: str = run_command([calculate_rmsd, *envs], stdout="return") # type: ignore
|
|
171
|
+
out = out.strip()
|
|
172
|
+
|
|
173
|
+
try:
|
|
174
|
+
float(out)
|
|
175
|
+
except (ValueError, TypeError):
|
|
176
|
+
raise ValueError(out)
|
|
177
|
+
|
|
178
|
+
Path(outfile).write_text(out)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Script for regulatory.MotifAffinityTest
#
# This part of the script pulls in the shared motif helpers, attaches the
# required packages and binds the job inputs/outputs and the `envs` options
# (rendered by the template engine) to plain R variables.
{% include biopipen_dir + "/scripts/regulatory/motifs-common.R" %}

library(BiocParallel)
library(BSgenome)
library(biopipen.utils)

# Job inputs and output directory
motiffile     <- {{in.motiffile | r}}
varfile       <- {{in.varfile | r}}
outdir        <- {{out.outdir | r}}

# Process options
ncores        <- {{envs.ncores | r}}
tool          <- {{envs.tool | r}}
bcftools      <- {{envs.bcftools | r}}
genome        <- {{envs.genome | r}}
motif_col     <- {{envs.motif_col | r}}
regulator_col <- {{envs.regulator_col | r}}
var_col       <- {{envs.var_col | r}}
notfound      <- {{envs.notfound | r}}
motifdb       <- {{envs.motifdb | r}}
regmotifs     <- {{envs.regmotifs | r}}
devpars       <- {{envs.devpars | r}}
plot_nvars    <- {{envs.plot_nvars | r}}
plots         <- {{envs.plots | r}}
cutoff        <- {{envs.cutoff | r}}

# Fixed seed so that any randomised step downstream is reproducible
set.seed(8525)
|
|
26
|
+
|
|
27
|
+
# Validate the required options and inputs up front so the job stops with an
# actionable message before any file is read.
if (is.null(motifdb) || !file.exists(motifdb)) {
    stop("Motif database (envs.motifdb) is required and must exist")
}

# Only presence is checked for the genome (no file lookup is done here), and
# the option is `envs.genome`, so the message names that option.
if (is.null(genome)) {
    stop("Reference genome (envs.genome) is required")
}

if (is.null(motiffile) || !file.exists(motiffile)) {
    stop("Motif file (in.motiffile) is required and must exist")
}

if (is.null(varfile) || !file.exists(varfile)) {
    stop("Variant file (in.varfile) is required and must exist")
}

# At least one of the two identifying columns must be configured.
if (is.null(motif_col) && is.null(regulator_col)) {
    stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
}
|
|
46
|
+
|
|
47
|
+
log <- get_logger()

# Load the tab-delimited motif/regulator table; column names are kept
# verbatim (check.names = FALSE) because they are looked up by the
# configured column options below.
log$info("Reading input regulator/motif file ...")
in_motifs <- read.table(
    motiffile,
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE,
    check.names = FALSE
)


log$info("Ensuring motifs and regulators in the input data ...")
in_motifs <- ensure_regulator_motifs(
    in_motifs,
    outdir,
    motif_col,
    regulator_col,
    var_col,
    regmotifs,
    notfound = notfound
)
genome_pkg <- get_genome_pkg(genome)

# When a variant column is configured, remember the distinct
# "<motif> // <variant>" keys; NULL means no pair restriction was requested.
motif_var_pairs <- NULL
if (!is.null(var_col)) {
    log$info("Obtaining motif-variant pairs to test ...")
    if (!(var_col %in% colnames(in_motifs))) {
        stop("Variant column (envs.var_col) not found in the input motif file")
    }

    motif_var_pairs <- unique(
        paste(in_motifs[[motif_col]], in_motifs[[var_col]], sep = " // ")
    )
}
|
|
66
|
+
|
|
67
|
+
log$info("Reading variant file ...")
# VCF input (plain or gzipped) is first flattened to an 8-column BED-like
# table with bcftools; any other input is read as-is.
if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
    log$info("Converting VCF file to BED file ...")
    varfile_bed <- file.path(outdir, gsub("\\.vcf(\\.gz)?$", ".bed", basename(varfile)))
    cmd <- c(
        bcftools, "query",
        # 0-based start, constant score "0" and strand "+", first ALT allele only
        "-f", "%CHROM\\t%POS0\\t%END\\t%ID\\t0\\t+\\t%REF\\t%ALT{0}\\n",
        "-i", 'FILTER="PASS" || FILTER="." || FILTER=""',
        "-o", varfile_bed,
        varfile
    )
    run_command(cmd, fg = TRUE)

    varfile <- varfile_bed
}

# `chrom`, `start`, `end`, `name`, `score`, `strand`, `ref`, `alt`.
# Columns 7 and 8 (ref/alt) are forced to character: with the default type
# conversion a column made up only of "T" (or "F") alleles is read as logical
# TRUE/FALSE, which corrupts the alleles used downstream.
snpinfo <- read.table(
    varfile,
    header = FALSE,
    stringsAsFactors = FALSE,
    colClasses = c(V7 = "character", V8 = "character")
)
colnames(snpinfo) <- c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
|
|
86
|
+
|
|
87
|
+
log$info("Reading motif database ...")
mdb <- read_meme_to_motifdb(
    motifdb, in_motifs, motif_col, regulator_col, notfound, outdir
)

# The tool name is matched case-insensitively against the supported tools.
tool <- match.arg(tolower(tool), c("motifbreakr", "atsnp"))

# NOTE(review): the template condition below compares envs.tool verbatim,
# while the R variable above is lower-cased first; a mixed-case value would
# pass match.arg but render the atSNP branch. Confirm the option is always
# given in lower case.
{% if envs.tool == "motifbreakr" %}
motifbreakr_args <- {{envs.motifbreakr_args | r}}
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
{% else %}
# Defaults for the atSNP branch, overridden by envs.atsnp_args
atsnp_args <- list_update(
    list(padj_cutoff = TRUE, padj = "BH", p = "Pval_diff"),
    {{envs.atsnp_args | r}}
)
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
{% endif %}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
library(atSNP)
library(rtracklayer)

log$info("Converting snpinfo to atSNP object ...")

# c("chrom", "start", "end", "name", "score", "strand", "ref", "alt", "ref_seq", "alt_seq")
# Every ref and alt allele must be exactly one character long.
if (any(nchar(snpinfo$ref) != 1) || any(nchar(snpinfo$alt) != 1)) {
    stop("Only SNVs are supported by atSNP. Consider using motifbreakR instead if you have indels.")
}

# Path of the table written for atSNP, derived from the variant file name
atsnp_bed <- file.path(
    outdir,
    gsub("\\.vcf(\\.gz)?$|\\.bed$", ".atsnp.txt", basename(varfile))
)

# Variants without a usable name ("." / NA / empty) get "<chrom>:<end>"
snpinfo$name <- ifelse(
    snpinfo$name == "." | is.na(snpinfo$name) | nchar(snpinfo$name) == 0,
    sprintf("%s:%s", snpinfo$chrom, snpinfo$end),
    snpinfo$name
)

# Add the columns written to the atSNP input table
snpinfo$a1 <- snpinfo$ref
snpinfo$a2 <- snpinfo$alt
snpinfo$chr <- snpinfo$chrom
snpinfo$snp <- snpinfo$end
snpinfo$snpid <- snpinfo$name

atsnp_input_cols <- c("snpid", "a1", "a2", "chr", "snp")
write.table(
    snpinfo[, atsnp_input_cols],
    file = atsnp_bed,
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
)
|
|
26
|
+
|
|
27
|
+
motif_lib <- motifdb_to_motiflib(mdb)

# Largest row count over all motif matrices; used as the half window size
# and, later, to shift the reported match positions.
k <- max(sapply(motif_lib, nrow))

snps <- LoadSNPData(
    atsnp_bed,
    genome.lib = genome_pkg,
    mutation = TRUE,  # force using given ref and alt
    default.par = nrow(snpinfo) < 1000,
    half.window.size = k
)

log$info("Running atSNP ...")
atsnp_scores <- ComputeMotifScore(motif_lib, snps, ncores = ncores)

log$info("Calculating p values ...")
atsnp_result <- ComputePValues(
    motif.lib = motif_lib,
    snp.info = snps,
    motif.scores = atsnp_scores$motif.scores,
    ncores = ncores,
    testing.mc = TRUE
)
|
|
48
|
+
|
|
49
|
+
# Post-process the atSNP p-value table: optional pair filtering, multiple
# testing adjustment, cutoff, annotation with variant information, export,
# and per-variant plotting.
#
# Constant columns are built with rep(..., n) and indices with seq_len(), so
# that an empty result (nothing passes the cutoff) does not fail on scalar
# recycling into a zero-row table or on `1:0` indexing.

# Keep only the "<motif> // <variant>" pairs requested in the input file.
if (!is.null(motif_var_pairs)) {
    log$info("Filtering motif-variant pairs ...")
    atsnp_result$motifs_vars <- paste0(atsnp_result$motif, " // ", atsnp_result$snpid)
    atsnp_result <- atsnp_result[atsnp_result$motifs_vars %in% motif_var_pairs, , drop = FALSE]
    atsnp_result$motifs_vars <- NULL
}

# Adjust the configured p-value column, then filter on either the adjusted or
# the raw column depending on atsnp_args$padj_cutoff.
padj_col <- paste0(atsnp_args$p, "_adj")
atsnp_result[[padj_col]] <- p.adjust(atsnp_result[[atsnp_args$p]], method = atsnp_args$padj)
cutoff_col <- if (atsnp_args$padj_cutoff) padj_col else atsnp_args$p
atsnp_result <- atsnp_result[atsnp_result[[cutoff_col]] < cutoff, , drop = FALSE]
# order by p value
atsnp_result <- atsnp_result[order(atsnp_result[[cutoff_col]]), , drop = FALSE]

# Align the variant table row-by-row with the results before copying columns.
snpinfo <- snpinfo[match(atsnp_result$snpid, snpinfo$snpid), , drop = FALSE]
n_results <- nrow(atsnp_result)

atsnp_result$chr <- snpinfo$chr
atsnp_result$start <- snpinfo$start
atsnp_result$end <- snpinfo$end
atsnp_result$SNP_id <- snpinfo$snpid
atsnp_result$snpid <- NULL
atsnp_result$REF <- snpinfo$ref
atsnp_result$ALT <- snpinfo$alt
atsnp_result$providerId <- atsnp_result$providerName <- atsnp_result$motif
atsnp_result$motif <- NULL
atsnp_result$strand <- snpinfo$strand
atsnp_result$score <- snpinfo$score
atsnp_result$snpbase <- NULL
atsnp_result$altPos <- rep(1, n_results)
atsnp_result$varType <- rep("SNV", n_results)
# "<start>,<end>" of the reference match, shifted by the half window size k
atsnp_result$motifPos <- paste(
    atsnp_result$ref_start - k,
    atsnp_result$ref_end - k,
    sep = ","
)
if (!is.null(regulator_col)) {
    atsnp_result$geneSymbol <- atsnp_result$Regulator <- in_motifs[
        match(atsnp_result$providerId, in_motifs[[motif_col]]),
        regulator_col,
        drop = TRUE
    ]
}

write.table(
    atsnp_result,
    file = file.path(outdir, "atsnp.txt"),
    sep = "\t", quote = FALSE, row.names = FALSE
)

log$info("Plotting variants ...")
# Convert result to GRanges object
atsnp_result$alleleDiff <- -log10(atsnp_result[[cutoff_col]])
atsnp_result <- atsnp_result[order(-atsnp_result$alleleDiff), , drop = FALSE]
atsnp_result$effect <- rep("strong", nrow(atsnp_result))
atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
genome(atsnp_result) <- genome
attributes(atsnp_result)$genome.package <- genome_pkg
attributes(atsnp_result)$motifs <- mdb

# Without explicit plot cases, plot the variants among the top `plot_nvars`
# records; otherwise plot exactly the variants named in `plots`.
if (is.null(plots) || length(plots) == 0) {
    atsnp_result <- atsnp_result[seq_len(min(plot_nvars, length(atsnp_result))), , drop = FALSE]
    variants <- unique(atsnp_result$SNP_id)
} else {
    variants <- names(plots)
}
for (variant in variants) {
    log$info("- Variant: {variant}")
    # Fill in per-variant defaults: plot every record with the global devpars.
    if (is.null(plots[[variant]])) {
        plots[[variant]] <- list(devpars = devpars, which = "TRUE")
    }
    if (is.null(plots[[variant]]$which)) {
        plots[[variant]]$which <- "TRUE"
    }
    if (is.null(plots[[variant]]$devpars)) {
        plots[[variant]]$devpars <- devpars
    }
    res <- atsnp_result[atsnp_result$SNP_id == variant, , drop = FALSE]
    # NOTE(review): `which` is evaluated as R code taken from the process
    # configuration; it must only ever come from a trusted source.
    res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))

    plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
}
|