PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.21.0-py3-none-any.whl → 0.34.26-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/scripts/protein/Prodigy.py ADDED Viewed

@@ -0,0 +1,119 @@
+import json
+import logging
+import sys
+from pathlib import Path
+from prodigy_prot.predict_IC import (  # type: ignore
+    Prodigy,
+    check_path,
+    parse_structure,
+)
+infile: str = {{in.infile | quote}}  # pyright: ignore # noqa
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
+outdir: str = {{out.outdir | quote}}  # pyright: ignore
+distance_cutoff = {{envs.distance_cutoff | float}}  # pyright: ignore
+acc_threshold = {{envs.acc_threshold | float}}  # pyright: ignore
+temperature = {{envs.temperature | float}}  # pyright: ignore
+contact_list = {{envs.contact_list | repr}}  # pyright: ignore
+pymol_selection = {{envs.pymol_selection | repr}}  # pyright: ignore
+selection = {{envs.selection | repr}}  # pyright: ignore
+outtype = {{envs.outtype | repr}}  # pyright: ignore
+raw_outfile = Path(outdir) / "_prodigy_raw.txt"
+json_outfile = Path(outdir) / "_prodigy.json"
+tsv_outfile = Path(outdir) / "_prodigy.tsv"
+# log to the raw_outfile
+logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
+logger = logging.getLogger("Prodigy")
+if isinstance(selection, str):
+    selection = [selection]
+struct_path = check_path(infile)
+# parse structure
+structure, n_chains, n_res = parse_structure(struct_path)
+logger.info(
+    "[+] Parsed structure file {0} ({1} chains, {2} residues)".format(
+        structure.id, n_chains, n_res
+    )
+)
+prodigy = Prodigy(structure, selection, temperature)
+prodigy.predict(distance_cutoff=distance_cutoff, acc_threshold=acc_threshold)
+prodigy.print_prediction(outfile=raw_outfile, quiet=False)
+# Print out interaction network
+if contact_list:
+    prodigy.print_contacts(f"{outdir}/prodigy.ic")
+# Print out interaction network
+if pymol_selection:
+    prodigy.print_pymol_script(f"{outdir}/prodigy.pml")
+# [+] Reading structure file: <path/to/structure.cif>
+# [+] Parsed structure file <structure> (4 chains, 411 residues)
+# [+] No. of intermolecular contacts: 191
+# [+] No. of charged-charged contacts: 17
+# [+] No. of charged-polar contacts: 18
+# [+] No. of charged-apolar contacts: 60
+# [+] No. of polar-polar contacts: 5
+# [+] No. of apolar-polar contacts: 41
+# [+] No. of apolar-apolar contacts: 50
+# [+] Percentage of apolar NIS residues: 33.90
+# [+] Percentage of charged NIS residues: 30.48
+# [++] Predicted binding affinity (kcal.mol-1):    -21.3
+# [++] Predicted dissociation constant (M) at 25.0˚C:  2.3e-16
+output = {}
+with open(raw_outfile, "r") as f:
+    for line in f:
+        if line.startswith("[+"):
+            line = line.lstrip("[").lstrip("+").lstrip("]").lstrip()
+            if line.startswith("Reading structure file"):
+                continue
+            if line.startswith("Parsed structure file"):
+                continue
+            key, value = line.split(":", 1)
+            key = key.strip()
+            value = value.strip()
+            if key == "No. of intermolecular contacts":
+                output["nIC"] = int(value)
+            elif key == "No. of charged-charged contacts":
+                output["nCCC"] = int(value)
+            elif key == "No. of charged-polar contacts":
+                output["nCPC"] = int(value)
+            elif key == "No. of charged-apolar contacts":
+                output["nCAPC"] = int(value)
+            elif key == "No. of polar-polar contacts":
+                output["nPPC"] = int(value)
+            elif key == "No. of apolar-polar contacts":
+                output["nAPPC"] = int(value)
+            elif key == "No. of apolar-apolar contacts":
+                output["nAPAPC"] = int(value)
+            elif key.startswith("Percentage of apolar NIS residues"):
+                output["pANISR"] = float(value)
+            elif key.startswith("Percentage of charged NIS residues"):
+                output["pCNISR"] = float(value)
+            elif key.startswith("Predicted binding affinity"):
+                output["BindingAffinity"] = float(value)
+            elif key.startswith("Predicted dissociation constant"):
+                output["DissociationConstant"] = float(value)
+with open(json_outfile, "w") as f:
+    json.dump(output, f, indent=2)
+with open(tsv_outfile, "w") as f:
+    f.write("\t".join(output.keys()) + "\n")
+    f.write("\t".join(map(str, output.values())) + "\n")
+if outtype == "json":
+    json_outfile.rename(outfile)
+    json_outfile.symlink_to(outfile)
+elif outtype == "tsv":
+    tsv_outfile.rename(outfile)
+    tsv_outfile.symlink_to(outfile)
+else:
+    raw_outfile.rename(outfile)
+    raw_outfile.symlink_to(outfile)

biopipen/scripts/protein/ProdigySummary.R ADDED Viewed

@@ -0,0 +1,140 @@
+library(rlang)
+library(dplyr)
+library(biopipen.utils)
+library(plotthis)
+# Summarize Prodigy metrics of multiple samples: merge the per-sample tables,
+# save the merged table, and draw one plot per metric (plus a per-group plot
+# when group information is given).
+infiles <- {{in.infiles | r}}
+outdir <- {{out.outdir | r}}
+joboutdir <- {{job.outdir | r}}
+group <- {{envs.group | r}}
+# Normalize `group` into a data frame with columns Sample and Group:
+# - a character value is read as a header-less CSV file
+# - a list is expanded as group name -> samples
+# - NULL means no grouping
+if (is.character(group)) {
+    group <- read.csv(group, header = FALSE, row.names = NULL)
+    colnames(group) <- c("Sample", "Group")
+} else if (is.list(group)) {
+    group <- do_call(
+        rbind,
+        lapply(names(group), function(n) data.frame(Sample = group[[n]], Group = n))
+    )
+} else if (!is.null(group)) {
+    stop(paste0("Invalid group: ", paste0(group, collapse = ", ")))
+}
+log <- get_logger()
+reporter <- get_reporter()
+log$info("Reading and merging metrics for each sample ...")
+metrics <- NULL
+for (infile in infiles) {
+    # The sample name is the parent directory name without a trailing "_prodigy"
+    sample <- sub("_prodigy$", "", basename(dirname(infile)))
+    log$debug("- Reading metrics from {sample}")
+    metric <- read.table(
+        infile,
+        header = TRUE,
+        sep = "\t",
+        stringsAsFactors = FALSE,
+        check.names = FALSE,
+        row.names = NULL)
+    metric$Sample <- sample
+    # Put the Sample column first
+    metric <- metric %>% select(Sample, everything())
+    if (is.null(metrics)) {
+        metrics <- metric
+    } else {
+        metrics <- rbind(metrics, metric)
+    }
+}
+# Save metrics
+write.table(
+    metrics,
+    file.path(outdir, "metrics.txt"),
+    sep = "\t",
+    quote = FALSE,
+    row.names = FALSE
+)
+reporter$add(
+    list(kind = "descr", content = "Metrics for all samples"),
+    list(kind = "table", src = file.path(outdir, "metrics.txt")),
+    h1 = "Metrics of all samples"
+)
+# Metric column name -> human-readable description used in the report
+METRIC_DESCR = list(
+    nIC = "No. of intermolecular contacts",
+    nCCC = "No. of charged-charged contacts",
+    nCPC = "No. of charged-polar contacts",
+    nCAPC = "No. of charged-apolar contacts",
+    nPPC = "No. of polar-polar contacts",
+    nAPPC = "No. of apolar-polar contacts",
+    nAPAPC = "No. of apolar-apolar contacts",
+    pANISR = "Percentage of apolar NIS residues",
+    pCNISR = "Percentage of charged NIS residues",
+    BindingAffinity = "Predicted binding affinity (kcal.mol^-1)",
+    DissociationConstant = "Predicted dissociation constant (M)"
+)
+if (!is.null(group)) {
+    log$info("Merging group information ...")
+    # Left join from `group`: samples listed in `group` without metrics are
+    # kept (with NA metrics); samples with metrics but not listed are dropped.
+    # Group levels follow their order of first appearance.
+    metrics <- group %>%
+        left_join(metrics, by = "Sample") %>%
+        mutate(Group = factor(Group, levels = unique(Group)))
+}
+log$info("Plotting Prodigy metrics ...")
+for (metric in names(METRIC_DESCR)) {
+    log$info("- {metric}: {METRIC_DESCR[[metric]]}")
+    reporter$add(
+        list(
+            kind = "descr",
+            content = METRIC_DESCR[[metric]] %||% paste0("Metric: ", metric)
+        ),
+        h1 = metric
+    )
+    # Per-sample plot.
+    # NOTE(review): `fill = "Group"` is passed even when `group` is NULL, in
+    # which case this script never adds a Group column to `metrics` — confirm
+    # how plotthis::BarPlot handles that.
+    p <- plotthis::BarPlot(
+        x = "Sample",
+        y = metric,
+        x_text_angle = 90,
+        fill = "Group",
+        data = metrics
+    )
+    figfile <- file.path(outdir, paste0(slugify(metric), ".barplot.png"))
+    # Use the size attached to the plot when present, otherwise fall back to
+    # a default height and a width that grows with the number of rows
+    height <- attr(p, "height") %||% 6
+    width <- attr(p, "width") %||% (nrow(metrics) * .3 + 2)
+    png(figfile, height = height * 100, res = 100, width = width * 100)
+    print(p)
+    dev.off()
+    reporter$add(
+        list(src = figfile, name = "By Sample"),
+        ui = "table_of_images",
+        h1 = metric
+    )
+    # Without group information only the per-sample plot is produced
+    if (is.null(group)) { next }
+    # group: Sample, Group
+    # NOTE(review): the output file is named ".boxplot.png" but the plot is
+    # created with BarPlot — confirm whether a box plot was intended.
+    p <- plotthis::BarPlot(
+        data = metrics,
+        x = "Group",
+        y = metric,
+        x_text_angle = 90
+    )
+    figfile <- file.path(outdir, paste0(slugify(metric), ".boxplot.png"))
+    height <- attr(p, "height") %||% 6
+    width <- attr(p, "width") %||% (length(unique(metrics$Group)) * 0.3 + 2)
+    png(figfile, height = height * 100, res = 100, width = width * 100)
+    print(p)
+    dev.off()
+    reporter$add(
+        list(src = figfile, name = "By Group"),
+        ui = "table_of_images",
+        h1 = metric
+    )
+}
+reporter$save(joboutdir)

biopipen/scripts/protein/RMSD.py ADDED Viewed

@@ -0,0 +1,178 @@
+from pathlib import Path
+from shutil import which
+from diot import Diot  # noqa: F401
+from biopipen.utils.misc import run_command, dict_to_cli_args
+# Input structures, output file and job directory, filled in by the template
+infile1: str = {{in.infile1 | quote}}  # pyright: ignore # noqa
+infile2: str = {{in.infile2 | quote}}  # pyright: ignore # noqa
+outfile: str = {{out.outfile | quote}}  # pyright: ignore # noqa
+outdir: str = {{job.outdir | quote}}  # pyright: ignore # noqa
+envs: dict = {{envs | repr}}  # pyright: ignore # noqa
+# Pop the options consumed by this script itself; whatever remains in `envs`
+# is later converted into command-line arguments for `calculate_rmsd`.
+# mmCIF -> PDB converter to use ("maxit", anything else selects BeEM)
+conv_tool = envs.pop("conv_tool", "maxit")
+# Executable names/paths of the converters
+maxit = envs.pop("maxit", "maxit")
+beem = envs.pop("beem", "BeEM")
+# Whether to keep only C-alpha atoms before computing the RMSD
+ca_only = envs.pop("ca_only", False)
+# aa20_only = envs.pop("aa20_only", False)
+# How to handle atoms with alternate locations:
+# "keep", "keep_first", "keep_last" or "average" (see deduel_pdb)
+duel = envs.pop("duel", "keep")
+# Executable name/path of calculate_rmsd
+calculate_rmsd = envs.pop("calculate_rmsd", "calculate_rmsd")
+def cif_to_pdb(cif_file, pdb_file:Path):
+    if conv_tool == "maxit":
+        maxit_bin = Path(which(maxit)).resolve()
+        rcsbroot = Path(maxit_bin).parent.parent
+        args = {"input": cif_file, "output": pdb_file, "o": 2, "log": pdb_file.with_suffix(".log")}
+        run_command([maxit, *dict_to_cli_args(args, prefix="-")], fg=True, env={"RCSBROOT": rcsbroot})
+    else:
+        args = {"_": cif_file, "p": pdb_file.parent.joinpath(pdb_file.stem)}
+        args = dict_to_cli_args(args, prefix="-", sep="=")
+        run_command([beem, *args], fg=True)
+def pdb_to_ca_pdb(pdb_file: Path, ca_pdb_file: Path):
+    """Extract C-alpha atoms from a PDB file and still keep the original order and metadata."""
+    with open(pdb_file, "r") as f, open(ca_pdb_file, "w") as fw:
+        for line in f:
+            if line.startswith("ATOM") and line[12:16].strip() == "CA":
+                fw.write(line)
+# def pdb_to_aa20_pdb(pdb_file: Path, aa20_pdb_file: Path):
+#     """Extract the 20 amino acids from a PDB file and still keep the original order and metadata."""
+#     with open(pdb_file, "r") as f, open(aa20_pdb_file, "w") as fw:
+#         for line in f:
+#             if line.startswith("ATOM") and line[17:20].strip() in (
+#                 "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY",
+#                 "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER",
+#                 "THR", "TRP", "TYR", "VAL",
+#             ):
+#                 fw.write(line)
+def deduel_pdb(pdb_file: Path, deduel_pdb_file: Path):
+    """Remove/Handle the duel atoms in a PDB file."""
+    def is_duel(atom1, atom2):
+        #           1         2
+        # 01234567890123456789012345
+        # ATOM    913  CA ATYR A 113
+        # ATOM    914  CA BTYR A 113
+        # The key should be "ATOM|CA |TYR| A| 113"
+        return (
+            atom1[:4] == atom2[:4] and
+            atom1[12:16] == atom2[12:16] and
+            atom1[17:20] == atom2[17:20] and
+            atom1[21] == atom2[21] and
+            atom1[22:26] == atom2[22:26] and
+            atom1[16] != atom2[16]
+        )
+    def clean_atom(atom):
+        return atom[:16] + " " + atom[17:]
+    last_atom = ""
+    with open(pdb_file, "r") as f, open(deduel_pdb_file, "w") as fw:
+        for line in f:
+            if not line.startswith("ATOM"):
+                fw.write(line)
+                continue
+            if not is_duel(last_atom, line):
+                if last_atom:
+                    fw.write(clean_atom(last_atom))
+                last_atom = line
+            # is duel
+            elif duel == "keep":
+                fw.write(clean_atom(last_atom))
+                fw.write(clean_atom(line))
+                last_atom = ""
+            elif duel == "keep_first":
+                fw.write(clean_atom(last_atom))
+                last_atom = ""
+            elif duel == "keep_last":
+                fw.write(clean_atom(line))
+                last_atom = ""
+            elif duel == "average":
+                # Average the coordinates
+                x1 = float(last_atom[30:38])
+                y1 = float(last_atom[38:46])
+                z1 = float(last_atom[46:54])
+                x2 = float(line[30:38])
+                y2 = float(line[38:46])
+                z2 = float(line[46:54])
+                x = (x1 + x2) / 2.0
+                y = (y1 + y2) / 2.0
+                z = (z1 + z2) / 2.0
+                fw.write(clean_atom(last_atom[:30] + f"{x:8.3f}{y:8.3f}{z:8.3f}" + last_atom[54:]))
+                last_atom = ""
+        if last_atom:
+            fw.write(last_atom)
+def index_of(lst, item) -> int:
+    try:
+        return lst.index(item)
+    except ValueError:
+        return -1
+if infile1.endswith(".cif"):
+    pdb1 = Path(outdir) / f"{Path(infile1).stem}.pdb"
+    cif_to_pdb(infile1, pdb1)
+    infile1 = pdb1  # type: ignore
+if infile2.endswith(".cif"):
+    pdb2 = Path(outdir) / f"{Path(infile2).stem}.pdb"
+    cif_to_pdb(infile2, pdb2)
+    infile2 = pdb2  # type: ignore
+if ca_only:
+    ca_pdb1 = Path(outdir) / f"{Path(infile1).stem}.ca.pdb"
+    pdb_to_ca_pdb(infile1, ca_pdb1) # type: ignore
+    infile1 = ca_pdb1  # type: ignore
+    ca_pdb2 = Path(outdir) / f"{Path(infile2).stem}.ca.pdb"
+    pdb_to_ca_pdb(infile2, ca_pdb2) # type: ignore
+    infile2 = ca_pdb2  # type: ignore
+# if aa20_only:
+#     aa20_pdb1 = Path(outdir) / f"{Path(infile1).stem}.aa20.pdb"
+#     pdb_to_aa20_pdb(infile1, aa20_pdb1) # type: ignore
+#     infile1 = aa20_pdb1  # type: ignore
+#     aa20_pdb2 = Path(outdir) / f"{Path(infile2).stem}.aa20.pdb"
+#     pdb_to_aa20_pdb(infile2, aa20_pdb2) # type: ignore
+#     infile2 = aa20_pdb2  # type: ignore
+if duel != "keep":
+    deduel_pdb1 = Path(outdir) / f"{Path(infile1).stem}.deduel.pdb"
+    deduel_pdb(infile1, deduel_pdb1) # type: ignore
+    infile1 = deduel_pdb1  # type: ignore
+    deduel_pdb2 = Path(outdir) / f"{Path(infile2).stem}.deduel.pdb"
+    deduel_pdb(infile2, deduel_pdb2) # type: ignore
+    infile2 = deduel_pdb2  # type: ignore
+envs["_"] = [infile1, infile2]
+envs = dict_to_cli_args(envs, dashify=True)
+idx_ur = index_of(envs, "--ur")
+if idx_ur != -1:
+    envs[idx_ur] = "-ur"
+idx_urks = index_of(envs, "--urks")
+if idx_urks != -1:
+    envs[idx_urks] = "-urks"
+idx_nh = index_of(envs, "--nh")
+if idx_nh != -1:
+    envs[idx_nh] = "-nh"
+out: str = run_command([calculate_rmsd, *envs], stdout="return")  # type: ignore
+out = out.strip()
+try:
+    float(out)
+except (ValueError, TypeError):
+    raise ValueError(out)
+Path(outfile).write_text(out)

biopipen/scripts/regulatory/MotifAffinityTest.R ADDED Viewed

@@ -0,0 +1,102 @@
+# Script for regulatory.MotifAffinityTest
+{% include biopipen_dir + "/scripts/regulatory/motifs-common.R" %}
+library(BiocParallel)
+library(BSgenome)
+library(biopipen.utils)
+# Inputs, output directory and options, filled in by the pipeline template
+motiffile <- {{in.motiffile | r}}
+varfile <- {{in.varfile | r}}
+outdir <- {{out.outdir | r}}
+ncores <- {{envs.ncores | r}}
+tool <- {{envs.tool | r}}
+bcftools <- {{envs.bcftools | r}}
+genome <- {{envs.genome | r}}
+motif_col <- {{envs.motif_col | r}}
+regulator_col <- {{envs.regulator_col | r}}
+var_col <- {{envs.var_col | r}}
+notfound <- {{envs.notfound | r}}
+motifdb <- {{envs.motifdb | r}}
+regmotifs <- {{envs.regmotifs | r}}
+devpars <- {{envs.devpars | r}}
+plot_nvars <- {{envs.plot_nvars | r}}
+plots <- {{envs.plots | r}}
+cutoff <- {{envs.cutoff | r}}
+set.seed(8525)
+# Validate required inputs and options before doing any work
+if (is.null(motifdb) || !file.exists(motifdb)) {
+    stop("Motif database (envs.motifdb) is required and must exist")
+}
+if (is.null(genome)) {
+    # Only presence is checked here; the value comes from envs.genome
+    stop("Reference genome (envs.genome) is required")
+}
+if (is.null(motiffile) || !file.exists(motiffile)) {
+    stop("Motif file (in.motiffile) is required and must exist")
+}
+if (is.null(varfile) || !file.exists(varfile)) {
+    stop("Variant file (in.varfile) is required and must exist")
+}
+if (is.null(motif_col) && is.null(regulator_col)) {
+    stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
+}
+log <- get_logger()
+log$info("Reading input regulator/motif file ...")
+in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
+log$info("Ensuring motifs and regulators in the input data ...")
+in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, var_col, regmotifs, notfound = notfound)
+genome_pkg <- get_genome_pkg(genome)
+# When a variant column is given, only the listed motif-variant pairs are
+# tested; pairs are encoded as "<motif> // <variant>"
+motif_var_pairs <- NULL
+if (!is.null(var_col)) {
+    log$info("Obtaining motif-variant pairs to test ...")
+    if (!var_col %in% colnames(in_motifs)) {
+        stop("Variant column (envs.var_col) not found in the input motif file")
+    }
+    # NOTE(review): motif_col may be NULL when only regulator_col is given;
+    # presumably ensure_regulator_motifs() guarantees a usable motif column
+    # in that case — confirm.
+    motif_var_pairs <- unique(paste0(in_motifs[[motif_col]], " // ", in_motifs[[var_col]]))
+}
+log$info("Reading variant file ...")
+if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
+    # VCF input is converted to an 8-column BED file with bcftools query,
+    # keeping records whose FILTER is PASS, "." or empty and only the first
+    # ALT allele
+    log$info("Converting VCF file to BED file ...")
+    varfile_bed <- file.path(outdir, gsub("\\.vcf(\\.gz)?$", ".bed", basename(varfile)))
+    cmd <- c(
+        bcftools, "query",
+        "-f", "%CHROM\\t%POS0\\t%END\\t%ID\\t0\\t+\\t%REF\\t%ALT{0}\\n",
+        "-i", 'FILTER="PASS" || FILTER="." || FILTER=""',
+        "-o", varfile_bed,
+        varfile
+    )
+    run_command(cmd, fg = TRUE)
+    varfile <- varfile_bed
+}
+# `chrom`, `start`, `end`, `name`, `score`, `strand`, `ref`, `alt`.
+snpinfo <- read.table(varfile, header=FALSE, stringsAsFactors=FALSE)
+colnames(snpinfo) <- c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
+log$info("Reading motif database ...")
+mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfound, outdir)
+tool <- tolower(tool)
+tool <- match.arg(tool, c("motifbreakr", "atsnp"))
+# The tool-specific implementation is chosen at template-rendering time.
+# NOTE(review): the template condition compares the raw envs.tool, while the
+# R code above lower-cases it; a value such as "MotifBreakR" would pass
+# match.arg() but take the atSNP branch below — confirm the accepted spelling.
+{% if envs.tool == "motifbreakr" %}
+    motifbreakr_args <- {{envs.motifbreakr_args | r}}
+    {% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
+{% else %}
+    # User-supplied atSNP arguments override these defaults
+    atsnp_args <- list_update(
+        list(padj_cutoff = TRUE, padj = "BH", p = "Pval_diff"),
+        {{envs.atsnp_args | r}}
+    )
+    {% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
+{% endif %}

biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R ADDED Viewed

@@ -0,0 +1,127 @@
+library(atSNP)
+library(rtracklayer)
+# Build the tab-delimited SNP table expected by atSNP::LoadSNPData() from `snpinfo`
+# and load it together with the flanking genomic sequence.
+log$info("Converting snpinfo to atSNP object ...")
+# snpinfo columns: c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
+# read.table() without colClasses type-converts a column that contains nothing but
+# "T" (or "F") to logical, which would make a perfectly valid thymine-only allele
+# column look like the 4-character string "TRUE". Restore the one-letter text first.
+for (allele_col in c("ref", "alt")) {
+    if (is.logical(snpinfo[[allele_col]])) {
+        snpinfo[[allele_col]] <- ifelse(snpinfo[[allele_col]], "T", "F")
+    }
+}
+# NA-safe SNV check: a missing allele is reported as unsupported instead of
+# aborting with "missing value where TRUE/FALSE needed".
+is_single_base <- function(x) !is.na(x) & nchar(x) == 1
+if (!all(is_single_base(snpinfo$ref)) || !all(is_single_base(snpinfo$alt))) {
+    stop("Only SNVs are supported by atSNP. Consider using motifbreakR instead if you have indels.")
+}
+atsnp_bed <- file.path(outdir, gsub("\\.vcf(\\.gz)?$|\\.bed$", ".atsnp.txt", basename(varfile)))
+# Variants without a usable ID get a "<chrom>:<end>" name so they stay addressable.
+snpinfo$name <- ifelse(
+    snpinfo$name == "." | is.na(snpinfo$name) | nchar(snpinfo$name) == 0,
+    sprintf("%s:%s", snpinfo$chrom, snpinfo$end),
+    snpinfo$name
+)
+# Column names written to the atSNP input file (snpid, a1, a2, chr, snp);
+# `snp` takes the `end` coordinate of the variant.
+snpinfo$a1 <- snpinfo$ref
+snpinfo$a2 <- snpinfo$alt
+snpinfo$chr <- snpinfo$chrom
+snpinfo$snp <- snpinfo$end
+snpinfo$snpid <- snpinfo$name
+write.table(
+    snpinfo[, c("snpid", "a1", "a2", "chr", "snp")],
+    file = atsnp_bed,
+    sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE
+)
+motif_lib <- motifdb_to_motiflib(mdb)
+# The longest motif determines how much flanking sequence is loaded around each SNP.
+k <- max(sapply(motif_lib, nrow))
+snps <- LoadSNPData(
+    atsnp_bed,
+    genome.lib = genome_pkg,
+    mutation = TRUE,  # force using given ref and alt
+    default.par = nrow(snpinfo) < 1000,
+    half.window.size = k
+)
+# Score every motif against every SNP, derive p-values, optionally restrict to the
+# requested motif-variant pairs, then adjust and filter by the configured p-value.
+log$info("Running atSNP ...")
+atsnp_scores <- ComputeMotifScore(motif_lib, snps, ncores = ncores)
+log$info("Calculating p values ...")
+atsnp_result <- ComputePValues(
+    motif.lib = motif_lib,
+    snp.info = snps,
+    motif.scores = atsnp_scores$motif.scores,
+    ncores = ncores,
+    testing.mc = TRUE
+)
+if (!is.null(motif_var_pairs)) {
+    log$info("Filtering motif-variant pairs ...")
+    # Pairs are keyed as "<motif> // <variant>", the same format used to build motif_var_pairs.
+    atsnp_result$motifs_vars <- paste0(atsnp_result$motif, " // ", atsnp_result$snpid)
+    atsnp_result <- atsnp_result[atsnp_result$motifs_vars %in% motif_var_pairs, , drop = FALSE]
+    atsnp_result$motifs_vars <- NULL
+}
+# Fail early with a readable message: otherwise a wrong column name surfaces below
+# as an obscure "replacement has 0 rows" error from the data frame assignment.
+if (!atsnp_args$p %in% colnames(atsnp_result)) {
+    stop(sprintf(
+        "P-value column '%s' (atsnp_args$p) not found in atSNP results. Available columns: %s",
+        atsnp_args$p,
+        paste(colnames(atsnp_result), collapse = ", ")
+    ))
+}
+padj_col <- paste0(atsnp_args$p, "_adj")
+atsnp_result[[padj_col]] <- p.adjust(atsnp_result[[atsnp_args$p]], method = atsnp_args$padj)
+cutoff_col <- if (atsnp_args$padj_cutoff) padj_col else atsnp_args$p
+# which() drops NA p-values; plain logical indexing would insert all-NA rows for them.
+atsnp_result <- atsnp_result[which(atsnp_result[[cutoff_col]] < cutoff), , drop = FALSE]
+# order by p value
+atsnp_result <- atsnp_result[order(atsnp_result[[cutoff_col]]), , drop = FALSE]
+# Annotate the filtered results with variant/motif metadata and write them to
+# "<outdir>/atsnp.txt". All assignments are length-safe so that an empty result
+# (nothing passed the cutoff) yields a header-only table instead of an error.
+# Align the variant table row-by-row with the results.
+snpinfo <- snpinfo[match(atsnp_result$snpid, snpinfo$snpid), , drop = FALSE]
+n_results <- nrow(atsnp_result)
+atsnp_result$chr <- snpinfo$chr
+atsnp_result$start <- snpinfo$start
+atsnp_result$end <- snpinfo$end
+atsnp_result$SNP_id <- snpinfo$snpid
+atsnp_result$snpid <- NULL
+atsnp_result$REF <- snpinfo$ref
+atsnp_result$ALT <- snpinfo$alt
+# The chained assignment creates providerName first and providerId second.
+atsnp_result$providerId <- atsnp_result$providerName <- atsnp_result$motif
+atsnp_result$motif <- NULL
+atsnp_result$strand <- snpinfo$strand
+atsnp_result$score <- snpinfo$score
+atsnp_result$snpbase <- NULL
+# rep() instead of a bare scalar: assigning a length-1 value to a 0-row data frame fails.
+atsnp_result$altPos <- rep(1, n_results)
+atsnp_result$varType <- rep("SNV", n_results)
+# "<start>,<end>" of the motif match, shifted by the half window size k.
+# Vectorised paste() avoids iterating over 1:0 when there are no rows.
+atsnp_result$motifPos <- paste(atsnp_result$ref_start - k, atsnp_result$ref_end - k, sep = ",")
+if (!is.null(regulator_col)) {
+    # Look up the regulator of each motif in the input motif table.
+    atsnp_result$geneSymbol <- atsnp_result$Regulator <- in_motifs[
+        match(atsnp_result$providerId, in_motifs[[motif_col]]),
+        regulator_col,
+        drop = TRUE
+    ]
+}
+write.table(
+    atsnp_result,
+    file = file.path(outdir, "atsnp.txt"),
+    sep = "\t", quote = FALSE, row.names = FALSE
+)
+# Convert the result table to a GRanges object carrying the attributes read by
+# plot_variant_motifs(), then plot either the top `plot_nvars` rows' variants or
+# the variants explicitly configured in `plots`.
+log$info("Plotting variants ...")
+if (nrow(atsnp_result) == 0) {
+    # Scalar column assignment and 1:0 indexing below would fail on an empty result.
+    log$info("No motif-variant pairs passed the cutoff, skipping plotting.")
+} else {
+    # Convert result to GRanges object
+    atsnp_result$alleleDiff <- -log10(atsnp_result[[cutoff_col]])
+    atsnp_result <- atsnp_result[order(-atsnp_result$alleleDiff), , drop = FALSE]
+    atsnp_result$effect <- "strong"
+    # "start,end" strings -> integer vectors, one list element per row.
+    atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
+    atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
+    genome(atsnp_result) <- genome
+    attributes(atsnp_result)$genome.package <- genome_pkg
+    attributes(atsnp_result)$motifs <- mdb
+    if (is.null(plots) || length(plots) == 0) {
+        # seq_len() rather than 1:n, so a count of 0 selects nothing instead of row 1.
+        atsnp_result <- atsnp_result[seq_len(min(plot_nvars, length(atsnp_result))), , drop = FALSE]
+        variants <- unique(atsnp_result$SNP_id)
+    } else {
+        variants <- names(plots)
+    }
+    for (variant in variants) {
+        log$info("- Variant: {variant}")
+        # Fill in per-variant defaults: plot every motif hit with the global devpars.
+        if (is.null(plots[[variant]])) {
+            plots[[variant]] <- list(devpars = devpars, which = "TRUE")
+        }
+        if (is.null(plots[[variant]]$which)) {
+            plots[[variant]]$which <- "TRUE"
+        }
+        if (is.null(plots[[variant]]$devpars)) {
+            plots[[variant]]$devpars <- devpars
+        }
+        res <- atsnp_result[atsnp_result$SNP_id == variant, , drop = FALSE]
+        # NOTE(review): `which` is parsed and evaluated as R code; it must only ever
+        # come from trusted pipeline configuration.
+        res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
+        plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
+    }
+}

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0py3-none-any.whl → 0.34.26py3-none-any.whl