PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.21.0"
1	+ __version__ = "0.34.26"

biopipen/core/config.toml CHANGED Viewed

@@ -1,9 +1,17 @@
 # Executables or binaries
 [exe]
+# BeEM: https://github.com/kad-ecoli/BeEM
+beem = "BeEM"
 # bedtools to handle bed files
 bedtools = "bedtools"
 # bcftools to handle bcf/vcf files
 bcftools = "bcftools"
+# calculate_rmsd: https://github.com/charnley/rmsd
+calculate_rmsd = "calculate_rmsd"
+# cellranger
+cellranger = "cellranger"
+# cellsnp-lite
+cellsnp_lite = "cellsnp-lite"
 # Control-FREEC to call cnvs
 freec = "freec"
 # liftover coordinates across genomes
@@ -11,6 +19,8 @@ liftover = "liftOver"
 # gatk, installed via conda
 gatk = "gatk"
 gatk4 = "gatk"
+# google cloud sdk
+gcloud = "gcloud"
 # vdjtools, installed via conda
 vdjtools = "vdjtools"
 # cnvkit.py
@@ -21,10 +31,20 @@ cnvpytor = "cnvpytor"
 cnvnator2vcf = "cnvnator2VCF.pl"
 # convert
 convert = "convert"
+# fimo from meme
+fimo = "fimo"
+# MAXIT: https://sw-tools.rcsb.org/apps/MAXIT/
+maxit = "maxit"
+# MQuad: https://github.com/single-cell-genetics/MQuad
+mquad = "mquad"
 # wget
 wget = "wget"
 # aria2c
 aria2c = "aria2c"
+# plink
+plink = "plink"
+# plink2
+plink2 = "plink2"
 # tabix
 tabix = "tabix"
 # sambamba
@@ -59,6 +79,10 @@ liftover_chain = ""
 # tmpdir = ""
 [ref]
+# The reference for cellranger gex
+ref_cellranger_gex = ""
+# The reference for cellranger vdj
+ref_cellranger_vdj = ""
 # The reference genome
 reffa = ""
 # The directory with reference for each chromosome
@@ -78,6 +102,10 @@ genome = ""
 # Database file for scType
 # https://github.com/IanevskiAleksandr/sc-type/
 sctype_db = ""
+# TF Motif database
+tf_motifdb = ""
+# TF motif pairs
+tf_motifs = ""
 [misc]
 # Number of cores used for each job

biopipen/core/filters.py CHANGED Viewed

@@ -1,12 +1,17 @@
 """Additional filters for pipen"""
 from __future__ import annotations
+import re
 import shlex
 from pathlib import Path
 from typing import Any, List, Mapping
-from argx import Namespace
+from argx import Namespace  # pyright: ignore[reportPrivateImportUsage]
 from liquid.filters.manager import FilterManager
+from yunpath import CloudPath
+from pipen_report.filters import register_component, _tag
+# from .defaults import BIOPIPEN_DIR
 filtermanager = FilterManager()
@@ -14,6 +19,7 @@ filtermanager = FilterManager()
 @filtermanager.register
 def dict_to_cli_args(
     dic: Mapping[str, Any],
+    exclude: List[str] | None = None,
     prefix: str | None = None,
     sep: str | None = " ",
     dup_key: bool = True,
@@ -26,6 +32,7 @@ def dict_to_cli_args(
     Args:
         dic: The dict to convert
+        exclude: The keys to exclude before conversion (e.g. dashify)
         prefix: The prefix of the keys after conversion
             Defaults to `None`, mean `-` for short keys and `--` for long keys
         sep: The separator between key and value
@@ -36,6 +43,13 @@ def dict_to_cli_args(
             If `sep` is `None` or `=`, this must be True, otherwise an error
             will be raised
         join: Whether to join the arguments into a single string
+        start_key: The key to start the arguments
+            This is useful when you want to put some arguments at the beginning
+            of the command line
+        end_key: The key to end the arguments
+            This is useful when you want to put some arguments at the end
+            of the command line
+        dashify: Whether to replace `_` with `-` in the keys
     Returns:
         The converted string or list of strings
@@ -43,6 +57,9 @@ def dict_to_cli_args(
     if sep in [None, "="] and not dup_key:
         raise ValueError("`dup_key` must be True when sep is `None` or `=`")
+    if exclude:
+        dic = {k: v for k, v in dic.items() if k not in exclude}
     starts = []
     ends = []
     out = []
@@ -105,7 +122,7 @@ def dict_to_cli_args(
 def r(
     obj: Any,
     ignoreintkey: bool = True,
-    todot: str = None,
+    todot: str | None = None,
     sortkeys: bool = False,
     skip: int = 0,
     _i: int = 0,
@@ -156,12 +173,14 @@ def r(
             return "TRUE"
         if obj.upper() == "FALSE":
             return "FALSE"
-        if obj.upper() == "NA" or obj.upper() == "NULL":
+        if obj.upper() == "NA" or obj.upper() == "NULL" or obj == "None":
             return obj.upper()
+        if re.match(r"^\d+:\d+$", obj):
+            return obj
         if obj.startswith("r:") or obj.startswith("R:"):
             return str(obj)[2:]
         return repr(str(obj))
-    if isinstance(obj, Path):
+    if isinstance(obj, (Path, CloudPath)):
         return repr(str(obj))
     if isinstance(obj, (list, tuple, set)):
         if any(isinstance(i, dict) for i in obj):
@@ -206,3 +225,59 @@ def r(
         return r(vars(obj), ignoreintkey, todot, sortkeys, skip, _i)
     return repr(obj)
+@filtermanager.register
+def source_r(path: str | Path, chdir: bool = False) -> str:
+    """Source an R script.
+    In addition to generating `source(path)`, we also include the mtime for the script
+    to trigger the job not cached when the script is updated.
+    If your process is used in a cloud environment, it is recommended to
+    use the `read` filter to load the script content instead of sourcing it using
+    the `source` function in R to void the path issue (path could be different
+    in different environments).
+    Args:
+        path: The path to the R script
+    Returns:
+        The R code to source the script
+    """
+    path = Path(path)
+    mtime = int(path.stat().st_mtime)
+    return (
+        f"# Last modified: {mtime}\n"
+        # f"biopipen_dir = {r(BIOPIPEN_DIR)}\n"
+        f"source('{path}', chdir = {r(chdir)})"
+    )
+@register_component("pdf")
+def _render_pdf(
+    cont: Mapping[str, Any],
+    job: Mapping[str, Any],
+    level: int,
+) -> str:
+    """Render pdf report"""
+    # cont["src"] is required
+    height = cont.get("height", "600")
+    return _tag(
+        "embed",
+        src=str(cont["src"]),
+        type="application/pdf",
+        width="100%",
+        height=height,
+    )
+@register_component("gsea")
+def _render_gsea(
+    cont: Mapping[str, Any],
+    job: Mapping[str, Any],
+    level: int,
+) -> str:
+    """Render gsea report"""
+    # cont["dir"] is required
+    raise NotImplementedError()

biopipen/core/proc.py CHANGED Viewed

@@ -1,7 +1,9 @@
 """Provides a base class for the processes to subclass"""
-from diot import Diot
+from __future__ import annotations
+from diot import Diot  # type: ignore
 from liquid.defaults import SEARCH_PATHS
-from pipen import Proc as PipenProc
+from pipen import Proc as PipenProc  # type: ignore
 from pipen_filters.filters import FILTERS
 from .filters import filtermanager
@@ -23,5 +25,12 @@ class Proc(PipenProc):
     template_opts = {
         "globals": {**FILTERS, "biopipen_dir": str(BIOPIPEN_DIR)},
         "filters": {**FILTERS, **filtermanager.filters},
-        "search_paths": SEARCH_PATHS + [str(REPORT_DIR)],
+        "search_paths": SEARCH_PATHS + [str(REPORT_DIR)],  # type: ignore
+    }
+    plugin_opts = {
+        "poplog_pattern": (
+            r"^(?P<level>INFO|WARN|WARNING|CRITICAL|ERROR|DEBUG?)\s*"
+            r"\[\d+-\d+-\d+ \d+:\d+:\d+\] (?P<message>.*)$"
+        )
     }

biopipen/core/testing.py CHANGED Viewed

@@ -1,12 +1,16 @@
 """Provide utilities for testing."""
 import tempfile
+from functools import wraps
 from pathlib import Path
 from pipen import Pipen
 TESTING_INDEX_INIT = 1
-TESTING_PARENT_DIR = tempfile.gettempdir()
-TESTING_DIR = f"{TESTING_PARENT_DIR}/biopipen-tests-%(index)s"
+TESTING_PARENT_DIR = Path(__file__).parent.parent.parent.joinpath("tests", "running")
+TESTING_PARENT_DIR.mkdir(parents=True, exist_ok=True)
+TESTING_DIR = str(TESTING_PARENT_DIR.joinpath("biopipen-tests-%(index)s"))
+RSCRIPT_DIR = TESTING_PARENT_DIR.joinpath("biopipen-tests-rscripts")
+RSCRIPT_DIR.mkdir(exist_ok=True)
 def _find_testing_index(new):
@@ -37,14 +41,82 @@ def _get_test_dirs(testfile, new):
     return name, workdir, outdir
-def get_pipeline(testfile, loglevel="debug", **kwargs):
+def get_pipeline(testfile, loglevel="debug", enable_report=False, **kwargs):
     """Get a pipeline for a test file"""
     name, workdir, outdir = _get_test_dirs(testfile, False)
+    report_plugin_prefix = "+" if enable_report else "-"
+    plugins = kwargs.pop("plugins", [])
+    if any("report" in p for p in plugins if isinstance(p, str)):
+        raise ValueError(
+            "Do not pass `report` plugin to `get_pipeline(plugins=[...])`, "
+            "use `enable_report` instead."
+        )
+    plugins.append(f"{report_plugin_prefix}report")
     kws = {
         "name": name,
         "workdir": workdir,
         "outdir": outdir,
         "loglevel": loglevel,
+        "plugins": plugins,
     }
     kws.update(kwargs)
     return Pipen(**kws)
+def _run_rcode(rcode: str) -> str:
+    """Run R code and return the output"""
+    import hashlib
+    import textwrap
+    import subprocess as sp
+    # Use sha256 of rcode to name the file
+    rcode_hash = hashlib.sha256(rcode.encode()).hexdigest()
+    script_file = RSCRIPT_DIR.joinpath(f"rcode-{rcode_hash}.R")
+    script_file.write_text(rcode)
+    p = sp.Popen(["Rscript", str(script_file)], stdout=sp.PIPE, stderr=sp.PIPE)
+    out, err = p.communicate()
+    if p.returncode != 0:
+        out = (
+            f"R codefile:\n  {script_file}\n"
+            f"Error:\n{textwrap.indent(err.decode(), '  ')}"
+        )
+        return out
+    return out.decode().strip()
+def r_test(mem: callable) -> callable:
+    """A decorator to test R code"""
+    @wraps(mem)
+    def decorator(self, *args, **kwargs):
+        rcode = mem(self, *args, **kwargs)
+        source = getattr(self, "SOURCE_FILE", None)
+        expect = (
+            "expect <- function(expr, ...) {\n"
+            "  if (!expr) {\n"
+            "    msg <- lapply(\n"
+            "      list(...),\n"
+            "      function(x) { ifelse(is.null(x), 'NULL', x) }\n"
+            "    )\n"
+            "    stop(paste0(unlist(msg), collapse = ' '))\n"
+            "  }\n"
+            "}\n"
+        )
+        rcode = f"{expect}\n\n{rcode}\n\ncat('PASSED')\n"
+        if source is not None:
+            if not isinstance(source, (list, tuple)):
+                source = [source]
+            libs = "\n".join([f"suppressWarnings(source('{s}'))" for s in source])
+            rcode = f'{libs}\n\n{rcode}'
+        out = _run_rcode(rcode)
+        self.assertEqual(
+            out,
+            "PASSED",
+            "\n-----------------------------\n"
+            f"{out}"
+            "\n-----------------------------\n"
+        )
+    return decorator

biopipen/ns/bam.py CHANGED Viewed

@@ -4,6 +4,9 @@ from ..core.proc import Proc
 from ..core.config import config
+# +-------------------------------------------------------------------+
+# | CNV callers                                                       |
+# +-------------------------------------------------------------------+
 class CNVpytor(Proc):
     """Detect CNV using CNVpytor
@@ -17,7 +20,6 @@ class CNVpytor(Proc):
     Envs:
         cnvpytor: Path to cnvpytor
-        cnvnator2vcf: Path to CNVnator2VCF.pl to convert the result to VCF file
         samtools: Path to samtools, used to index bam file in case it's not
         ncores: Number of cores to use (`-j` for cnvpytor)
         refdir: The directory containing the fasta file for each chromosome
@@ -27,21 +29,19 @@ class CNVpytor(Proc):
         binsizes: The binsizes
         snp: How to read snp data
         filters: The filters to filter the result
-            See - https://github.com/abyzovlab/CNVpytor/blob/master
-            /GettingStarted.md#predicting-cnv-regions
+            See - https://github.com/abyzovlab/CNVpytor/blob/master/GettingStarted.md#predicting-cnv-regions
         mask_snps: Whether mask 1000 Genome snps
         baf_nomask: Do not use P mask in BAF histograms
     Requires:
         cnvpytor:
            - check: {{proc.envs.cnvpytor}} --version
-    """
+    """  # noqa: E501
     input = "bamfile:file, snpfile:file"
     output = "outdir:dir:{{in.bamfile | stem}}.cnvpytor"
     lang = config.lang.python
     envs = {
         "cnvpytor": config.exe.cnvpytor,
-        "cnvnator2vcf": config.exe.cnvnator2vcf,
         "samtools": config.exe.samtools,
         "ncores": config.misc.ncores,
         "refdir": config.ref.refdir,
@@ -152,7 +152,7 @@ class CNAClinic(Proc):
             A list of sample names
             A float number (0 < x <= 1), the fraction of samples to use
             A integer number (x > 1), the number of samples to use
-        binsize: Directly use this binsize for CNAClinic, in kbp.
+        binsize: Directly use this binsize for CNAClinic, in bp.
         genome: The genome assembly
         run_args: The arguments for CNAClinic::runSegmentation
         plot_args: The arguments for CNAClinic::plotSampleData
@@ -183,6 +183,9 @@ class CNAClinic(Proc):
     }
+# +-------------------------------------------------------------------+
+# | Bam processing tools                                              |
+# +-------------------------------------------------------------------+
 class BamSplitChroms(Proc):
     """Split bam file by chromosomes
@@ -262,3 +265,142 @@ class BamMerge(Proc):
         "sort_args": [],
     }
     script = "file://../scripts/bam/BamMerge.py"
+class BamSampling(Proc):
+    """Keeping only a fraction of read pairs from a bam file
+    Input:
+        bamfile: The bam file
+    Output:
+        outfile: The output bam file
+    Envs:
+        ncores: Number of cores to use
+        samtools: Path to samtools executable
+        tool: The tool to use, currently only "samtools" is supported
+        fraction (type=float): The fraction of reads to keep.
+            If `0 < fraction <= 1`, it's the fraction of reads to keep.
+            If `fraction > 1`, it's the number of reads to keep.
+            Note that when fraction > 1, you may not get the exact number
+            of reads specified but a close number.
+        seed: The seed for random number generator
+        index: Whether to index the output bam file
+        sort: Whether to sort the output bam file
+        sort_args: The arguments for sorting bam file using `samtools sort`.
+            These keys are not allowed: `-o`, `-@`,
+            and `--threads`, as they are managed by the script.
+    """
+    input = "bamfile:file"
+    output = "outfile:file:{{in.bamfile | stem}}.sampled{{envs.fraction}}.bam"
+    lang = config.lang.python
+    envs = {
+        "ncores": config.misc.ncores,
+        "samtools": config.exe.samtools,
+        "tool": "samtools",
+        "fraction": None,
+        "seed": 8525,
+        "index": True,
+        "sort": True,
+        "sort_args": [],
+    }
+    script = "file://../scripts/bam/BamSampling.py"
+class BamSubsetByBed(Proc):
+    """Subset bam file by the regions in a bed file
+    Input:
+        bamfile: The bam file
+        bedfile: The bed file
+    Output:
+        outfile: The output bam file
+    Envs:
+        ncores: Number of cores to use
+        samtools: Path to samtools executable
+        tool: The tool to use, currently only "samtools" is supported
+        index: Whether to index the output bam file
+    """
+    input = "bamfile:file, bedfile:file"
+    output = "outfile:file:{{in.bamfile | stem}}-subset.bam"
+    lang = config.lang.python
+    envs = {
+        "ncores": config.misc.ncores,
+        "samtools": config.exe.samtools,
+        "tool": "samtools",
+        "index": True,
+    }
+    script = "file://../scripts/bam/BamSubsetByBed.py"
+class BamSort(Proc):
+    """Sort bam file
+    Input:
+        bamfile: The bam file
+    Output:
+        outfile: The output bam file
+    Envs:
+        tool (choice): The tool to use.
+            - samtools: Use `samtools`
+            - sambamba: Use `sambamba`
+        ncores (type=int): Number of cores to use
+        samtools: Path to samtools executable
+        sambamba: Path to sambamba executable
+        tmpdir: The temporary directory to use
+        byname (flag): Whether to sort by read name
+        index (flag): Whether to index the output bam file
+            The index file will be created in the same directory as the output
+            bam file
+        <more>: Other arguments passed to the sorting tool
+            See `samtools sort` or `sambamba sort`
+    """
+    input = "bamfile:file"
+    output = "outfile:file:{{in.bamfile | stem}}.sorted.bam"
+    lang = config.lang.python
+    envs = {
+        "tool": "samtools",
+        "ncores": config.misc.ncores,
+        "samtools": config.exe.samtools,
+        "sambamba": config.exe.sambamba,
+        "tmpdir": config.path.tmpdir,
+        "byname": False,
+        "index": True,
+    }
+    script = "file://../scripts/bam/BamSort.py"
+class SamtoolsView(Proc):
+    """View bam file using samtools, mostly used for filtering
+    This is a wrapper for `samtools view` command.
+    It will create a new bam file with the same name as the input bam file.
+    Input:
+        bamfile: The bam file
+    Output:
+        outfile: The output bam file
+    Envs:
+        ncores: Number of cores to use
+        samtools: Path to samtools executable
+        index: Whether to index the output bam file
+            Requires the input bam file to be sorted.
+        <more>: Other arguments passed to the view tool
+            See `samtools view` or `sambamba view`.
+    """
+    input = "bamfile:file"
+    output = "outfile:file:{{in.bamfile | stem}}.bam"
+    lang = config.lang.python
+    envs = {
+        "ncores": config.misc.ncores,
+        "samtools": config.exe.samtools,
+        "index": True,
+    }
+    script = "file://../scripts/bam/SamtoolsView.py"

biopipen/ns/bed.py CHANGED Viewed

@@ -163,3 +163,78 @@ class BedtoolsMerge(Proc):
         "bedtools": config.exe.bedtools,
     }
     script = "file://../scripts/bed/BedtoolsMerge.py"
+class BedtoolsIntersect(Proc):
+    """Find the intersection of two BED files, using `bedtools intersect`
+    See <https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html>
+    Input:
+        afile: The first BED file
+        bfile: The second BED file
+    Output:
+        outfile: The output BED file
+    Envs:
+        bedtools: The path to bedtools
+        sort: Sort `afile` and `bfile` before intersecting.
+            By default, `-sorted` is used, assuming the input files are sorted.
+            If error occurs, try to set `sort` to `True`.
+        chrsize: Alias for `g` in `bedtools intersect`.
+        postcmd: The command to be executed for the output file after intersecting.
+            You can use `$infile`, `$outfile`, and `$outdir` to refer to the input,
+            output, and output directory, respectively.
+        <more>: Other options to be passed to `bedtools intersect`
+    """  # noqa: E501
+    input = "afile:file", "bfile:file"
+    output = "outfile:file:{{in.afile | stem0}}_{{in.bfile | stem0}}-intersect.bt"
+    lang = config.lang.python
+    envs = {
+        "bedtools": config.exe.bedtools,
+        "sort": False,
+        "chrsize": config.ref.chrsize,
+        "postcmd": None,
+    }
+    script = "file://../scripts/bed/BedtoolsIntersect.py"
+class BedtoolsMakeWindows(Proc):
+    """Make windows from a BED file or genome size file, using `bedtools makewindows`.
+    Input:
+        infile: The input BED file or a genome size file
+            Type will be detected by the number of columns in the file.
+            If it has 3+ columns, it is treated as a BED file, otherwise
+            a genome size file.
+    Output:
+        outfile: The output BED file
+    Envs:
+        bedtools: The path to bedtools
+        window (type=int): The size of the windows
+        step (type=int): The step size of the windows
+        nwin (type=int): The number of windows to be generated
+            Exclusive with `window` and `step`.
+            Either `nwin` or `window` and `step` should be provided.
+        reverse (flag): Reverse numbering of windows in the output
+        name (choice): How to name the generated windows/regions
+            - none: Do not add any name
+            - src: Use the source interval's name
+            - winnum: Use the window number
+            - srcwinnum: Use the source interval's name and window number
+    """  # noqa: E501
+    input = "infile:file"
+    output = "outfile:file:{{in.infile | stem}}_windows.bed"
+    lang = config.lang.python
+    envs = {
+        "bedtools": config.exe.bedtools,
+        "window": None,
+        "step": None,
+        "nwin": None,
+        "reverse": False,
+        "name": "none",
+    }
+    script = "file://../scripts/bed/BedtoolsMakeWindows.py"

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl