PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.21.0 (py3-none-any.whl) → 0.34.26 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) hide show

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/ns/vcf.py CHANGED Viewed

@@ -335,6 +335,8 @@ class TruvariBench(Proc):
     """Run `truvari bench` to compare a VCF with CNV calls and
     base CNV standards
+    Requires truvari v4+
     See https://github.com/ACEnglish/truvari/wiki/bench
     Input:
@@ -358,7 +360,7 @@ class TruvariBench(Proc):
         "truvari": config.exe.truvari,
         "ref": config.ref.reffa,
         "refdist": 500,
-        "pctsim": 0.7,
+        "pctseq": 0.7,
         "pctsize": 0.7,
         "pctovl": 0.0,
         "typeignore": False,
@@ -402,7 +404,7 @@ class TruvariBenchSummary(Proc):
     output = "outdir:dir:truvari_bench.summary"
     lang = config.lang.rscript
     envs = {
-        "plots": ["call cnt", "base cnt", "precision", "recall", "f1"],
+        "plots": ["comp cnt", "base cnt", "precision", "recall", "f1"],
         "devpars": None,
     }
     script = "file://../scripts/vcf/TruvariBenchSummary.R"
@@ -414,6 +416,8 @@ class TruvariConsistency(Proc):
     See https://github.com/ACEnglish/truvari/wiki/consistency
+    Requires truvari v4+
     Input:
         vcfs: The vcf files with CNV calls
@@ -439,3 +443,233 @@ class TruvariConsistency(Proc):
     envs = {"truvari": config.exe.truvari, "heatmap": {}}
     script = "file://../scripts/vcf/TruvariConsistency.R"
     plugin_opts = {"report": "file://../reports/vcf/TruvariConsistency.svelte"}
+class BcftoolsAnnotate(Proc):
+    """Add or remove annotations from VCF files
+    See also: <https://samtools.github.io/bcftools/bcftools.html#annotate>
+    Input:
+        infile: The input VCF file
+        annfile: The annotation file.
+            Currently only VCF files are supported.
+    Output:
+        outfile: The VCF file with annotations added or removed.
+    Envs:
+        bcftools: Path to bcftools
+        tabix: Path to tabix, used to index infile and annfile
+        annfile: The annotation file. If `in.annfile` is provided,
+            this is ignored
+        ncores (type=int): Number of cores (`--threads`) to use
+        columns (auto): Comma-separated or list of columns or tags to carry over from
+            the annotation file. Overrides `-c, --columns`
+        remove (auto): Remove the specified columns from the input file
+        header (list): Headers to be added
+        gz (flag): Whether to gzip the output file
+        index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
+        <more>: Other arguments for `bcftools annotate`
+            See also <https://samtools.github.io/bcftools/bcftools.html#annotate>
+            Note that the underscore `_` will be replaced with dash `-` in the
+            argument name.
+    """
+    input = "infile:file, annfile:file"
+    output = (
+        "outfile:file:{{in.infile | stem: 'gz'}}.vcf"
+        "{{'.gz' if envs.index or envs.gz else ''}}"
+    )
+    lang = config.lang.python
+    envs = {
+        "bcftools": config.exe.bcftools,
+        "tabix": config.exe.tabix,
+        "annfile": None,
+        "columns": [],
+        "remove": [],
+        "header": [],
+        "gz": True,
+        "index": True,
+        "ncores": config.misc.ncores,
+    }
+    script = "file://../scripts/vcf/BcftoolsAnnotate.py"
+class BcftoolsFilter(Proc):
+    """Apply fixed threshold filters to VCF files
+    Input:
+        infile: The input VCF file
+    Output:
+        outfile: The filtered VCF file. If the `in.infile` is gzipped, this is
+            gzipped as well.
+    Envs:
+        bcftools: Path to bcftools
+        tabix: Path to tabix, used to index infile/outfile
+        ncores (type=int): Number of cores (`--threads`) to use
+        keep: Whether we should keep the filtered variants or not.
+            If True, the filtered variants will be kept in the output file, but
+            with a new FILTER.
+        includes: and
+        excludes: include/exclude only sites for which EXPRESSION is true.
+            See: <https://samtools.github.io/bcftools/bcftools.html#expressions>
+            If provided, `envs.include/exclude` will be ignored.
+            If a `str`/`list` is used, the filter names will be `Filter_<type>_<index>`.
+            If a dict is used, keys are filter names and values are expressions.
+        gz (flag): Whether to gzip the output file
+        index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
+        <more>: Other arguments for `bcftools filter`
+            See also <https://samtools.github.io/bcftools/bcftools.html#filter>
+    """
+    input = "infile:file"
+    output = (
+        "outfile:file:{{in.infile | stem: 'gz'}}.vcf"
+        "{{'.gz' if envs.index or envs.gz else ''}}"
+    )
+    lang = config.lang.python
+    envs = {
+        "bcftools": config.exe.bcftools,
+        "tabix": config.exe.tabix,
+        "ncores": config.misc.ncores,
+        "keep": True,
+        "includes": None,
+        "excludes": None,
+        "gz": True,
+        "index": True,
+    }
+    script = "file://../scripts/vcf/BcftoolsFilter.py"
+class BcftoolsSort(Proc):
+    """Sort VCF files using `bcftools sort`.
+    `bcftools sort` is used to sort VCF files by chromosome and position based on the
+    order of contigs in the header.
+    Here we provide a chrsize file to first sort the contigs in the header and then
+    sort the VCF file using `bcftools sort`.
+    Input:
+        infile: The input VCF file
+    Output:
+        outfile: The sorted VCF file.
+    Envs:
+        bcftools: Path to bcftools
+        tabix: Path to tabix, used to index infile/outfile
+        ncores (type=int): Number of cores (`--threads`) to use
+        gz (flag): Whether to gzip the output file
+        index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
+        chrsize: The chromosome size file, from which the chromosome order is used
+            to sort the contig in the header first.
+            If not provided, `bcftools sort` will be used directly.
+        notfound (choice): What to do if a contig in the VCF file is not found in the
+            `chrsize` file.
+            - error: Report error
+            - remove: Remove the contig from the header.
+                Note that if there are records with the removed contig, an error will
+                be raised by `bcftools sort`
+            - start: Move the contig to the start of the contigs from `chrsize`
+            - end: Move the contig to the end of the contigs from `chrsize`
+        <more>: Other arguments for `bcftools sort`. For example `max_mem`.
+            See also <https://samtools.github.io/bcftools/bcftools.html#sort>
+    """
+    input = "infile:file"
+    output = (
+        "outfile:file:{{in.infile | stem: 'gz'}}.vcf"
+        "{{'.gz' if envs.index or envs.gz else ''}}"
+    )
+    lang = config.lang.python
+    envs = {
+        "bcftools": config.exe.bcftools,
+        "tabix": config.exe.tabix,
+        "ncores": config.misc.ncores,
+        "chrsize": config.ref.chrsize,
+        "notfound": "remove",
+        "gz": True,
+        "index": True,
+    }
+    script = "file://../scripts/vcf/BcftoolsSort.py"
+class BcftoolsMerge(Proc):
+    """Merge multiple VCF files using `bcftools merge`.
+    Input:
+        infiles: The input VCF files
+    Output:
+        outfile: The merged VCF file.
+    Envs:
+        bcftools: Path to bcftools
+        tabix: Path to tabix, used to index infile/outfile
+        ncores (type=int): Number of cores (`--threads`) to use
+        gz (flag): Whether to gzip the output file
+        index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
+        <more>: Other arguments for `bcftools merge`.
+            See also <https://samtools.github.io/bcftools/bcftools.html#merge>
+    """
+    input = "infiles:files"
+    output = (
+        "outfile:file:{{in.infiles | first | stem | append: '_etc_merged'}}.vcf"
+        "{{'.gz' if envs.index or envs.gz else ''}}"
+    )
+    lang = config.lang.python
+    envs = {
+        "bcftools": config.exe.bcftools,
+        "tabix": config.exe.tabix,
+        "ncores": config.misc.ncores,
+        "gz": True,
+        "index": True,
+    }
+    script = "file://../scripts/vcf/BcftoolsMerge.py"
+class BcftoolsView(Proc):
+    """View, subset and filter VCF files by position and filtering expression.
+    Also convert between VCF and BCF.
+    Input:
+        infile: The input VCF file
+        regions_file: The region file used to subset the input VCF file.
+        samples_file: The samples file used to subset the input VCF file.
+    Output:
+        outfile: The output VCF file.
+    Envs:
+        bcftools: Path to bcftools
+        tabix: Path to tabix, used to index infile/outfile
+        ncores (type=int): Number of cores (`--threads`) to use
+        regions_file: The region file used to subset the input VCF file.
+            If `in.regions_file` is provided, this is ignored.
+        samples_file: The samples file used to subset the input VCF file.
+            If `in.samples_file` is provided, this is ignored.
+        gz (flag): Whether to gzip the output file
+        index (flag): Whether to index the output file (tbi) (`envs.gz` forced to True)
+        <more>: Other arguments for `bcftools view`.
+            See also https://samtools.github.io/bcftools/bcftools.html#view
+            Note that the underscore `_` will be replaced with dash `-` in the
+            argument name.
+    """
+    input = "infile:file, regions_file:file, samples_file:file"
+    output = (
+        "outfile:file:{{in.infile | stem: 'gz'}}.vcf"
+        "{{'.gz' if envs.index or envs.gz else ''}}"
+    )
+    lang = config.lang.python
+    envs = {
+        "bcftools": config.exe.bcftools,
+        "tabix": config.exe.tabix,
+        "ncores": config.misc.ncores,
+        "regions_file": None,
+        "samples_file": None,
+        "gz": True,
+        "index": True,
+    }
+    script = "file://../scripts/vcf/BcftoolsView.py"

biopipen/ns/web.py CHANGED Viewed

@@ -13,8 +13,11 @@ class Download(Proc):
         outfile: The file downloaded
     Envs:
-        tool: Which tool to use to download the data
-            wget, aria2c or python's urllib
+        tool (choice): Which tool to use to download the data
+            - wget: Use wget
+            - aria2c: Use aria2c
+            - urllib: Use python's urllib
+            - aria: Alias for aria2c
         wget: Path to wget
         aria2c: Path to aria2c
         args: The arguments to pass to the tool
@@ -28,8 +31,17 @@ class Download(Proc):
     """
     input = "url"
     output = (
+        # Strip http:// and https:// first; otherwise cloudpathlib.AnyPath takes the
+        # basename of something like "https://example.com/data/?file=datafile.txt"
+        # to be "data" rather than "?file=datafile.txt"
         "outfile:file:"
-        "{{in.url | basename | replace: '%2E', '.' | slugify: separator='.'}}"
+        """{{in.url
+            | replace: 'http://', ''
+            | replace: 'https://', ''
+            | basename
+            | url_decode
+            | slugify: separator='.', lowercase=False, regex_pattern='[^-a-zA-Z0-9_]+'
+        }}"""
     )
     lang = config.lang.python
     envs = {
@@ -43,7 +55,10 @@ class Download(Proc):
 class DownloadList(Proc):
-    """Download data from URLs in a file
+    """Download data from URLs in a file.
+    This does not work by iterating over the URLs in the file. The whole file is
+    passed to `wget` or `aria2c` at once.
     Input:
         urlfile: The file containing the URLs to download data from
@@ -52,8 +67,11 @@ class DownloadList(Proc):
         outdir: The directory containing the downloaded files
     Envs:
-        tool: Which tool to use to download the data
-            wget, aria2c or python's urllib
+        tool (choice): Which tool to use to download the data
+            - wget: Use wget
+            - aria2c: Use aria2c
+            - urllib: Use python's urllib
+            - aria: Alias for aria2c
         wget: Path to wget
         aria2c: Path to aria2c
         args: The arguments to pass to the tool
@@ -76,3 +94,76 @@ class DownloadList(Proc):
         "ncores": config.misc.ncores,
     }
     script = "file://../scripts/web/DownloadList.py"
+class GCloudStorageDownloadFile(Proc):
+    """Download file from Google Cloud Storage
+    Before using this, make sure you have the `gcloud` tool installed and
+    logged in with the appropriate credentials using `gcloud auth login`.
+    Also make sure you have [`google-crc32c`](https://pypi.org/project/google-crc32c/)
+    installed to verify the integrity of the downloaded files.
+    Input:
+        url: The URL to download data from.
+            It should be in the format gs://bucket/path/to/file
+    Output:
+        outfile: The file downloaded
+    Envs:
+        gcloud: Path to gcloud
+        args (ns): Other arguments to pass to the `gcloud storage cp` command
+            - do_not_decompress (flag): Do not decompress the file.
+            - <more>: More arguments to pass to the `gcloud storage cp` command
+                See `gcloud storage cp --help` for more information
+    """
+    input = "url:var"
+    output = "outfile:file:{{in.url | replace: 'gs://', '/' | basename}}"
+    lang = config.lang.python
+    envs = {
+        "gcloud": config.exe.gcloud,
+        "args": {"do_not_decompress": True},
+    }
+    script = "file://../scripts/web/GCloudStorageDownloadFile.py"
+class GCloudStorageDownloadBucket(Proc):
+    """Download all files from a Google Cloud Storage bucket
+    Before using this, make sure you have the `gcloud` tool installed and
+    logged in with the appropriate credentials using `gcloud auth login`.
+    Note that this will not use the `--recursive` flag of `gcloud storage cp`.
+    The files will be listed and downloaded one by one so that they can be parallelized.
+    Also make sure you have [`google-crc32c`](https://pypi.org/project/google-crc32c/)
+    installed to verify the integrity of the downloaded files.
+    Input:
+        url: The URL to download data from.
+            It should be in the format gs://bucket
+    Output:
+        outdir: The directory containing the downloaded files
+    Envs:
+        gcloud: Path to gcloud
+        keep_structure (flag): Keep the directory structure of the bucket
+        ncores (type=int): The number of cores to use to download the files in parallel
+        args (ns): Other arguments to pass to the `gcloud storage cp` command
+            - do_not_decompress (flag): Do not decompress the file.
+            - <more>: More arguments to pass to the `gcloud storage cp` command
+                See `gcloud storage cp --help` for more information
+    """
+    input = "url:var"
+    output = "outdir:dir:{{in.url | replace: 'gs://', ''}}"
+    lang = config.lang.python
+    envs = {
+        "gcloud": config.exe.gcloud,
+        "keep_structure": True,
+        "ncores": config.misc.ncores,
+        "args": {"do_not_decompress": True},
+    }
+    script = "file://../scripts/web/GCloudStorageDownloadBucket.py"

biopipen/reports/bam/CNVpytor.svelte CHANGED Viewed

@@ -4,19 +4,16 @@
     import { Tabs, Tab, TabContent } from "$ccs";
 </script>
-{% for case in envs.cases %}
-<h1>{{case}}</h1>
-{%  for binsize in envs.cases[case].binsizes %}
-<h2>Binsize: {{binsize}}</h2>
+{% for binsize in envs.binsizes %}
+<h1>Binsize: {{binsize}}</h1>
 {% from_ os.path import join, basename %}
 {% assign manplots = [] %}
 {% assign circplots = [] %}
 {% assign samples = [] %}
 {% for job in jobs %}
-{%  set manplot = job.out.outdir | joinpaths: case, "manhattan."+str(binsize)+".*.png" | glob %}
-{%  set circplot = job.out.outdir | joinpaths: case, "circular."+str(binsize)+".*.png" | glob %}
+{%  set manplot = job.out.outdir | glob: "manhattan."+str(binsize)+".*.png" %}
+{%  set circplot = job.out.outdir | glob: "circular."+str(binsize)+".*.png" %}
 {%  set _ = manplots.append(manplot[0]) %}
 {%  if len(circplot) > 0 %}
 {%      set _ = circplots.append(circplot[0]) %}
@@ -45,6 +42,4 @@
     </div>
 </Tabs>
-{%  endfor %}
 {% endfor %}

biopipen/reports/cellranger/CellRangerCount.svelte ADDED Viewed

@@ -0,0 +1,18 @@
+{% from "utils/misc.liq" import report_jobs, table_of_images -%}
+<script>
+    import { Iframe } from "$libs";
+</script>
+{%- macro report_job(job, h=1) -%}
+    <Iframe
+        src="{{job.out.outdir}}/outs/web_summary.html"
+        width="100%"
+        frameborder="0"
+        style="min-height: 60vh" />
+{%- endmacro -%}
+{%- macro head_job(job) -%}
+    <h1>{{job.out.outdir | basename | escape}}</h1>
+{%- endmacro -%}
+{{ report_jobs(jobs, head_job, report_job) }}

biopipen/reports/cellranger/CellRangerSummary.svelte ADDED Viewed

@@ -0,0 +1,16 @@
+{% from "utils/misc.liq" import report_jobs -%}
+<script>
+    import { Image, DataTable, Descr } from "$libs";
+</script>
+{%- macro report_job(job, h=1) -%}
+    {{ job | render_job: h=h }}
+{%- endmacro -%}
+{%- macro head_job(job) -%}
+    <h1>{{job.out.outdir | stem | escape}}</h1>
+{%- endmacro -%}
+{{ report_jobs(jobs, head_job, report_job) }}

biopipen/reports/cellranger/CellRangerVdj.svelte ADDED Viewed

@@ -0,0 +1,18 @@
+{% from "utils/misc.liq" import report_jobs, table_of_images -%}
+<script>
+    import { Iframe } from "$libs";
+</script>
+{%- macro report_job(job, h=1) -%}
+    <Iframe
+        src="{{job.out.outdir}}/outs/web_summary.html"
+        width="100%"
+        frameborder="0"
+        style="min-height: 60vh" />
+{%- endmacro -%}
+{%- macro head_job(job) -%}
+    <h1>{{job.out.outdir | basename | escape}}</h1>
+{%- endmacro -%}
+{{ report_jobs(jobs, head_job, report_job) }}

biopipen/reports/cnvkit/CNVkitDiagram.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
     <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
     <Image src="{{pngfile}}" />
     {%- endfor -%}

biopipen/reports/cnvkit/CNVkitHeatmap.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
     <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
     <Image src="{{pngfile}}" />
     {%- endfor -%}

biopipen/reports/cnvkit/CNVkitScatter.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- for pngfile in job.out.outdir | joinpaths: '*.png' | glob -%}
+    {%- for pngfile in job.out.outdir | glob: '*.png' -%}
     <h{{h+1}}>{{pngfile | stem0 | title}}</h{{h+1}}>
     <Image src="{{pngfile}}" />
     {%- endfor -%}

biopipen/reports/common.svelte ADDED Viewed

@@ -0,0 +1,15 @@
+{% from "utils/misc.liq" import report_jobs, table_of_images -%}
+<script>
+    import { Image, DataTable, Descr } from "$libs";
+    import { Tabs, Tab, TabContent, UnorderedList, ListItem, InlineNotification } from "$ccs";
+</script>
+{%- macro report_job(job, h=1) -%}
+    {{ job | render_job: h=h }}
+{%- endmacro -%}
+{%- macro head_job(job) -%}
+    <h1>{{job.in | attr: "values" | call | first | stem0 | escape}}</h1>
+{%- endmacro -%}
+{{ report_jobs(jobs, head_job, report_job) }}

biopipen/reports/protein/ProdigySummary.svelte ADDED Viewed

@@ -0,0 +1,16 @@
+{% from "utils/misc.liq" import report_jobs -%}
+<script>
+    import { Image, DataTable, Descr } from "$libs";
+</script>
+{%- macro report_job(job, h=1) -%}
+    {{ job | render_job: h=h }}
+{%- endmacro -%}
+{%- macro head_job(job) -%}
+    <h1>{{job.out.outdir | stem | escape}}</h1>
+{%- endmacro -%}
+{{ report_jobs(jobs, head_job, report_job) }}

biopipen/reports/scrna/CellsDistribution.svelte CHANGED Viewed

@@ -1,50 +1,15 @@
 {% from "utils/misc.liq" import report_jobs, table_of_images -%}
 <script>
-    import { Image } from "$libs";
+    import { Image, DataTable, Descr } from "$libs";
+    import { Tabs, Tab, TabContent } from "$ccs";
 </script>
 {%- macro report_job(job, h=1) -%}
-    {%- set secdirs = job.out.outdir | glob: "*" -%}
-    {%- if len(secdirs) == 1 -%}
-        {%- set secname = secdirs | first | basename -%}
-        {%- if secdirs[0] | joinpaths: "venn.png" | exists -%}
-            {%- if secname == "DEFAULT" -%}
-                <h{{h}}>Case overlapping</h{{h}}>
-            {%- else -%}
-                <h{{h}}>{{ secname | escape }} - Case overlapping</h{{h}}>
-            {%- endif -%}
-            {{ table_of_images(
-                [joinpaths(secdirs[0], "venn.png"), joinpaths(secdirs[0], "upset.png")],
-                ["Venn plot", "Upset plot"]) }}
-        {%- endif -%}
-        {%- for plotfile in secdirs[0] | glob: "case-*.png" -%}
-            {%- if secname == "DEFAULT" -%}
-                <h{{h}}>{{ plotfile | stem | escape }}</h{{h}}>
-            {%- else -%}
-                <h{{h}}>{{ secname | escape }} - {{ plotfile | stem | escape }}</h{{h}}>
-            {%- endif -%}
-            <Image src={{plotfile | quote}} />
-        {%- endfor -%}
-    {%- else -%}
-        {%- for secdir in secdirs -%}
-            {%- set sec = secdir | basename -%}
-            <h{{h}}>{{sec | escape}}</h{{h}}>
-            {%- if secdir | joinpaths: "venn.png" | exists -%}
-                <h{{h+1}}>Case overlapping</h{{h+1}}>
-                {{ table_of_images(
-                    [joinpaths(secdir, "venn.png"), joinpaths(secdir, "upset.png")],
-                    ["Venn plot", "Upset plot"]) }}
-            {%- endif -%}
-            {%- for plotfile in secdir | glob: "case-*.png" -%}
-                <h{{h+1}}>{{ plotfile | stem }}</h{{h+1}}>
-                <Image src={{plotfile | quote}} />
-            {%- endfor -%}
-        {%- endfor -%}
-    {%- endif -%}
+    {{ job | render_job: h=h }}
 {%- endmacro -%}
 {%- macro head_job(job) -%}
-<h1>{{job.in.srtobj | stem0 | escape}}</h1>
+    <h1>{{job.in.srtobj | stem0 | escape}}</h1>
 {%- endmacro -%}
 {{ report_jobs(jobs, head_job, report_job) }}

biopipen/reports/scrna/DimPlots.svelte CHANGED Viewed

@@ -4,7 +4,7 @@
 </script>
 {%- macro report_job(job, h=1) -%}
-{% set images = job.out.outdir | joinpaths: "*.png" | glob %}
+{% set images = job.out.outdir | glob: "*.png" %}
 {{ table_of_images(images) }}
 {%- endmacro -%}

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0py3-none-any.whl → 0.34.26py3-none-any.whl