bioflowkit 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bioflowkit-0.3.0/.gitattributes +20 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/.github/workflows/nightly-smoke.yml +10 -2
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/.gitignore +1 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/CHANGELOG.md +202 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/PKG-INFO +2 -2
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/README.md +1 -1
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/__init__.py +3 -1
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/runner.py +86 -7
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/comparative_genomics/ani_matrix.py +9 -5
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/epigenomics/atac_seq.py +5 -3
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/epigenomics/chip_seq.py +11 -8
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/genome_assembly/eukaryote_assembly.py +14 -7
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/metagenomics/metagenome_assembly.py +6 -2
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/metagenomics/metagenomics_profile.py +4 -3
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/methylation/bismark_wgbs.py +67 -8
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/proteomics/proteomics_dda.py +5 -2
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/rnaseq_deg/rnaseq_deg.py +28 -8
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/variant_calling/germline_variants.py +46 -13
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/variant_calling/joint_genotyping.py +51 -10
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/sdk/__init__.py +43 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/sdk/_paths.py +35 -1
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/conda-recipe/meta.yaml +1 -1
- bioflowkit-0.3.0/data/test/cafe_small/README.md +21 -0
- bioflowkit-0.3.0/data/test/cafe_small/families.tsv +61 -0
- bioflowkit-0.3.0/data/test/cafe_small/tree.nwk +1 -0
- bioflowkit-0.3.0/data/test/genomes_small/README.md +28 -0
- bioflowkit-0.3.0/data/test/genomes_small/genome1.fna +78 -0
- bioflowkit-0.3.0/data/test/genomes_small/genome2.fna +78 -0
- bioflowkit-0.3.0/data/test/gwas_small/README.md +22 -0
- bioflowkit-0.3.0/data/test/gwas_small/gene_presence_absence.csv +13 -0
- bioflowkit-0.3.0/data/test/gwas_small/traits.csv +11 -0
- bioflowkit-0.3.0/data/test/methyl_small/README.md +29 -0
- bioflowkit-0.3.0/data/test/methyl_small/genome.fa +79 -0
- bioflowkit-0.3.0/data/test/methyl_small/sample01_R1.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/methyl_small/sample01_R2.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/phix_small/README.md +41 -0
- bioflowkit-0.3.0/data/test/phix_small/reference.fa +79 -0
- bioflowkit-0.3.0/data/test/phix_small/sim_R1.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/phix_small/sim_R2.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/phylo_small/README.md +25 -0
- bioflowkit-0.3.0/data/test/phylo_small/gene_presence_absence.csv +9 -0
- bioflowkit-0.3.0/data/test/phylo_small/gffs/g1.ffn +67 -0
- bioflowkit-0.3.0/data/test/phylo_small/gffs/g1.gff +100 -0
- bioflowkit-0.3.0/data/test/phylo_small/gffs/g2.ffn +67 -0
- bioflowkit-0.3.0/data/test/phylo_small/gffs/g2.gff +100 -0
- bioflowkit-0.3.0/data/test/phylo_small/gffs/g3.ffn +72 -0
- bioflowkit-0.3.0/data/test/phylo_small/gffs/g3.gff +101 -0
- bioflowkit-0.3.0/data/test/phylo_small/gffs/g4.ffn +67 -0
- bioflowkit-0.3.0/data/test/phylo_small/gffs/g4.gff +101 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/README.md +22 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/ctl1_R1.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/ctl1_R2.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/ctl2_R1.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/ctl2_R2.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/transcriptome.fa +552 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/trt1_R1.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/trt1_R2.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/trt2_R1.fastq.gz +0 -0
- bioflowkit-0.3.0/data/test/rnaseq_small/trt2_R2.fastq.gz +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/MAINTAINER.md +1 -1
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/benchmarks/nfcore-concordance.md +2 -2
- bioflowkit-0.3.0/docs/reference/e2e-coverage.md +68 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/reference/recipes.md +38 -21
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/reference/tools.md +50 -50
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/mkdocs.yml +1 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/pyproject.toml +1 -1
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/alignment/bowtie2.yaml +5 -2
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/alignment/samtools.yaml +6 -3
- bioflowkit-0.3.0/scripts/gen_methyl_fixture.py +89 -0
- bioflowkit-0.3.0/tests/integration/test_full_pipeline_e2e.py +343 -0
- bioflowkit-0.3.0/tests/unit/test_docker_timeout.py +130 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_recipes_per_pipeline.py +3 -3
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_recipes_per_pipeline_e2e.py +13 -3
- bioflowkit-0.3.0/tests/unit/test_resource_clamp.py +54 -0
- bioflowkit-0.3.0/tests/unit/test_unsafe_paths.py +71 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/.github/workflows/candidate-smoke-test.yml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/.github/workflows/ci.yml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/.github/workflows/docs.yml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/.github/workflows/nfcore-concordance.yml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/.github/workflows/release.yml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/CODE_OF_CONDUCT.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/CONTRIBUTING.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/LICENSE +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/SECURITY.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/README.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_ananatis_019464615_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_ananatis_019464615_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_ananatis_019464615_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_aquatica_900095885_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_aquatica_900095885_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_aquatica_900095885_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_chrysanthemi_000023565_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_chrysanthemi_000023565_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_chrysanthemi_000023565_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_dadantii_003049785_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_dadantii_003049785_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_dadantii_003049785_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_dianthicola_003403135_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_dianthicola_003403135_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_dianthicola_003403135_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_fangzhongdai_002812485_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_fangzhongdai_002812485_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_fangzhongdai_002812485_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_lacustris_003934295_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_lacustris_003934295_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_lacustris_003934295_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_oryzae_020406815_2.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_oryzae_020406815_2.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_oryzae_020406815_2.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_parazeae_000025065_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_parazeae_000025065_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_parazeae_000025065_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_poaceiphila_007858975_2.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_poaceiphila_007858975_2.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_poaceiphila_007858975_2.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_solani_001644705_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_solani_001644705_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_solani_001644705_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_undicola_000784735_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_undicola_000784735_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_undicola_000784735_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_zeae_002887555_1.card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_zeae_002887555_1.plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/D_zeae_002887555_1.vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/_summary_card.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/_summary_plasmidfinder.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/abricate/_summary_vfdb.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/cafe/results/Base_clade_results.txt +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/cafe/results/Base_family_likelihoods.txt +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/cafe/results/Base_family_results.txt +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/cafe/results/Base_results.txt +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/cafe/vfdb_counts.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/eggnog/cog_counts_by_bucket.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/eggnog/cog_fractions_by_bucket.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/abricate_card.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/abricate_full_boxplot.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/abricate_full_card.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/abricate_full_plasmidfinder.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/abricate_full_vfdb.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/abricate_plasmidfinder.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/abricate_vfdb.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/ani_full_heatmap.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/ani_heatmap.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/cafe_hcp_detail.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/cafe_vfdb_tree.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/cog_delta.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/cog_stacked.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/pangenome_curve.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/pangenome_full_curve.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/pangenome_full_pie.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/pangenome_pie.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/scoary_full_is_dianthicola.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/scoary_full_is_solani.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/scoary_full_soft_rot.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/scoary_full_vascular_wilt.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/scoary_soft_rot.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/scoary_vascular_wilt.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/solani_island_gc.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/solani_island_synteny.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/tree_ani_nj.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/tree_full_with_vfdb.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/figures/tree_ml_iqtree.png +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/phylogeny/ani_nj.nwk +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/phylogeny/iqtree.treefile +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/phylogeny_full/iqtree_full.treefile +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/scoary/top25_soft_rot.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/scoary/top25_vascular_wilt.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/scoary/traits.csv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/scoary_full/top30_is_dianthicola.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/scoary_full/top30_is_solani.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/scoary_full/top30_soft_rot.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/scoary_full/top30_vascular_wilt.tsv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/scoary_full/traits.csv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/analysis/dickeya/summary.html +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/__main__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/_app.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/db.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/doctor.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/hw.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/llm.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/ncbi.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/pipelines.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/provenance.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/recipe.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/setup.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/cli/update.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/approve.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/checkpoint.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/compatibility.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/dag.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/db.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/doctor.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/hardware.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/logger.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/ncbi.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/planner.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/provenance.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/registry.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/core/report.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/io.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/llm/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/llm/audit.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/atac_seq.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/chip_seq.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/genome_assembly.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/metagenomics.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/methylation.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/proteomics.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/rnaseq_deg.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/scrna_seq.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/pipelines/variant_calling.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/comparative_genomics/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/comparative_genomics/amr_vf_catalogue.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/comparative_genomics/cafe_evolution.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/comparative_genomics/cog_enrichment.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/comparative_genomics/download_taxon.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/comparative_genomics/gwas.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/comparative_genomics/pangenome.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/comparative_genomics/phylogeny.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/epigenomics/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/genome_assembly/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/genome_assembly/prokaryote_assembly.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/metagenomics/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/methylation/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/proteomics/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/rnaseq_deg/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/single_cell/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/single_cell/scrna_seq.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/recipes/variant_calling/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/report.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/sdk/_cache.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/sdk/_hashing.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/sdk/_parallel.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/sdk/_pipeline.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/sdk/_result.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/sdk/_runtime.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/bioflow/sdk/_stage.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/data/test/ecoli_small/R1.fastq.gz +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/data/test/ecoli_small/R2.fastq.gz +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/data/test/ecoli_small/real_R1.fastq.gz +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/data/test/ecoli_small/real_R2.fastq.gz +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/data/test/ecoli_small/reference.fa +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/data/test/rnaseq_toy/R1.fastq.gz +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/data/test/rnaseq_toy/genome.fa +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/data/test/rnaseq_toy/genome.gtf +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/data/test/rnaseq_toy/samples.csv +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docker/core/Dockerfile +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docker/docker-compose.yml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/DESIGN.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/architecture.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/index.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/install.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/maintainer/UPDATE_CADENCES.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/maintainer/cowork_schedule_prompt.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/maintainer/quarterly_audit_prompt.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/maintainer/research_prompt.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/docs/quickstart.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/cache_demo.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_atac_seq.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_chip_seq.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_custom.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_eukaryote_hifi.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_metagenomics.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_methylation.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_prokaryote_short.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_proteomics.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_recommend.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_rnaseq.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/config_scrna_seq.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/parallel_demo.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/pectobacterium_demo.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/pipeline_demo.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/recipes_quickstart.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/examples/stage_demo.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/README.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/atac_seq_standard.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/chip_seq_standard.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/eukaryote_denovo_hifi.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/eukaryote_denovo_hybrid.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/eukaryote_resequencing.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/metagenomics_kraken2_standard.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/metagenomics_metaphlan4_standard.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/methylation_bismark_wgbs.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/prokaryote_denovo_hybrid.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/prokaryote_denovo_short.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/proteomics_msfragger_dda.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/rnaseq_deseq2_standard.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/scrna_seq_10x_scanpy.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/presets/scrna_seq_10x_seurat.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/schema.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/alignment/bedtools.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/alignment/bwa.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/alignment/bwa_mem2.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/alignment/minimap2.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/abyss.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/canu.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/flye.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/hifiasm.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/masurca.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/medaka.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/megahit.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/nextdenovo.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/nextpolish.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/pilon.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/racon.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/raven.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/shasta.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/spades.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/unicycler.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly/verkko.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly_qc/busco.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly_qc/checkm2.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly_qc/compleasm.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly_qc/gfastats.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly_qc/merqury.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/assembly_qc/quast.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/abricate.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/cafe5.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/diamond.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/fastani.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/iqtree.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/mafft.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/mash.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/panaroo.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/roary.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/scoary.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/comparative_genomics/skani.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/deg/deseq2.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/deg/edger.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/deg/limma_voom.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/enrichment/clusterprofiler.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/enrichment/enrichr.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/enrichment/gseapy.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/enrichment/topgo.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/epigenomics/bismark.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/epigenomics/deeptools.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/epigenomics/homer.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/epigenomics/macs3.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/epigenomics/methylkit.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/epigenomics/methylpy.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/epigenomics/picard.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/epigenomics/tobias.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/func_annot/antismash.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/func_annot/dbcan.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/func_annot/eggnog_mapper.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/func_annot/gtdbtk.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/func_annot/interproscan.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/metagenomics/bracken.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/metagenomics/humann3.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/metagenomics/kneaddata.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/metagenomics/kraken2.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/metagenomics/lefse.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/metagenomics/maxbin2.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/metagenomics/metabat2.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/metagenomics/metaphlan4.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/proteomics/comet.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/proteomics/fragpipe.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/proteomics/maxquant.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/proteomics/msconvert.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/proteomics/msfragger.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/proteomics/openms.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/proteomics/percolator.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/proteomics/xtandem.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/qc/cutadapt.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/qc/fastp.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/qc/fastqc.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/qc/filtlong.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/qc/multiqc.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/qc/nanoplot.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/qc/seqkit.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/qc/trimgalore.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/repeat/earlgrey.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/repeat/repeatmasker.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/repeat/repeatmodeler.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/rnaseq_align/hisat2.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/rnaseq_align/kallisto.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/rnaseq_align/rsem.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/rnaseq_align/salmon.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/rnaseq_align/star.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/rnaseq_align/stringtie.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/rnaseq_align/subread.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/single_cell/bustools.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/single_cell/cellranger.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/single_cell/harmony.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/single_cell/monocle3.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/single_cell/scanpy.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/single_cell/scrublet.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/single_cell/seurat.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/single_cell/starsolo.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/struct_annot/augustus.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/struct_annot/bakta.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/struct_annot/braker3.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/struct_annot/liftoff.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/struct_annot/prokka.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/variant_calling/bcftools.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/variant_calling/freebayes.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/variant_calling/gatk4.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/registry/tools/variant_calling/snpeff.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/compare_nfcore.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/gen_docs.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/install-schedule-cron-daily.sh +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/install-schedule-cron-weekly.sh +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/install-schedule-cron.sh +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/install-schedule-daily.ps1 +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/install-schedule-weekly.ps1 +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/install-schedule-windows.ps1 +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/pin_digests.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/scripts/refresh_tags.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/e2e/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/e2e/test_prokaryote_short.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/e2e/test_rnaseq.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/fixtures/hypo_assembler.yaml +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/integration/__init__.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/integration/test_docker_backend.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/integration/test_recipe_real_data.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/integration/test_recipe_smoke_matrix.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/integration/test_sdk_real_docker.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_approve.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_benchmark.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_bugfixes.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_compare_nfcore.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_crossplatform.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_dag.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_db.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_digest_pinning.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_doctor.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_failure_report.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_freshness_check.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_gpu_podman.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_interactive.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_io.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_llm.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_llm_audit.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_llm_diagnose.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_llm_setup.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_ncbi.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_planner.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_planner_eukaryote.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_planner_rnaseq.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_provenance.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_recipe_cli_args.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_recipe_registry_alignment.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_recipes.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_recipes_cookbook.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_recommend.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_registry_resolver.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_registry_sanity.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_release_watch.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_report.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_report_builder.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_run_resume.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_runner.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_sdk_cache.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_sdk_external_mounts.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_sdk_parallel.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_sdk_pipeline.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_sdk_retry.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_sdk_stage.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_sdk_streaming.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_skeleton.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/tests/unit/test_update_auto.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/update/REGISTRY_CHANGELOG.md +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/update/benchmark.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/update/freshness_check.py +0 -0
- {bioflowkit-0.2.1 → bioflowkit-0.3.0}/update/release_watch.py +0 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Test fixtures are read by Linux containers (FastANI, CAFE5, …). Some
|
|
2
|
+
# tool parsers don't strip a stray CR, so a CRLF checkout on Windows
|
|
3
|
+
# silently corrupts them (CAFE5 read the last column header as "D\r" and
|
|
4
|
+
# reported "D was not found"). Force LF for text fixtures everywhere,
|
|
5
|
+
# and mark the compressed reads binary so git never touches them.
|
|
6
|
+
data/test/**/*.fna text eol=lf
|
|
7
|
+
data/test/**/*.fa text eol=lf
|
|
8
|
+
data/test/**/*.fasta text eol=lf
|
|
9
|
+
data/test/**/*.tsv text eol=lf
|
|
10
|
+
data/test/**/*.csv text eol=lf
|
|
11
|
+
data/test/**/*.nwk text eol=lf
|
|
12
|
+
data/test/**/*.txt text eol=lf
|
|
13
|
+
data/test/**/*.gff text eol=lf
|
|
14
|
+
data/test/**/*.ffn text eol=lf
|
|
15
|
+
data/test/**/*.gz binary
|
|
16
|
+
|
|
17
|
+
# Registry + recipe shell commands also end up inside Linux containers;
|
|
18
|
+
# keep them LF regardless of host.
|
|
19
|
+
*.yaml text eol=lf
|
|
20
|
+
registry/** text eol=lf
|
|
@@ -48,12 +48,20 @@ jobs:
|
|
|
48
48
|
-v -m docker \
|
|
49
49
|
--junitxml=reports/smoke.xml
|
|
50
50
|
|
|
51
|
-
- name:
|
|
51
|
+
- name: Run full-pipeline e2e (all 9 recipes with committed fixtures)
|
|
52
|
+
env:
|
|
53
|
+
BIOFLOW_LOG_LEVEL: INFO
|
|
54
|
+
run: |
|
|
55
|
+
python -m pytest tests/integration/test_full_pipeline_e2e.py \
|
|
56
|
+
-v -m docker \
|
|
57
|
+
--junitxml=reports/full_e2e.xml
|
|
58
|
+
|
|
59
|
+
- name: Upload junit reports
|
|
52
60
|
if: always()
|
|
53
61
|
uses: actions/upload-artifact@v4
|
|
54
62
|
with:
|
|
55
63
|
name: smoke-junit
|
|
56
|
-
path: reports
|
|
64
|
+
path: reports/*.xml
|
|
57
65
|
|
|
58
66
|
- name: Fail the job on red results
|
|
59
67
|
if: failure()
|
|
@@ -12,6 +12,208 @@ ship bug fixes only. Breaking changes to the documented public API
|
|
|
12
12
|
|
|
13
13
|
---
|
|
14
14
|
|
|
15
|
+
## [Unreleased]
|
|
16
|
+
|
|
17
|
+
## [0.3.0] — 2026-06-18
|
|
18
|
+
|
|
19
|
+
### Fixed — aligner images had no samtools (5 recipes broke at alignment)
|
|
20
|
+
The plain single-tool aligner BioContainers (`bwa`, `bowtie2`,
|
|
21
|
+
`minimap2`) do **not** bundle samtools, yet five recipes ran
|
|
22
|
+
`aligner | samtools sort` / `samtools index` inside them — so every one
|
|
23
|
+
failed at its alignment stage with `samtools: command not found`. The
|
|
24
|
+
smoke matrix only exercises each recipe's *first* stage, so this stayed
|
|
25
|
+
invisible. (See #2.) Each aligner stage now uses an image that carries
|
|
26
|
+
both tools:
|
|
27
|
+
- **germline_variants**, **joint_genotyping**: a mulled
|
|
28
|
+
`bwa 0.7.19 + samtools 1.22` image. A new **`prepare_reference`**
|
|
29
|
+
stage builds the BWA index + `.fai` + `.dict` **once** (in the cohort
|
|
30
|
+
recipe, before the per-sample fan-out — previously each parallel
|
|
31
|
+
sample raced to index the *shared* reference), and the gatk call stage
|
|
32
|
+
drops its own `samtools` calls (`MarkDuplicates --CREATE_INDEX`
|
|
33
|
+
indexes the dedup BAM; the gatk4 image ships no samtools either).
|
|
34
|
+
Verified end-to-end on phiX: prep → bwa-mem → sort/index →
|
|
35
|
+
MarkDuplicates → HaplotypeCaller now produces a valid VCF.
|
|
36
|
+
- **chip_seq**, **atac_seq**: `staphb/bowtie2:2.5.4` (bundles samtools).
|
|
37
|
+
- **metagenome_assembly**: a mulled `minimap2 2.31 + samtools 1.23` image.
|
|
38
|
+
- `registry/tools/alignment/bowtie2.yaml` had the same broken
|
|
39
|
+
assumption (and `samtools.yaml` documented it as fact) — both fixed.
|
|
40
|
+
|
|
41
|
+
### Fixed — static audit of the never-e2e'd recipes (round 1)
|
|
42
|
+
A static pass over the 10 recipes that have no committed full-e2e fixture
|
|
43
|
+
(they need external reference data) turned up several latent defects:
|
|
44
|
+
- **proteomics_dda**: the percolator FDR cut used ``awk -F\t``, whose
|
|
45
|
+
backslash is stripped by bash before awk sees it — so the field separator
|
|
46
|
+
became the literal letter ``t`` instead of a tab, and
|
|
47
|
+
``passing_psms.tsv`` was filtered on garbage columns. Now ``-F"\t"``,
|
|
48
|
+
which reaches awk as a real tab (verified).
|
|
49
|
+
- **eukaryote_assembly**: the docstring advertised ``polish=False`` to
|
|
50
|
+
skip Medaka for HiFi reads, but no such parameter existed. Added it
|
|
51
|
+
(the ``polish`` stage is renamed ``polish_consensus`` to free the
|
|
52
|
+
name); ``assess`` already falls back to Flye's ``assembly.fasta``.
|
|
53
|
+
- **chip_seq**: docstring promised ``--ctrl-r1 / --ctrl-r2`` raw-control
|
|
54
|
+
alignment the recipe never implemented — corrected to document the
|
|
55
|
+
actual ``--ctrl-bam`` (pre-aligned control) input.
|
|
56
|
+
- **metagenomics_profile**: ``bioflow db fetch`` example used a
|
|
57
|
+
catalog key that doesn't exist (``kraken2_standard`` →
|
|
58
|
+
``kraken2_standard_8gb``) and now notes Bracken's ``kmer_distrib`` need.
|
|
59
|
+
|
|
60
|
+
### Docs — strict build fixed + e2e-coverage page
|
|
61
|
+
- `mkdocs build --strict` was aborting: three docs links pointed at repo
|
|
62
|
+
files outside the `docs/` tree (`conda-recipe/meta.yaml`,
|
|
63
|
+
`scripts/compare_nfcore.py`, the nf-core workflow), which strict mode
|
|
64
|
+
flags as unresolved. Re-pointed them at GitHub blob URLs.
|
|
65
|
+
- New **`reference/e2e-coverage.md`** documents which 9 recipes have a
|
|
66
|
+
committed full end-to-end fixture and which 10 are gated on external
|
|
67
|
+
reference data (with the `bioflow db fetch` key for each), plus the one
|
|
68
|
+
utility recipe. Regenerated `reference/recipes.md` / `tools.md` from
|
|
69
|
+
the registry (now 20 recipes; `joint_genotyping` was missing and image
|
|
70
|
+
tags were stale).
|
|
71
|
+
|
|
72
|
+
### Fixed — ani_matrix broken for genomes outside the workspace
|
|
73
|
+
- **Bug a full e2e caught**: FastANI reads genome paths from a *list
|
|
74
|
+
file*, not the command, so the SDK's command-path translator and
|
|
75
|
+
auto-mount never applied to them — every external genome failed with
|
|
76
|
+
`Could not open <host path>`. Since genomes normally live outside the
|
|
77
|
+
output workspace, this broke the recipe's primary documented use.
|
|
78
|
+
- New SDK helper **`stage_input(path)`** copies an external file into the
|
|
79
|
+
active workspace (always mounted at `/work`) and returns its container
|
|
80
|
+
path — the clean primitive for any recipe that feeds a tool a list
|
|
81
|
+
file of paths. Also exported **`container_path(path)`**.
|
|
82
|
+
- `ani_matrix` now stages genomes via `stage_input` and writes container
|
|
83
|
+
paths into the FastANI list; verified end-to-end (genome1 vs genome2 =
|
|
84
|
+
99.5% ANI).
|
|
85
|
+
|
|
86
|
+
### Added — full e2e for the comparative-genomics recipes
|
|
87
|
+
- `tests/integration/test_full_pipeline_e2e.py` gains real end-to-end
|
|
88
|
+
tests for **amr_vf_catalogue** (ABRicate fan-out, bundled DBs),
|
|
89
|
+
**ani_matrix** (all-vs-all FastANI), **pangenome** (Prokka × N →
|
|
90
|
+
Roary), and **gwas** (Scoary on a synthetic Roary GPA + phenotype,
|
|
91
|
+
recovers a planted association). Fixtures:
|
|
92
|
+
`data/test/genomes_small/` (phiX174 + a 25-SNP variant) and
|
|
93
|
+
`data/test/gwas_small/` (12-gene × 10-sample GPA). Recipes validated
|
|
94
|
+
end-to-end: 1 (prokaryote) → 9 (see below).
|
|
95
|
+
- **cafe_evolution** (CAFE5 gene-family expansion/contraction) added as
|
|
96
|
+
the 6th, on `data/test/cafe_small/` (ultrametric 4-taxon tree + 60
|
|
97
|
+
families).
|
|
98
|
+
- **phylogeny** (single-copy core → MAFFT × N → IQ-TREE) added as the
|
|
99
|
+
7th, on `data/test/phylo_small/` (Prokka GFF + CDS + Roary GPA for 4
|
|
100
|
+
phiX strains; IQ-TREE recovers a 4-taxon ML tree).
|
|
101
|
+
- **rnaseq_deg** (fastp → Salmon → DESeq2 → enrichment + MultiQC) added
|
|
102
|
+
as the 8th, on `data/test/rnaseq_small/` (60 synthetic transcripts, 4
|
|
103
|
+
samples, 10 transcripts planted ~4× up in the treated group). DESeq2
|
|
104
|
+
recovers the planted signal (`tx0001` log2FC ≈ 2) and the run finishes
|
|
105
|
+
in seconds. The sample sheet is built by the test at run time so no
|
|
106
|
+
machine-specific paths are committed.
|
|
107
|
+
- **methylation_wgbs** (TrimGalore → Bismark → methylKit) added as the
|
|
108
|
+
9th, on `data/test/methyl_small/` (phiX174 + 3,000 synthetic
|
|
109
|
+
directional bisulfite read pairs, ~70 % CpG-methylated). The reads
|
|
110
|
+
map at 100 % and Bismark produces a real cytosine report; the genome
|
|
111
|
+
is **not** committed pre-prepared — the new `bismark_prep` stage
|
|
112
|
+
(below) bisulfite-converts it at run time, so no version-tied bowtie2
|
|
113
|
+
index lands in git. Regenerated deterministically by
|
|
114
|
+
`scripts/gen_methyl_fixture.py`.
|
|
115
|
+
|
|
116
|
+
### Added — methylation_wgbs prepares its genome (matches the docs)
|
|
117
|
+
- The recipe's docstring promised automatic genome preparation, but the
|
|
118
|
+
pipeline had no such stage — it silently required a pre-prepared
|
|
119
|
+
`Bisulfite_Genome/` directory, so running from a plain reference FASTA
|
|
120
|
+
failed. A new **`bismark_prep`** stage now runs
|
|
121
|
+
`bismark_genome_preparation` when `--bismark-genome` is a FASTA (or a
|
|
122
|
+
directory holding one); an already-prepared directory is detected and
|
|
123
|
+
used directly, skipping preparation. `methylation_wgbs` is now 4
|
|
124
|
+
stages (trim → bismark_prep → bismark → methylkit).
|
|
125
|
+
|
|
126
|
+
### Fixed — methylKit CpG-report glob (shell ate the regex escape)
|
|
127
|
+
- `methylkit_dmr` matched the cytosine report with `pattern='…txt(\.gz)?$'`,
|
|
128
|
+
but the `\.` escape was stripped by the shell before R parsed the
|
|
129
|
+
string, so R aborted with *"'\.' is an unrecognized escape in character
|
|
130
|
+
string"* and the whole recipe failed at the final stage. Replaced with
|
|
131
|
+
a `[.]` character class (no backslash to escape), which matches the
|
|
132
|
+
report — with or without a `.gz` suffix — robustly.
|
|
133
|
+
|
|
134
|
+
### Fixed — rnaseq_deg DESeq2 step (two latent bugs)
|
|
135
|
+
- The `deseq2_diff` stage required **tximport**, which the
|
|
136
|
+
`bioconductor-deseq2` BioContainer does not ship — every run failed
|
|
137
|
+
with "there is no package called 'tximport'". Rewritten to assemble
|
|
138
|
+
the count matrix in base R straight from each sample's `quant.sf`
|
|
139
|
+
(`NumReads`, rounded) and feed `DESeqDataSetFromMatrix`, dropping the
|
|
140
|
+
tximport dependency entirely.
|
|
141
|
+
- A second, masked bug: `samples$sample_id` inside the `Rscript -e "…"`
|
|
142
|
+
body (run via `sh -c`) was shell-expanded because the `$` was
|
|
143
|
+
unescaped, so the `file.path(...)` of `quant.sf` paths was wrong.
|
|
144
|
+
Escaped to `\$sample_id`. The pipeline now also **fails fast** if the
|
|
145
|
+
DESeq2 stage exits non-zero (the downstream Enrichr step tolerates an
|
|
146
|
+
empty gene list, which previously masked a broken DEG table).
|
|
147
|
+
|
|
148
|
+
### Fixed — LF line endings for container-read fixtures
|
|
149
|
+
- A new `.gitattributes` pins text test fixtures (and registry YAMLs) to
|
|
150
|
+
LF. CAFE5 doesn't strip a trailing CR, so a CRLF checkout on Windows
|
|
151
|
+
made it read the last species column as `D\r` and fail with "D was not
|
|
152
|
+
found in gene family …". FASTA parsers tolerated the CR, but the
|
|
153
|
+
table parser did not — LF is now enforced so fixtures are safe on
|
|
154
|
+
every host.
|
|
155
|
+
|
|
156
|
+
### Fixed — bounded stdout retention (no orchestrator OOM on chatty tools)
|
|
157
|
+
- `DockerBackend.run` accumulated **every** stdout line in memory; a
|
|
158
|
+
tool that emits millions of lines (Roary, IQ-TREE) could OOM the
|
|
159
|
+
orchestrator. It now retains only the trailing `_STDOUT_TAIL_LINES`
|
|
160
|
+
(5000) via a bounded `deque` for the diagnostic `CommandResult.stdout`
|
|
161
|
+
— every line still streams live to `log_callback`, and real artifacts
|
|
162
|
+
go to files in the workspace.
|
|
163
|
+
- Tests: +1 (`tests/unit/test_docker_timeout.py`) asserting the tail is kept.
|
|
164
|
+
|
|
165
|
+
### Fixed — clear error for shell-unsafe external input filenames
|
|
166
|
+
- An external input file whose **basename** contained a space or shell
|
|
167
|
+
metacharacter silently corrupted the recipe's command — bioflow mounts
|
|
168
|
+
the file's parent at the space-free `/inputs/<n>` and splices the
|
|
169
|
+
basename in unquoted, and it can't be quoted generically because many
|
|
170
|
+
recipes wrap the whole command in `bash -c '…'`. (A spaced *directory*
|
|
171
|
+
was already fine — only the basename survives into the command.)
|
|
172
|
+
- `_collect_external_mounts` now raises an actionable `ValueError`
|
|
173
|
+
naming the offending characters and telling the user to rename /
|
|
174
|
+
symlink to a safe name.
|
|
175
|
+
- Tests: +12 (`tests/unit/test_unsafe_paths.py`), incl. confirmation
|
|
176
|
+
that spaced *directories* and workspace-internal paths are unaffected.
|
|
177
|
+
|
|
178
|
+
### Fixed — stage_timeout now actually bounds runtime
|
|
179
|
+
- **Latent bug**: `run_plan(stage_timeout=…)` never worked. The log
|
|
180
|
+
loop (`container.logs(stream=True, follow=True)`) blocks until the
|
|
181
|
+
container exits, so the subsequent `container.wait(timeout=…)` — a
|
|
182
|
+
docker-py HTTP read timeout, not a runtime cap — could never fire for
|
|
183
|
+
a runaway container; it would hang forever.
|
|
184
|
+
- `DockerBackend.run` now starts a watchdog `threading.Timer` that
|
|
185
|
+
`container.kill()`s the stage when the timeout elapses, returning the
|
|
186
|
+
conventional exit code **124** with a clear message.
|
|
187
|
+
- Tests: +3 (`tests/unit/test_docker_timeout.py`, fake-container based,
|
|
188
|
+
deterministic).
|
|
189
|
+
|
|
190
|
+
### Fixed — DockerBackend now clamps CPU/RAM to host capacity
|
|
191
|
+
- **Bug the full-pipeline e2e caught**: a stage declaring `cpu=8` (e.g.
|
|
192
|
+
SPAdes in `prokaryote_assembly`) failed to even *start* on any host
|
|
193
|
+
with fewer cores — Docker rejects a container whose `--cpus` exceeds
|
|
194
|
+
the host count ("range of CPUs is from 0.01 to N.00"), so all 3 retry
|
|
195
|
+
attempts died instantly. Passed locally (12 cores) but failed on the
|
|
196
|
+
4-core CI runner — and would hit any user on a small workstation.
|
|
197
|
+
- `DockerBackend.run` now clamps the requested CPU to the host core count
|
|
198
|
+
and RAM to ~90% of host memory (`_clamp_resources`), so an
|
|
199
|
+
over-ambitious resource request degrades to "use what's available"
|
|
200
|
+
instead of crashing.
|
|
201
|
+
- Tests: +5 (`tests/unit/test_resource_clamp.py`).
|
|
202
|
+
|
|
203
|
+
### Added — first full-pipeline end-to-end test
|
|
204
|
+
- `tests/integration/test_full_pipeline_e2e.py`: runs the **entire**
|
|
205
|
+
`prokaryote_assembly` recipe (fastp → SPAdes → QUAST → Prokka) against
|
|
206
|
+
real BioContainers — the first time a complete pipeline (not just a
|
|
207
|
+
first stage, as the smoke matrix does) is validated end-to-end.
|
|
208
|
+
- New `data/test/phix_small/` fixture: phiX174 (`NC_001422.1`, 5386 bp) +
|
|
209
|
+
1000 wgsim-simulated 150 bp pairs (~56×, seed 42 → deterministic).
|
|
210
|
+
phiX assembles into a single ~5.4 kb contig in <1 min, so the whole
|
|
211
|
+
chain finishes in ~45 s.
|
|
212
|
+
- Asserts real data flow: assembled contig length 4.5–6 kb, QUAST
|
|
213
|
+
`report.tsv`, and Prokka annotation with ≥1 CDS. Verified locally
|
|
214
|
+
(phiX → 1 contig 5377 bp, 6 CDS).
|
|
215
|
+
- Wired into the nightly-smoke workflow as a second step.
|
|
216
|
+
|
|
15
217
|
## [0.2.1] — 2026-06-12
|
|
16
218
|
|
|
17
219
|
> **Why upgrade from 0.2.0**: the `v0.2.0` tag predated the registry
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bioflowkit
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: bioflow: one-line comparative-genomics recipes + Tier-A SDK, orchestrated over per-tool Docker BioContainers.
|
|
5
5
|
Project-URL: Homepage, https://github.com/hope9901/bioflow
|
|
6
6
|
Project-URL: Repository, https://github.com/hope9901/bioflow
|
|
@@ -35,7 +35,7 @@ Description-Content-Type: text/markdown
|
|
|
35
35
|
[](https://pypi.org/project/bioflowkit/)
|
|
36
36
|
[](https://pypi.org/project/bioflowkit/)
|
|
37
37
|
[](https://pypi.org/project/bioflowkit/)
|
|
38
|
-
[](tests/)
|
|
39
39
|
[](https://github.com/hope9901/bioflow/actions/workflows/nightly-smoke.yml)
|
|
40
40
|
[](LICENSE)
|
|
41
41
|
[](https://hope9901.github.io/bioflow/)
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
[](https://pypi.org/project/bioflowkit/)
|
|
4
4
|
[](https://pypi.org/project/bioflowkit/)
|
|
5
5
|
[](https://pypi.org/project/bioflowkit/)
|
|
6
|
-
[](tests/)
|
|
7
7
|
[](https://github.com/hope9901/bioflow/actions/workflows/nightly-smoke.yml)
|
|
8
8
|
[](LICENSE)
|
|
9
9
|
[](https://hope9901.github.io/bioflow/)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""bioflow - bioinformatics pipeline platform."""
|
|
2
2
|
|
|
3
|
-
__version__ = "0.
|
|
3
|
+
__version__ = "0.3.0"
|
|
4
4
|
|
|
5
5
|
# Tier-A SDK — @stage / @pipeline / runtime config
|
|
6
6
|
from bioflow.sdk import ( # noqa: E402,F401
|
|
@@ -16,6 +16,8 @@ from bioflow.sdk import ( # noqa: E402,F401
|
|
|
16
16
|
clear_cache,
|
|
17
17
|
set_log_streaming,
|
|
18
18
|
is_log_streaming_enabled,
|
|
19
|
+
container_path,
|
|
20
|
+
stage_input,
|
|
19
21
|
MockBackend,
|
|
20
22
|
DockerBackend,
|
|
21
23
|
)
|
|
@@ -25,6 +25,7 @@ Log streaming
|
|
|
25
25
|
from __future__ import annotations
|
|
26
26
|
|
|
27
27
|
import math
|
|
28
|
+
import os
|
|
28
29
|
from dataclasses import dataclass
|
|
29
30
|
from pathlib import Path
|
|
30
31
|
from typing import TYPE_CHECKING, Callable, Optional, Protocol, runtime_checkable
|
|
@@ -115,6 +116,37 @@ class MockBackend:
|
|
|
115
116
|
return CommandResult(exit_code=0)
|
|
116
117
|
|
|
117
118
|
|
|
119
|
+
# How many trailing stdout lines a stage retains in memory for error
|
|
120
|
+
# reporting. Live output still streams in full to log_callback; only the
|
|
121
|
+
# in-memory copy returned in CommandResult is capped, so a tool that emits
|
|
122
|
+
# millions of lines can't OOM the orchestrator.
|
|
123
|
+
_STDOUT_TAIL_LINES = 5000
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _clamp_resources(cpu: int, ram_gb: float) -> "tuple[int, float]":
|
|
127
|
+
"""Clamp a stage's requested CPU / RAM to the host's capacity.
|
|
128
|
+
|
|
129
|
+
Docker refuses to create a container whose ``--cpus`` exceeds the host
|
|
130
|
+
core count, so a stage declaring ``cpu=8`` must not be sent verbatim to
|
|
131
|
+
a 4-core host — it would fail to start. Memory is clamped too (a
|
|
132
|
+
``mem_limit`` above host RAM is meaningless and risks an instant OOM).
|
|
133
|
+
Both floor at 1 so a container is always launchable.
|
|
134
|
+
"""
|
|
135
|
+
host_cpu = os.cpu_count() or 1
|
|
136
|
+
eff_cpu = max(1, min(int(cpu), host_cpu))
|
|
137
|
+
|
|
138
|
+
eff_ram = ram_gb
|
|
139
|
+
try:
|
|
140
|
+
import psutil # noqa: PLC0415
|
|
141
|
+
|
|
142
|
+
host_ram_gb = psutil.virtual_memory().total / (1024 ** 3)
|
|
143
|
+
# Leave a little headroom for the host/OS.
|
|
144
|
+
eff_ram = max(1.0, min(float(ram_gb), host_ram_gb * 0.9))
|
|
145
|
+
except Exception:
|
|
146
|
+
eff_ram = max(1.0, float(ram_gb))
|
|
147
|
+
return eff_cpu, eff_ram
|
|
148
|
+
|
|
149
|
+
|
|
118
150
|
# ---------------------------------------------------------------------------
|
|
119
151
|
# Docker backend (production)
|
|
120
152
|
# ---------------------------------------------------------------------------
|
|
@@ -146,8 +178,6 @@ class DockerBackend:
|
|
|
146
178
|
_STREAMING_SUPPORTED = True # sentinel for run_plan
|
|
147
179
|
|
|
148
180
|
def __init__(self, base_url: Optional[str] = None) -> None:
|
|
149
|
-
import os # noqa: PLC0415
|
|
150
|
-
|
|
151
181
|
import docker # type: ignore[import-not-found]
|
|
152
182
|
|
|
153
183
|
url = (
|
|
@@ -189,36 +219,85 @@ class DockerBackend:
|
|
|
189
219
|
dr = self._gpu_device_requests()
|
|
190
220
|
if dr is not None:
|
|
191
221
|
extra["device_requests"] = dr
|
|
222
|
+
|
|
223
|
+
# Clamp the requested CPU/RAM to what the host actually has. Docker
|
|
224
|
+
# *rejects* a container whose --cpus exceeds the host core count
|
|
225
|
+
# ("range of CPUs is from 0.01 to N.00"), so a stage declaring
|
|
226
|
+
# cpu=8 would fail to even start on a 4-core CI runner or a small
|
|
227
|
+
# workstation. A request larger than the host should degrade to
|
|
228
|
+
# "use everything available", not crash.
|
|
229
|
+
eff_cpu, eff_ram = _clamp_resources(cpu, ram_gb)
|
|
230
|
+
|
|
192
231
|
container = None
|
|
232
|
+
timer = None
|
|
233
|
+
timed_out = {"flag": False}
|
|
193
234
|
try:
|
|
194
235
|
container = self.client.containers.run(
|
|
195
236
|
image=image,
|
|
196
237
|
command=["sh", "-c", command],
|
|
197
238
|
volumes=volumes,
|
|
198
239
|
working_dir=workdir,
|
|
199
|
-
mem_limit=f"{max(math.ceil(
|
|
200
|
-
nano_cpus=int(
|
|
240
|
+
mem_limit=f"{max(math.ceil(eff_ram), 1)}g",
|
|
241
|
+
nano_cpus=int(eff_cpu * 1_000_000_000),
|
|
201
242
|
detach=True,
|
|
202
243
|
remove=False,
|
|
203
244
|
**extra,
|
|
204
245
|
)
|
|
205
246
|
|
|
206
|
-
|
|
247
|
+
# Enforce stage_timeout with a watchdog. The log-streaming loop
|
|
248
|
+
# below blocks until the container exits, so ``container.wait
|
|
249
|
+
# (timeout=…)`` (a docker-py HTTP read timeout, not a runtime
|
|
250
|
+
# cap) can never fire for a runaway container — a timer that
|
|
251
|
+
# kills the container is the only thing that actually bounds
|
|
252
|
+
# the runtime.
|
|
253
|
+
if timeout is not None and timeout > 0:
|
|
254
|
+
import threading # noqa: PLC0415
|
|
255
|
+
|
|
256
|
+
def _kill() -> None:
|
|
257
|
+
timed_out["flag"] = True
|
|
258
|
+
try:
|
|
259
|
+
container.kill()
|
|
260
|
+
except Exception:
|
|
261
|
+
pass
|
|
262
|
+
|
|
263
|
+
timer = threading.Timer(timeout, _kill)
|
|
264
|
+
timer.daemon = True
|
|
265
|
+
timer.start()
|
|
266
|
+
|
|
267
|
+
# Retain only the tail in memory: the returned stdout is used
|
|
268
|
+
# for error diagnosis, not as the artifact (tools write their
|
|
269
|
+
# real output to files in the workspace). A chatty tool
|
|
270
|
+
# (Roary, IQ-TREE) can emit millions of lines, which would
|
|
271
|
+
# OOM the orchestrator if kept in full. Every line is still
|
|
272
|
+
# streamed live to log_callback.
|
|
273
|
+
from collections import deque # noqa: PLC0415
|
|
274
|
+
|
|
275
|
+
stdout_lines: "deque[str]" = deque(maxlen=_STDOUT_TAIL_LINES)
|
|
207
276
|
for chunk in container.logs(stream=True, follow=True):
|
|
208
277
|
line = chunk.decode(errors="replace").rstrip("\n")
|
|
209
278
|
stdout_lines.append(line)
|
|
210
279
|
if log_callback:
|
|
211
280
|
log_callback(line)
|
|
212
281
|
|
|
213
|
-
result = container.wait(
|
|
282
|
+
result = container.wait()
|
|
283
|
+
if timer is not None:
|
|
284
|
+
timer.cancel()
|
|
214
285
|
container.remove(force=True)
|
|
215
286
|
|
|
287
|
+
if timed_out["flag"]:
|
|
288
|
+
return CommandResult(
|
|
289
|
+
exit_code=124, # conventional timeout exit code
|
|
290
|
+
stdout="\n".join(stdout_lines),
|
|
291
|
+
stderr=f"stage exceeded timeout of {timeout}s and was killed",
|
|
292
|
+
)
|
|
216
293
|
return CommandResult(
|
|
217
294
|
exit_code=int(result.get("StatusCode", 1)),
|
|
218
295
|
stdout="\n".join(stdout_lines),
|
|
219
296
|
)
|
|
220
297
|
except Exception as exc:
|
|
221
|
-
|
|
298
|
+
if timer is not None:
|
|
299
|
+
timer.cancel()
|
|
300
|
+
# On any error the container may still be running — remove it.
|
|
222
301
|
if container is not None:
|
|
223
302
|
try:
|
|
224
303
|
container.remove(force=True)
|
|
@@ -10,7 +10,7 @@ from __future__ import annotations
|
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from typing import Iterable, Optional
|
|
12
12
|
|
|
13
|
-
from bioflow import stage,
|
|
13
|
+
from bioflow import pipeline, stage, stage_input
|
|
14
14
|
from bioflow.io import write_text
|
|
15
15
|
from bioflow.recipes import register
|
|
16
16
|
|
|
@@ -54,11 +54,15 @@ def ani_matrix(
|
|
|
54
54
|
if not fnas:
|
|
55
55
|
raise RuntimeError("No genomes given to ani_matrix")
|
|
56
56
|
|
|
57
|
+
# FastANI reads genome paths from the list FILE, not the command, so
|
|
58
|
+
# the SDK's command-path translator + auto-mount don't apply to them.
|
|
59
|
+
# stage_input() copies each genome into the workspace (always mounted
|
|
60
|
+
# at /work) and returns the container path to write into the list,
|
|
61
|
+
# which FastANI (working dir /work) can then open.
|
|
62
|
+
container_paths = [stage_input(g, subdir="ani_genomes") for g in fnas]
|
|
63
|
+
|
|
57
64
|
list_path = out_dir / "genome_list.txt"
|
|
58
|
-
write_text(
|
|
59
|
-
list_path,
|
|
60
|
-
"\n".join(str(p) for p in fnas) + "\n",
|
|
61
|
-
)
|
|
65
|
+
write_text(list_path, "\n".join(container_paths) + "\n")
|
|
62
66
|
return fastani_all_vs_all(list_path)
|
|
63
67
|
|
|
64
68
|
|
|
@@ -36,13 +36,15 @@ def trim(r1: Path, r2: Path, *, out_dir):
|
|
|
36
36
|
return f"trim_galore --paired --cores 4 --output_dir {out_dir} {r1} {r2}"
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
@stage(image="
|
|
39
|
+
@stage(image="staphb/bowtie2:2.5.4",
|
|
40
40
|
cpu=8, ram_gb=16, depends_on=trim)
|
|
41
41
|
def align(clean, bowtie2_index: Path, sample_id: str, *, out_dir):
|
|
42
42
|
"""Bowtie2 align with -X 2000 (ATAC-seq fragment max), sort + index.
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
44
|
+
Uses the StaPH-B bowtie2 image, which bundles samtools (the plain
|
|
45
|
+
``biocontainers/bowtie2`` image does not). Trimmed-read filenames
|
|
46
|
+
are resolved at runtime via ``ls | head -1`` so the recipe is robust
|
|
47
|
+
to TrimGalore's naming conventions.
|
|
46
48
|
"""
|
|
47
49
|
return (
|
|
48
50
|
f"bash -c '"
|
|
@@ -7,9 +7,10 @@ End-to-end workflow:
|
|
|
7
7
|
→ MACS3 callpeak (narrow peaks vs. input control)
|
|
8
8
|
→ HOMER annotatePeaks + findMotifsGenome (annotation + motifs)
|
|
9
9
|
|
|
10
|
-
Optional input/IgG control: pass
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
Optional input/IgG control: pass ``--ctrl-bam`` pointing at a
|
|
11
|
+
pre-aligned, deduplicated control BAM; MACS3 receives it as ``-c``.
|
|
12
|
+
(Raw control reads are not aligned by this recipe — run the sample arm
|
|
13
|
+
on the control FASTQs separately, or use the BAM you already have.)
|
|
13
14
|
|
|
14
15
|
Researcher (Tier B) usage::
|
|
15
16
|
|
|
@@ -37,14 +38,16 @@ def trim(r1: Path, r2: Path, *, out_dir):
|
|
|
37
38
|
return f"trim_galore --paired --cores 4 --output_dir {out_dir} {r1} {r2}"
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
@stage(image="
|
|
41
|
+
@stage(image="staphb/bowtie2:2.5.4",
|
|
41
42
|
cpu=8, ram_gb=16, depends_on=trim)
|
|
42
43
|
def align(clean, bowtie2_index: Path, sample_id: str, *, out_dir):
|
|
43
|
-
"""Bowtie2 alignment → sorted, indexed BAM
|
|
44
|
+
"""Bowtie2 alignment → sorted, indexed BAM.
|
|
44
45
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
Uses the StaPH-B bowtie2 image, which bundles samtools (the plain
|
|
47
|
+
``biocontainers/bowtie2`` image does not, so the sort/index chain
|
|
48
|
+
would fail there). Trimmed-read filenames are resolved at runtime
|
|
49
|
+
(``ls | head -1``) so the recipe survives TrimGalore's naming and
|
|
50
|
+
never feeds a glob with multiple matches to ``bowtie2 -1``.
|
|
48
51
|
"""
|
|
49
52
|
return (
|
|
50
53
|
f"bash -c '"
|
|
@@ -7,7 +7,7 @@ End-to-end ONT/HiFi workflow using the long-read tools added in 0.1.10:
|
|
|
7
7
|
→ compleasm + gfastats (assembly QC)
|
|
8
8
|
|
|
9
9
|
For HiFi reads, polishing is usually unnecessary — pass
|
|
10
|
-
``polish=False`` to skip the Medaka step.
|
|
10
|
+
``--polish false`` (``polish=False``) to skip the Medaka step.
|
|
11
11
|
|
|
12
12
|
Researcher (Tier B) usage::
|
|
13
13
|
|
|
@@ -46,7 +46,7 @@ def assemble(qc, long_reads: Path, *, out_dir, read_mode: str = "--nano-hq"):
|
|
|
46
46
|
|
|
47
47
|
@stage(image="quay.io/biocontainers/medaka:1.11.3--py39h05d5c5e_0",
|
|
48
48
|
cpu=8, ram_gb=32, depends_on=assemble)
|
|
49
|
-
def
|
|
49
|
+
def polish_consensus(asm, long_reads: Path, *, out_dir, medaka_model: str = "r1041_e82_400bps_sup_v5.0.0"):
|
|
50
50
|
"""Medaka ONT consensus polish of the Flye assembly."""
|
|
51
51
|
return (
|
|
52
52
|
f"medaka_consensus -i {long_reads} "
|
|
@@ -56,7 +56,7 @@ def polish(asm, long_reads: Path, *, out_dir, medaka_model: str = "r1041_e82_400
|
|
|
56
56
|
|
|
57
57
|
|
|
58
58
|
@stage(image="quay.io/biocontainers/compleasm:0.2.6--pyh7cba7a3_0",
|
|
59
|
-
cpu=8, ram_gb=16, depends_on=
|
|
59
|
+
cpu=8, ram_gb=16, depends_on=polish_consensus)
|
|
60
60
|
def assess(polished, *, out_dir, busco_lineage: str = "eukaryota_odb10",
|
|
61
61
|
busco_db: Path = Path("/refs/busco")):
|
|
62
62
|
"""compleasm completeness + gfastats contiguity (gfastats chained)."""
|
|
@@ -72,7 +72,7 @@ def assess(polished, *, out_dir, busco_lineage: str = "eukaryota_odb10",
|
|
|
72
72
|
# ── Pipeline ────────────────────────────────────────────────────────────────
|
|
73
73
|
|
|
74
74
|
@pipeline(
|
|
75
|
-
stages=[read_qc, assemble,
|
|
75
|
+
stages=[read_qc, assemble, polish_consensus, assess],
|
|
76
76
|
description="Eukaryote long-read assembly: NanoPlot → Flye → Medaka → compleasm",
|
|
77
77
|
)
|
|
78
78
|
def eukaryote_assembly(
|
|
@@ -80,19 +80,26 @@ def eukaryote_assembly(
|
|
|
80
80
|
*,
|
|
81
81
|
out_dir: Path,
|
|
82
82
|
read_mode: str = "--nano-hq",
|
|
83
|
+
polish: bool = True,
|
|
83
84
|
medaka_model: str = "r1041_e82_400bps_sup_v5.0.0",
|
|
84
85
|
busco_lineage: str = "eukaryota_odb10",
|
|
85
86
|
busco_db: Path = Path("/refs/busco"),
|
|
86
87
|
):
|
|
87
|
-
"""NanoPlot → Flye → Medaka → compleasm end-to-end.
|
|
88
|
+
"""NanoPlot → Flye → Medaka → compleasm end-to-end.
|
|
89
|
+
|
|
90
|
+
``polish=False`` skips the Medaka step — appropriate for HiFi reads,
|
|
91
|
+
whose per-base accuracy makes ONT consensus polishing unnecessary.
|
|
92
|
+
``assess`` then reads Flye's ``assembly.fasta`` directly (it already
|
|
93
|
+
falls back from ``consensus.fasta``).
|
|
94
|
+
"""
|
|
88
95
|
out_dir = Path(out_dir).resolve()
|
|
89
96
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
90
97
|
|
|
91
98
|
lr = Path(long_reads)
|
|
92
99
|
qc = read_qc(lr)
|
|
93
100
|
asm = assemble(qc, lr, read_mode=read_mode)
|
|
94
|
-
|
|
95
|
-
return assess(
|
|
101
|
+
polished = polish_consensus(asm, lr, medaka_model=medaka_model) if polish else asm
|
|
102
|
+
return assess(polished, busco_lineage=busco_lineage, busco_db=Path(busco_db))
|
|
96
103
|
|
|
97
104
|
|
|
98
105
|
register("eukaryote_assembly", eukaryote_assembly)
|
|
@@ -48,12 +48,16 @@ def assemble(clean, *, out_dir):
|
|
|
48
48
|
)
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
@stage(image="quay.io/biocontainers/
|
|
51
|
+
@stage(image=("quay.io/biocontainers/mulled-v2-"
|
|
52
|
+
"66534bcbb7031a148b13e2ad42583020b9cd25c4:"
|
|
53
|
+
"b411340b52d82a9c276d87c7a3dcffc880be762f-0"),
|
|
52
54
|
cpu=16, ram_gb=32, depends_on=assemble)
|
|
53
55
|
def map_back(asm, clean, *, out_dir):
|
|
54
56
|
"""Map QC reads back to contigs for coverage (minimap2 + samtools).
|
|
55
57
|
|
|
56
|
-
|
|
58
|
+
Uses a mulled minimap2 + samtools BioContainer (minimap2 2.31 +
|
|
59
|
+
samtools 1.23) — the plain ``biocontainers/minimap2`` image ships no
|
|
60
|
+
samtools, so the ``minimap2 | samtools sort`` chain fails on it.
|
|
57
61
|
"""
|
|
58
62
|
contigs = f"{asm.out_dir}/megahit/final.contigs.fa"
|
|
59
63
|
return (
|
|
@@ -6,10 +6,11 @@ End-to-end short-read shotgun-metagenomic workflow:
|
|
|
6
6
|
Requires a prebuilt Kraken2 database mounted into the workspace (e.g.
|
|
7
7
|
the standard MiniKraken2 or PlusPF DB). Use::
|
|
8
8
|
|
|
9
|
-
bioflow db fetch
|
|
9
|
+
bioflow db fetch kraken2_standard_8gb --dest /refs
|
|
10
10
|
|
|
11
|
-
…to install one
|
|
12
|
-
|
|
11
|
+
…to install one, or pass --kraken2-db pointing to a pre-existing
|
|
12
|
+
directory. (Bracken also needs the ``databaseNNmers.kmer_distrib``
|
|
13
|
+
files that ship inside the standard Kraken2 DB tarball.)
|
|
13
14
|
|
|
14
15
|
Researcher (Tier B) usage::
|
|
15
16
|
|