bioflowkit 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (415) hide show
  1. bioflowkit-0.2.0/.github/workflows/candidate-smoke-test.yml +111 -0
  2. bioflowkit-0.2.0/.github/workflows/ci.yml +130 -0
  3. bioflowkit-0.2.0/.github/workflows/docs.yml +62 -0
  4. bioflowkit-0.2.0/.github/workflows/nightly-smoke.yml +58 -0
  5. bioflowkit-0.2.0/.github/workflows/release.yml +179 -0
  6. bioflowkit-0.2.0/.gitignore +100 -0
  7. bioflowkit-0.2.0/CHANGELOG.md +693 -0
  8. bioflowkit-0.2.0/CODE_OF_CONDUCT.md +55 -0
  9. bioflowkit-0.2.0/CONTRIBUTING.md +93 -0
  10. bioflowkit-0.2.0/LICENSE +21 -0
  11. bioflowkit-0.2.0/PKG-INFO +384 -0
  12. bioflowkit-0.2.0/README.md +352 -0
  13. bioflowkit-0.2.0/SECURITY.md +43 -0
  14. bioflowkit-0.2.0/analysis/README.md +51 -0
  15. bioflowkit-0.2.0/analysis/dickeya/abricate/D_ananatis_019464615_1.card.tsv +2 -0
  16. bioflowkit-0.2.0/analysis/dickeya/abricate/D_ananatis_019464615_1.plasmidfinder.tsv +1 -0
  17. bioflowkit-0.2.0/analysis/dickeya/abricate/D_ananatis_019464615_1.vfdb.tsv +9 -0
  18. bioflowkit-0.2.0/analysis/dickeya/abricate/D_aquatica_900095885_1.card.tsv +2 -0
  19. bioflowkit-0.2.0/analysis/dickeya/abricate/D_aquatica_900095885_1.plasmidfinder.tsv +1 -0
  20. bioflowkit-0.2.0/analysis/dickeya/abricate/D_aquatica_900095885_1.vfdb.tsv +4 -0
  21. bioflowkit-0.2.0/analysis/dickeya/abricate/D_chrysanthemi_000023565_1.card.tsv +2 -0
  22. bioflowkit-0.2.0/analysis/dickeya/abricate/D_chrysanthemi_000023565_1.plasmidfinder.tsv +1 -0
  23. bioflowkit-0.2.0/analysis/dickeya/abricate/D_chrysanthemi_000023565_1.vfdb.tsv +13 -0
  24. bioflowkit-0.2.0/analysis/dickeya/abricate/D_dadantii_003049785_1.card.tsv +2 -0
  25. bioflowkit-0.2.0/analysis/dickeya/abricate/D_dadantii_003049785_1.plasmidfinder.tsv +1 -0
  26. bioflowkit-0.2.0/analysis/dickeya/abricate/D_dadantii_003049785_1.vfdb.tsv +11 -0
  27. bioflowkit-0.2.0/analysis/dickeya/abricate/D_dianthicola_003403135_1.card.tsv +2 -0
  28. bioflowkit-0.2.0/analysis/dickeya/abricate/D_dianthicola_003403135_1.plasmidfinder.tsv +1 -0
  29. bioflowkit-0.2.0/analysis/dickeya/abricate/D_dianthicola_003403135_1.vfdb.tsv +10 -0
  30. bioflowkit-0.2.0/analysis/dickeya/abricate/D_fangzhongdai_002812485_1.card.tsv +2 -0
  31. bioflowkit-0.2.0/analysis/dickeya/abricate/D_fangzhongdai_002812485_1.plasmidfinder.tsv +2 -0
  32. bioflowkit-0.2.0/analysis/dickeya/abricate/D_fangzhongdai_002812485_1.vfdb.tsv +10 -0
  33. bioflowkit-0.2.0/analysis/dickeya/abricate/D_lacustris_003934295_1.card.tsv +2 -0
  34. bioflowkit-0.2.0/analysis/dickeya/abricate/D_lacustris_003934295_1.plasmidfinder.tsv +1 -0
  35. bioflowkit-0.2.0/analysis/dickeya/abricate/D_lacustris_003934295_1.vfdb.tsv +2 -0
  36. bioflowkit-0.2.0/analysis/dickeya/abricate/D_oryzae_020406815_2.card.tsv +2 -0
  37. bioflowkit-0.2.0/analysis/dickeya/abricate/D_oryzae_020406815_2.plasmidfinder.tsv +3 -0
  38. bioflowkit-0.2.0/analysis/dickeya/abricate/D_oryzae_020406815_2.vfdb.tsv +10 -0
  39. bioflowkit-0.2.0/analysis/dickeya/abricate/D_parazeae_000025065_1.card.tsv +2 -0
  40. bioflowkit-0.2.0/analysis/dickeya/abricate/D_parazeae_000025065_1.plasmidfinder.tsv +1 -0
  41. bioflowkit-0.2.0/analysis/dickeya/abricate/D_parazeae_000025065_1.vfdb.tsv +8 -0
  42. bioflowkit-0.2.0/analysis/dickeya/abricate/D_poaceiphila_007858975_2.card.tsv +2 -0
  43. bioflowkit-0.2.0/analysis/dickeya/abricate/D_poaceiphila_007858975_2.plasmidfinder.tsv +1 -0
  44. bioflowkit-0.2.0/analysis/dickeya/abricate/D_poaceiphila_007858975_2.vfdb.tsv +2 -0
  45. bioflowkit-0.2.0/analysis/dickeya/abricate/D_solani_001644705_1.card.tsv +2 -0
  46. bioflowkit-0.2.0/analysis/dickeya/abricate/D_solani_001644705_1.plasmidfinder.tsv +1 -0
  47. bioflowkit-0.2.0/analysis/dickeya/abricate/D_solani_001644705_1.vfdb.tsv +12 -0
  48. bioflowkit-0.2.0/analysis/dickeya/abricate/D_undicola_000784735_1.card.tsv +2 -0
  49. bioflowkit-0.2.0/analysis/dickeya/abricate/D_undicola_000784735_1.plasmidfinder.tsv +4 -0
  50. bioflowkit-0.2.0/analysis/dickeya/abricate/D_undicola_000784735_1.vfdb.tsv +11 -0
  51. bioflowkit-0.2.0/analysis/dickeya/abricate/D_zeae_002887555_1.card.tsv +2 -0
  52. bioflowkit-0.2.0/analysis/dickeya/abricate/D_zeae_002887555_1.plasmidfinder.tsv +1 -0
  53. bioflowkit-0.2.0/analysis/dickeya/abricate/D_zeae_002887555_1.vfdb.tsv +8 -0
  54. bioflowkit-0.2.0/analysis/dickeya/abricate/_summary_card.tsv +2 -0
  55. bioflowkit-0.2.0/analysis/dickeya/abricate/_summary_plasmidfinder.tsv +4 -0
  56. bioflowkit-0.2.0/analysis/dickeya/abricate/_summary_vfdb.tsv +12 -0
  57. bioflowkit-0.2.0/analysis/dickeya/cafe/results/Base_clade_results.txt +160 -0
  58. bioflowkit-0.2.0/analysis/dickeya/cafe/results/Base_family_likelihoods.txt +10 -0
  59. bioflowkit-0.2.0/analysis/dickeya/cafe/results/Base_family_results.txt +10 -0
  60. bioflowkit-0.2.0/analysis/dickeya/cafe/results/Base_results.txt +4 -0
  61. bioflowkit-0.2.0/analysis/dickeya/cafe/vfdb_counts.tsv +21 -0
  62. bioflowkit-0.2.0/analysis/dickeya/eggnog/cog_counts_by_bucket.tsv +5 -0
  63. bioflowkit-0.2.0/analysis/dickeya/eggnog/cog_fractions_by_bucket.tsv +5 -0
  64. bioflowkit-0.2.0/analysis/dickeya/figures/abricate_card.png +0 -0
  65. bioflowkit-0.2.0/analysis/dickeya/figures/abricate_full_boxplot.png +0 -0
  66. bioflowkit-0.2.0/analysis/dickeya/figures/abricate_full_card.png +0 -0
  67. bioflowkit-0.2.0/analysis/dickeya/figures/abricate_full_plasmidfinder.png +0 -0
  68. bioflowkit-0.2.0/analysis/dickeya/figures/abricate_full_vfdb.png +0 -0
  69. bioflowkit-0.2.0/analysis/dickeya/figures/abricate_plasmidfinder.png +0 -0
  70. bioflowkit-0.2.0/analysis/dickeya/figures/abricate_vfdb.png +0 -0
  71. bioflowkit-0.2.0/analysis/dickeya/figures/ani_full_heatmap.png +0 -0
  72. bioflowkit-0.2.0/analysis/dickeya/figures/ani_heatmap.png +0 -0
  73. bioflowkit-0.2.0/analysis/dickeya/figures/cafe_hcp_detail.png +0 -0
  74. bioflowkit-0.2.0/analysis/dickeya/figures/cafe_vfdb_tree.png +0 -0
  75. bioflowkit-0.2.0/analysis/dickeya/figures/cog_delta.png +0 -0
  76. bioflowkit-0.2.0/analysis/dickeya/figures/cog_stacked.png +0 -0
  77. bioflowkit-0.2.0/analysis/dickeya/figures/pangenome_curve.png +0 -0
  78. bioflowkit-0.2.0/analysis/dickeya/figures/pangenome_full_curve.png +0 -0
  79. bioflowkit-0.2.0/analysis/dickeya/figures/pangenome_full_pie.png +0 -0
  80. bioflowkit-0.2.0/analysis/dickeya/figures/pangenome_pie.png +0 -0
  81. bioflowkit-0.2.0/analysis/dickeya/figures/scoary_full_is_dianthicola.png +0 -0
  82. bioflowkit-0.2.0/analysis/dickeya/figures/scoary_full_is_solani.png +0 -0
  83. bioflowkit-0.2.0/analysis/dickeya/figures/scoary_full_soft_rot.png +0 -0
  84. bioflowkit-0.2.0/analysis/dickeya/figures/scoary_full_vascular_wilt.png +0 -0
  85. bioflowkit-0.2.0/analysis/dickeya/figures/scoary_soft_rot.png +0 -0
  86. bioflowkit-0.2.0/analysis/dickeya/figures/scoary_vascular_wilt.png +0 -0
  87. bioflowkit-0.2.0/analysis/dickeya/figures/solani_island_gc.png +0 -0
  88. bioflowkit-0.2.0/analysis/dickeya/figures/solani_island_synteny.png +0 -0
  89. bioflowkit-0.2.0/analysis/dickeya/figures/tree_ani_nj.png +0 -0
  90. bioflowkit-0.2.0/analysis/dickeya/figures/tree_full_with_vfdb.png +0 -0
  91. bioflowkit-0.2.0/analysis/dickeya/figures/tree_ml_iqtree.png +0 -0
  92. bioflowkit-0.2.0/analysis/dickeya/phylogeny/ani_nj.nwk +1 -0
  93. bioflowkit-0.2.0/analysis/dickeya/phylogeny/iqtree.treefile +1 -0
  94. bioflowkit-0.2.0/analysis/dickeya/phylogeny_full/iqtree_full.treefile +1 -0
  95. bioflowkit-0.2.0/analysis/dickeya/scoary/top25_soft_rot.tsv +26 -0
  96. bioflowkit-0.2.0/analysis/dickeya/scoary/top25_vascular_wilt.tsv +26 -0
  97. bioflowkit-0.2.0/analysis/dickeya/scoary/traits.csv +14 -0
  98. bioflowkit-0.2.0/analysis/dickeya/scoary_full/top30_is_dianthicola.tsv +31 -0
  99. bioflowkit-0.2.0/analysis/dickeya/scoary_full/top30_is_solani.tsv +31 -0
  100. bioflowkit-0.2.0/analysis/dickeya/scoary_full/top30_soft_rot.tsv +31 -0
  101. bioflowkit-0.2.0/analysis/dickeya/scoary_full/top30_vascular_wilt.tsv +31 -0
  102. bioflowkit-0.2.0/analysis/dickeya/scoary_full/traits.csv +263 -0
  103. bioflowkit-0.2.0/analysis/dickeya/summary.html +515 -0
  104. bioflowkit-0.2.0/bioflow/__init__.py +22 -0
  105. bioflowkit-0.2.0/bioflow/cli/__init__.py +49 -0
  106. bioflowkit-0.2.0/bioflow/cli/__main__.py +6 -0
  107. bioflowkit-0.2.0/bioflow/cli/_app.py +37 -0
  108. bioflowkit-0.2.0/bioflow/cli/db.py +67 -0
  109. bioflowkit-0.2.0/bioflow/cli/doctor.py +83 -0
  110. bioflowkit-0.2.0/bioflow/cli/hw.py +81 -0
  111. bioflowkit-0.2.0/bioflow/cli/llm.py +222 -0
  112. bioflowkit-0.2.0/bioflow/cli/ncbi.py +198 -0
  113. bioflowkit-0.2.0/bioflow/cli/pipelines.py +186 -0
  114. bioflowkit-0.2.0/bioflow/cli/recipe.py +213 -0
  115. bioflowkit-0.2.0/bioflow/cli/setup.py +137 -0
  116. bioflowkit-0.2.0/bioflow/cli/update.py +318 -0
  117. bioflowkit-0.2.0/bioflow/core/__init__.py +1 -0
  118. bioflowkit-0.2.0/bioflow/core/approve.py +236 -0
  119. bioflowkit-0.2.0/bioflow/core/checkpoint.py +87 -0
  120. bioflowkit-0.2.0/bioflow/core/compatibility.py +178 -0
  121. bioflowkit-0.2.0/bioflow/core/dag.py +61 -0
  122. bioflowkit-0.2.0/bioflow/core/db.py +361 -0
  123. bioflowkit-0.2.0/bioflow/core/doctor.py +524 -0
  124. bioflowkit-0.2.0/bioflow/core/hardware.py +111 -0
  125. bioflowkit-0.2.0/bioflow/core/logger.py +40 -0
  126. bioflowkit-0.2.0/bioflow/core/ncbi.py +794 -0
  127. bioflowkit-0.2.0/bioflow/core/planner.py +1037 -0
  128. bioflowkit-0.2.0/bioflow/core/registry.py +132 -0
  129. bioflowkit-0.2.0/bioflow/core/report.py +290 -0
  130. bioflowkit-0.2.0/bioflow/core/runner.py +394 -0
  131. bioflowkit-0.2.0/bioflow/io.py +343 -0
  132. bioflowkit-0.2.0/bioflow/llm/__init__.py +741 -0
  133. bioflowkit-0.2.0/bioflow/llm/audit.py +236 -0
  134. bioflowkit-0.2.0/bioflow/pipelines/__init__.py +1 -0
  135. bioflowkit-0.2.0/bioflow/pipelines/atac_seq.py +46 -0
  136. bioflowkit-0.2.0/bioflow/pipelines/chip_seq.py +49 -0
  137. bioflowkit-0.2.0/bioflow/pipelines/genome_assembly.py +57 -0
  138. bioflowkit-0.2.0/bioflow/pipelines/metagenomics.py +46 -0
  139. bioflowkit-0.2.0/bioflow/pipelines/methylation.py +39 -0
  140. bioflowkit-0.2.0/bioflow/pipelines/proteomics.py +47 -0
  141. bioflowkit-0.2.0/bioflow/pipelines/rnaseq_deg.py +42 -0
  142. bioflowkit-0.2.0/bioflow/pipelines/scrna_seq.py +46 -0
  143. bioflowkit-0.2.0/bioflow/pipelines/variant_calling.py +42 -0
  144. bioflowkit-0.2.0/bioflow/recipes/__init__.py +80 -0
  145. bioflowkit-0.2.0/bioflow/recipes/comparative_genomics/__init__.py +1 -0
  146. bioflowkit-0.2.0/bioflow/recipes/comparative_genomics/amr_vf_catalogue.py +58 -0
  147. bioflowkit-0.2.0/bioflow/recipes/comparative_genomics/ani_matrix.py +65 -0
  148. bioflowkit-0.2.0/bioflow/recipes/comparative_genomics/cafe_evolution.py +71 -0
  149. bioflowkit-0.2.0/bioflow/recipes/comparative_genomics/cog_enrichment.py +203 -0
  150. bioflowkit-0.2.0/bioflow/recipes/comparative_genomics/download_taxon.py +48 -0
  151. bioflowkit-0.2.0/bioflow/recipes/comparative_genomics/gwas.py +48 -0
  152. bioflowkit-0.2.0/bioflow/recipes/comparative_genomics/pangenome.py +115 -0
  153. bioflowkit-0.2.0/bioflow/recipes/comparative_genomics/phylogeny.py +243 -0
  154. bioflowkit-0.2.0/bioflow/recipes/epigenomics/__init__.py +1 -0
  155. bioflowkit-0.2.0/bioflow/recipes/epigenomics/atac_seq.py +130 -0
  156. bioflowkit-0.2.0/bioflow/recipes/epigenomics/chip_seq.py +133 -0
  157. bioflowkit-0.2.0/bioflow/recipes/genome_assembly/__init__.py +1 -0
  158. bioflowkit-0.2.0/bioflow/recipes/genome_assembly/eukaryote_assembly.py +98 -0
  159. bioflowkit-0.2.0/bioflow/recipes/genome_assembly/prokaryote_assembly.py +108 -0
  160. bioflowkit-0.2.0/bioflow/recipes/metagenomics/__init__.py +1 -0
  161. bioflowkit-0.2.0/bioflow/recipes/metagenomics/metagenome_assembly.py +118 -0
  162. bioflowkit-0.2.0/bioflow/recipes/metagenomics/metagenomics_profile.py +99 -0
  163. bioflowkit-0.2.0/bioflow/recipes/methylation/__init__.py +1 -0
  164. bioflowkit-0.2.0/bioflow/recipes/methylation/bismark_wgbs.py +120 -0
  165. bioflowkit-0.2.0/bioflow/recipes/proteomics/__init__.py +1 -0
  166. bioflowkit-0.2.0/bioflow/recipes/proteomics/proteomics_dda.py +125 -0
  167. bioflowkit-0.2.0/bioflow/recipes/rnaseq_deg/__init__.py +1 -0
  168. bioflowkit-0.2.0/bioflow/recipes/rnaseq_deg/rnaseq_deg.py +165 -0
  169. bioflowkit-0.2.0/bioflow/recipes/single_cell/__init__.py +1 -0
  170. bioflowkit-0.2.0/bioflow/recipes/single_cell/scrna_seq.py +126 -0
  171. bioflowkit-0.2.0/bioflow/recipes/variant_calling/__init__.py +1 -0
  172. bioflowkit-0.2.0/bioflow/recipes/variant_calling/germline_variants.py +140 -0
  173. bioflowkit-0.2.0/bioflow/report.py +292 -0
  174. bioflowkit-0.2.0/bioflow/sdk/__init__.py +113 -0
  175. bioflowkit-0.2.0/bioflow/sdk/_cache.py +84 -0
  176. bioflowkit-0.2.0/bioflow/sdk/_hashing.py +101 -0
  177. bioflowkit-0.2.0/bioflow/sdk/_parallel.py +144 -0
  178. bioflowkit-0.2.0/bioflow/sdk/_paths.py +161 -0
  179. bioflowkit-0.2.0/bioflow/sdk/_pipeline.py +205 -0
  180. bioflowkit-0.2.0/bioflow/sdk/_result.py +24 -0
  181. bioflowkit-0.2.0/bioflow/sdk/_runtime.py +67 -0
  182. bioflowkit-0.2.0/bioflow/sdk/_stage.py +522 -0
  183. bioflowkit-0.2.0/data/test/ecoli_small/R1.fastq.gz +0 -0
  184. bioflowkit-0.2.0/data/test/ecoli_small/R2.fastq.gz +0 -0
  185. bioflowkit-0.2.0/data/test/ecoli_small/real_R1.fastq.gz +0 -0
  186. bioflowkit-0.2.0/data/test/ecoli_small/real_R2.fastq.gz +0 -0
  187. bioflowkit-0.2.0/data/test/ecoli_small/reference.fa +2 -0
  188. bioflowkit-0.2.0/data/test/rnaseq_toy/R1.fastq.gz +0 -0
  189. bioflowkit-0.2.0/data/test/rnaseq_toy/genome.fa +2 -0
  190. bioflowkit-0.2.0/data/test/rnaseq_toy/genome.gtf +1 -0
  191. bioflowkit-0.2.0/data/test/rnaseq_toy/samples.csv +2 -0
  192. bioflowkit-0.2.0/docker/core/Dockerfile +25 -0
  193. bioflowkit-0.2.0/docker/docker-compose.yml +20 -0
  194. bioflowkit-0.2.0/docs/DESIGN.md +164 -0
  195. bioflowkit-0.2.0/docs/MAINTAINER.md +351 -0
  196. bioflowkit-0.2.0/docs/architecture.md +55 -0
  197. bioflowkit-0.2.0/docs/index.md +46 -0
  198. bioflowkit-0.2.0/docs/install.md +102 -0
  199. bioflowkit-0.2.0/docs/maintainer/UPDATE_CADENCES.md +139 -0
  200. bioflowkit-0.2.0/docs/maintainer/cowork_schedule_prompt.md +176 -0
  201. bioflowkit-0.2.0/docs/maintainer/quarterly_audit_prompt.md +107 -0
  202. bioflowkit-0.2.0/docs/maintainer/research_prompt.md +67 -0
  203. bioflowkit-0.2.0/docs/quickstart.md +75 -0
  204. bioflowkit-0.2.0/docs/reference/recipes.md +190 -0
  205. bioflowkit-0.2.0/docs/reference/tools.md +194 -0
  206. bioflowkit-0.2.0/examples/cache_demo.py +54 -0
  207. bioflowkit-0.2.0/examples/config_atac_seq.yaml +19 -0
  208. bioflowkit-0.2.0/examples/config_chip_seq.yaml +18 -0
  209. bioflowkit-0.2.0/examples/config_custom.yaml +19 -0
  210. bioflowkit-0.2.0/examples/config_eukaryote_hifi.yaml +20 -0
  211. bioflowkit-0.2.0/examples/config_metagenomics.yaml +25 -0
  212. bioflowkit-0.2.0/examples/config_methylation.yaml +19 -0
  213. bioflowkit-0.2.0/examples/config_prokaryote_short.yaml +17 -0
  214. bioflowkit-0.2.0/examples/config_proteomics.yaml +17 -0
  215. bioflowkit-0.2.0/examples/config_recommend.yaml +10 -0
  216. bioflowkit-0.2.0/examples/config_rnaseq.yaml +19 -0
  217. bioflowkit-0.2.0/examples/config_scrna_seq.yaml +20 -0
  218. bioflowkit-0.2.0/examples/parallel_demo.py +53 -0
  219. bioflowkit-0.2.0/examples/pectobacterium_demo.py +124 -0
  220. bioflowkit-0.2.0/examples/pipeline_demo.py +80 -0
  221. bioflowkit-0.2.0/examples/recipes_quickstart.py +119 -0
  222. bioflowkit-0.2.0/examples/stage_demo.py +82 -0
  223. bioflowkit-0.2.0/mkdocs.yml +62 -0
  224. bioflowkit-0.2.0/pyproject.toml +83 -0
  225. bioflowkit-0.2.0/registry/presets/README.md +41 -0
  226. bioflowkit-0.2.0/registry/presets/atac_seq_standard.yaml +22 -0
  227. bioflowkit-0.2.0/registry/presets/chip_seq_standard.yaml +22 -0
  228. bioflowkit-0.2.0/registry/presets/eukaryote_denovo_hifi.yaml +29 -0
  229. bioflowkit-0.2.0/registry/presets/eukaryote_denovo_hybrid.yaml +29 -0
  230. bioflowkit-0.2.0/registry/presets/eukaryote_resequencing.yaml +30 -0
  231. bioflowkit-0.2.0/registry/presets/metagenomics_kraken2_standard.yaml +22 -0
  232. bioflowkit-0.2.0/registry/presets/metagenomics_metaphlan4_standard.yaml +21 -0
  233. bioflowkit-0.2.0/registry/presets/methylation_bismark_wgbs.yaml +20 -0
  234. bioflowkit-0.2.0/registry/presets/prokaryote_denovo_hybrid.yaml +30 -0
  235. bioflowkit-0.2.0/registry/presets/prokaryote_denovo_short.yaml +31 -0
  236. bioflowkit-0.2.0/registry/presets/proteomics_msfragger_dda.yaml +23 -0
  237. bioflowkit-0.2.0/registry/presets/rnaseq_deseq2_standard.yaml +23 -0
  238. bioflowkit-0.2.0/registry/presets/scrna_seq_10x_scanpy.yaml +23 -0
  239. bioflowkit-0.2.0/registry/presets/scrna_seq_10x_seurat.yaml +21 -0
  240. bioflowkit-0.2.0/registry/schema.yaml +150 -0
  241. bioflowkit-0.2.0/registry/tools/alignment/bedtools.yaml +27 -0
  242. bioflowkit-0.2.0/registry/tools/alignment/bowtie2.yaml +27 -0
  243. bioflowkit-0.2.0/registry/tools/alignment/bwa.yaml +27 -0
  244. bioflowkit-0.2.0/registry/tools/alignment/bwa_mem2.yaml +33 -0
  245. bioflowkit-0.2.0/registry/tools/alignment/minimap2.yaml +27 -0
  246. bioflowkit-0.2.0/registry/tools/alignment/samtools.yaml +29 -0
  247. bioflowkit-0.2.0/registry/tools/assembly/abyss.yaml +27 -0
  248. bioflowkit-0.2.0/registry/tools/assembly/canu.yaml +28 -0
  249. bioflowkit-0.2.0/registry/tools/assembly/flye.yaml +25 -0
  250. bioflowkit-0.2.0/registry/tools/assembly/hifiasm.yaml +27 -0
  251. bioflowkit-0.2.0/registry/tools/assembly/masurca.yaml +27 -0
  252. bioflowkit-0.2.0/registry/tools/assembly/medaka.yaml +27 -0
  253. bioflowkit-0.2.0/registry/tools/assembly/megahit.yaml +26 -0
  254. bioflowkit-0.2.0/registry/tools/assembly/nextdenovo.yaml +26 -0
  255. bioflowkit-0.2.0/registry/tools/assembly/nextpolish.yaml +26 -0
  256. bioflowkit-0.2.0/registry/tools/assembly/pilon.yaml +28 -0
  257. bioflowkit-0.2.0/registry/tools/assembly/racon.yaml +27 -0
  258. bioflowkit-0.2.0/registry/tools/assembly/raven.yaml +28 -0
  259. bioflowkit-0.2.0/registry/tools/assembly/shasta.yaml +27 -0
  260. bioflowkit-0.2.0/registry/tools/assembly/spades.yaml +27 -0
  261. bioflowkit-0.2.0/registry/tools/assembly/unicycler.yaml +24 -0
  262. bioflowkit-0.2.0/registry/tools/assembly/verkko.yaml +27 -0
  263. bioflowkit-0.2.0/registry/tools/assembly_qc/busco.yaml +26 -0
  264. bioflowkit-0.2.0/registry/tools/assembly_qc/checkm2.yaml +26 -0
  265. bioflowkit-0.2.0/registry/tools/assembly_qc/compleasm.yaml +27 -0
  266. bioflowkit-0.2.0/registry/tools/assembly_qc/gfastats.yaml +26 -0
  267. bioflowkit-0.2.0/registry/tools/assembly_qc/merqury.yaml +24 -0
  268. bioflowkit-0.2.0/registry/tools/assembly_qc/quast.yaml +25 -0
  269. bioflowkit-0.2.0/registry/tools/comparative_genomics/abricate.yaml +25 -0
  270. bioflowkit-0.2.0/registry/tools/comparative_genomics/cafe5.yaml +26 -0
  271. bioflowkit-0.2.0/registry/tools/comparative_genomics/diamond.yaml +26 -0
  272. bioflowkit-0.2.0/registry/tools/comparative_genomics/fastani.yaml +25 -0
  273. bioflowkit-0.2.0/registry/tools/comparative_genomics/iqtree.yaml +26 -0
  274. bioflowkit-0.2.0/registry/tools/comparative_genomics/mafft.yaml +24 -0
  275. bioflowkit-0.2.0/registry/tools/comparative_genomics/mash.yaml +28 -0
  276. bioflowkit-0.2.0/registry/tools/comparative_genomics/panaroo.yaml +27 -0
  277. bioflowkit-0.2.0/registry/tools/comparative_genomics/roary.yaml +25 -0
  278. bioflowkit-0.2.0/registry/tools/comparative_genomics/scoary.yaml +25 -0
  279. bioflowkit-0.2.0/registry/tools/comparative_genomics/skani.yaml +27 -0
  280. bioflowkit-0.2.0/registry/tools/deg/deseq2.yaml +36 -0
  281. bioflowkit-0.2.0/registry/tools/deg/edger.yaml +37 -0
  282. bioflowkit-0.2.0/registry/tools/deg/limma_voom.yaml +36 -0
  283. bioflowkit-0.2.0/registry/tools/enrichment/clusterprofiler.yaml +41 -0
  284. bioflowkit-0.2.0/registry/tools/enrichment/enrichr.yaml +29 -0
  285. bioflowkit-0.2.0/registry/tools/enrichment/gseapy.yaml +28 -0
  286. bioflowkit-0.2.0/registry/tools/enrichment/topgo.yaml +33 -0
  287. bioflowkit-0.2.0/registry/tools/epigenomics/bismark.yaml +28 -0
  288. bioflowkit-0.2.0/registry/tools/epigenomics/deeptools.yaml +29 -0
  289. bioflowkit-0.2.0/registry/tools/epigenomics/homer.yaml +27 -0
  290. bioflowkit-0.2.0/registry/tools/epigenomics/macs3.yaml +27 -0
  291. bioflowkit-0.2.0/registry/tools/epigenomics/methylkit.yaml +35 -0
  292. bioflowkit-0.2.0/registry/tools/epigenomics/methylpy.yaml +29 -0
  293. bioflowkit-0.2.0/registry/tools/epigenomics/picard.yaml +31 -0
  294. bioflowkit-0.2.0/registry/tools/epigenomics/tobias.yaml +28 -0
  295. bioflowkit-0.2.0/registry/tools/func_annot/antismash.yaml +27 -0
  296. bioflowkit-0.2.0/registry/tools/func_annot/dbcan.yaml +27 -0
  297. bioflowkit-0.2.0/registry/tools/func_annot/eggnog_mapper.yaml +27 -0
  298. bioflowkit-0.2.0/registry/tools/func_annot/gtdbtk.yaml +27 -0
  299. bioflowkit-0.2.0/registry/tools/func_annot/interproscan.yaml +25 -0
  300. bioflowkit-0.2.0/registry/tools/metagenomics/bracken.yaml +26 -0
  301. bioflowkit-0.2.0/registry/tools/metagenomics/humann3.yaml +27 -0
  302. bioflowkit-0.2.0/registry/tools/metagenomics/kneaddata.yaml +26 -0
  303. bioflowkit-0.2.0/registry/tools/metagenomics/kraken2.yaml +28 -0
  304. bioflowkit-0.2.0/registry/tools/metagenomics/lefse.yaml +27 -0
  305. bioflowkit-0.2.0/registry/tools/metagenomics/maxbin2.yaml +27 -0
  306. bioflowkit-0.2.0/registry/tools/metagenomics/metabat2.yaml +28 -0
  307. bioflowkit-0.2.0/registry/tools/metagenomics/metaphlan4.yaml +27 -0
  308. bioflowkit-0.2.0/registry/tools/proteomics/comet.yaml +26 -0
  309. bioflowkit-0.2.0/registry/tools/proteomics/fragpipe.yaml +34 -0
  310. bioflowkit-0.2.0/registry/tools/proteomics/maxquant.yaml +25 -0
  311. bioflowkit-0.2.0/registry/tools/proteomics/msconvert.yaml +27 -0
  312. bioflowkit-0.2.0/registry/tools/proteomics/msfragger.yaml +34 -0
  313. bioflowkit-0.2.0/registry/tools/proteomics/openms.yaml +27 -0
  314. bioflowkit-0.2.0/registry/tools/proteomics/percolator.yaml +27 -0
  315. bioflowkit-0.2.0/registry/tools/proteomics/xtandem.yaml +25 -0
  316. bioflowkit-0.2.0/registry/tools/qc/cutadapt.yaml +28 -0
  317. bioflowkit-0.2.0/registry/tools/qc/fastp.yaml +30 -0
  318. bioflowkit-0.2.0/registry/tools/qc/fastqc.yaml +26 -0
  319. bioflowkit-0.2.0/registry/tools/qc/filtlong.yaml +25 -0
  320. bioflowkit-0.2.0/registry/tools/qc/multiqc.yaml +26 -0
  321. bioflowkit-0.2.0/registry/tools/qc/nanoplot.yaml +25 -0
  322. bioflowkit-0.2.0/registry/tools/qc/seqkit.yaml +26 -0
  323. bioflowkit-0.2.0/registry/tools/qc/trimgalore.yaml +28 -0
  324. bioflowkit-0.2.0/registry/tools/repeat/earlgrey.yaml +25 -0
  325. bioflowkit-0.2.0/registry/tools/repeat/repeatmasker.yaml +26 -0
  326. bioflowkit-0.2.0/registry/tools/repeat/repeatmodeler.yaml +25 -0
  327. bioflowkit-0.2.0/registry/tools/rnaseq_align/hisat2.yaml +30 -0
  328. bioflowkit-0.2.0/registry/tools/rnaseq_align/kallisto.yaml +26 -0
  329. bioflowkit-0.2.0/registry/tools/rnaseq_align/rsem.yaml +27 -0
  330. bioflowkit-0.2.0/registry/tools/rnaseq_align/salmon.yaml +29 -0
  331. bioflowkit-0.2.0/registry/tools/rnaseq_align/star.yaml +33 -0
  332. bioflowkit-0.2.0/registry/tools/rnaseq_align/stringtie.yaml +27 -0
  333. bioflowkit-0.2.0/registry/tools/rnaseq_align/subread.yaml +27 -0
  334. bioflowkit-0.2.0/registry/tools/single_cell/bustools.yaml +29 -0
  335. bioflowkit-0.2.0/registry/tools/single_cell/cellranger.yaml +27 -0
  336. bioflowkit-0.2.0/registry/tools/single_cell/harmony.yaml +30 -0
  337. bioflowkit-0.2.0/registry/tools/single_cell/monocle3.yaml +34 -0
  338. bioflowkit-0.2.0/registry/tools/single_cell/scanpy.yaml +39 -0
  339. bioflowkit-0.2.0/registry/tools/single_cell/scrublet.yaml +29 -0
  340. bioflowkit-0.2.0/registry/tools/single_cell/seurat.yaml +38 -0
  341. bioflowkit-0.2.0/registry/tools/single_cell/starsolo.yaml +29 -0
  342. bioflowkit-0.2.0/registry/tools/struct_annot/augustus.yaml +27 -0
  343. bioflowkit-0.2.0/registry/tools/struct_annot/bakta.yaml +26 -0
  344. bioflowkit-0.2.0/registry/tools/struct_annot/braker3.yaml +27 -0
  345. bioflowkit-0.2.0/registry/tools/struct_annot/liftoff.yaml +27 -0
  346. bioflowkit-0.2.0/registry/tools/struct_annot/prokka.yaml +26 -0
  347. bioflowkit-0.2.0/registry/tools/variant_calling/bcftools.yaml +28 -0
  348. bioflowkit-0.2.0/registry/tools/variant_calling/freebayes.yaml +27 -0
  349. bioflowkit-0.2.0/registry/tools/variant_calling/gatk4.yaml +27 -0
  350. bioflowkit-0.2.0/registry/tools/variant_calling/snpeff.yaml +27 -0
  351. bioflowkit-0.2.0/scripts/gen_docs.py +97 -0
  352. bioflowkit-0.2.0/scripts/install-schedule-cron-daily.sh +45 -0
  353. bioflowkit-0.2.0/scripts/install-schedule-cron-weekly.sh +47 -0
  354. bioflowkit-0.2.0/scripts/install-schedule-cron.sh +68 -0
  355. bioflowkit-0.2.0/scripts/install-schedule-daily.ps1 +52 -0
  356. bioflowkit-0.2.0/scripts/install-schedule-weekly.ps1 +54 -0
  357. bioflowkit-0.2.0/scripts/install-schedule-windows.ps1 +106 -0
  358. bioflowkit-0.2.0/scripts/pin_digests.py +309 -0
  359. bioflowkit-0.2.0/tests/__init__.py +0 -0
  360. bioflowkit-0.2.0/tests/e2e/__init__.py +0 -0
  361. bioflowkit-0.2.0/tests/e2e/test_prokaryote_short.py +131 -0
  362. bioflowkit-0.2.0/tests/e2e/test_rnaseq.py +91 -0
  363. bioflowkit-0.2.0/tests/fixtures/hypo_assembler.yaml +39 -0
  364. bioflowkit-0.2.0/tests/integration/__init__.py +0 -0
  365. bioflowkit-0.2.0/tests/integration/test_docker_backend.py +191 -0
  366. bioflowkit-0.2.0/tests/integration/test_recipe_real_data.py +98 -0
  367. bioflowkit-0.2.0/tests/integration/test_recipe_smoke_matrix.py +233 -0
  368. bioflowkit-0.2.0/tests/integration/test_sdk_real_docker.py +135 -0
  369. bioflowkit-0.2.0/tests/unit/test_approve.py +280 -0
  370. bioflowkit-0.2.0/tests/unit/test_benchmark.py +107 -0
  371. bioflowkit-0.2.0/tests/unit/test_bugfixes.py +878 -0
  372. bioflowkit-0.2.0/tests/unit/test_crossplatform.py +195 -0
  373. bioflowkit-0.2.0/tests/unit/test_dag.py +56 -0
  374. bioflowkit-0.2.0/tests/unit/test_db.py +122 -0
  375. bioflowkit-0.2.0/tests/unit/test_digest_pinning.py +204 -0
  376. bioflowkit-0.2.0/tests/unit/test_doctor.py +268 -0
  377. bioflowkit-0.2.0/tests/unit/test_failure_report.py +122 -0
  378. bioflowkit-0.2.0/tests/unit/test_freshness_check.py +187 -0
  379. bioflowkit-0.2.0/tests/unit/test_interactive.py +206 -0
  380. bioflowkit-0.2.0/tests/unit/test_io.py +311 -0
  381. bioflowkit-0.2.0/tests/unit/test_llm.py +155 -0
  382. bioflowkit-0.2.0/tests/unit/test_llm_audit.py +253 -0
  383. bioflowkit-0.2.0/tests/unit/test_llm_diagnose.py +225 -0
  384. bioflowkit-0.2.0/tests/unit/test_llm_setup.py +284 -0
  385. bioflowkit-0.2.0/tests/unit/test_ncbi.py +487 -0
  386. bioflowkit-0.2.0/tests/unit/test_planner.py +162 -0
  387. bioflowkit-0.2.0/tests/unit/test_planner_eukaryote.py +152 -0
  388. bioflowkit-0.2.0/tests/unit/test_planner_rnaseq.py +103 -0
  389. bioflowkit-0.2.0/tests/unit/test_recipe_cli_args.py +126 -0
  390. bioflowkit-0.2.0/tests/unit/test_recipe_registry_alignment.py +84 -0
  391. bioflowkit-0.2.0/tests/unit/test_recipes.py +180 -0
  392. bioflowkit-0.2.0/tests/unit/test_recipes_cookbook.py +153 -0
  393. bioflowkit-0.2.0/tests/unit/test_recipes_per_pipeline.py +60 -0
  394. bioflowkit-0.2.0/tests/unit/test_recipes_per_pipeline_e2e.py +240 -0
  395. bioflowkit-0.2.0/tests/unit/test_recommend.py +90 -0
  396. bioflowkit-0.2.0/tests/unit/test_registry_resolver.py +29 -0
  397. bioflowkit-0.2.0/tests/unit/test_registry_sanity.py +132 -0
  398. bioflowkit-0.2.0/tests/unit/test_release_watch.py +150 -0
  399. bioflowkit-0.2.0/tests/unit/test_report.py +173 -0
  400. bioflowkit-0.2.0/tests/unit/test_report_builder.py +208 -0
  401. bioflowkit-0.2.0/tests/unit/test_run_resume.py +230 -0
  402. bioflowkit-0.2.0/tests/unit/test_runner.py +82 -0
  403. bioflowkit-0.2.0/tests/unit/test_sdk_cache.py +342 -0
  404. bioflowkit-0.2.0/tests/unit/test_sdk_external_mounts.py +149 -0
  405. bioflowkit-0.2.0/tests/unit/test_sdk_parallel.py +231 -0
  406. bioflowkit-0.2.0/tests/unit/test_sdk_pipeline.py +286 -0
  407. bioflowkit-0.2.0/tests/unit/test_sdk_retry.py +234 -0
  408. bioflowkit-0.2.0/tests/unit/test_sdk_stage.py +256 -0
  409. bioflowkit-0.2.0/tests/unit/test_sdk_streaming.py +128 -0
  410. bioflowkit-0.2.0/tests/unit/test_skeleton.py +34 -0
  411. bioflowkit-0.2.0/tests/unit/test_update_auto.py +237 -0
  412. bioflowkit-0.2.0/update/REGISTRY_CHANGELOG.md +43 -0
  413. bioflowkit-0.2.0/update/benchmark.py +309 -0
  414. bioflowkit-0.2.0/update/freshness_check.py +396 -0
  415. bioflowkit-0.2.0/update/release_watch.py +300 -0
@@ -0,0 +1,111 @@
1
+ name: Candidate smoke test
2
+
3
+ # T5 (event-driven) cadence: when a PR adds/edits files under
4
+ # update/candidates/, run `bioflow update auto` against just those
5
+ # changes and post a comment summarising the per-candidate results.
6
+ #
7
+ # This catches malformed candidate YAMLs and image-pull failures
8
+ # before a maintainer merges, so the monthly cron never has to deal
9
+ # with broken candidates.
10
+
11
+ on:
12
+ pull_request:
13
+ paths:
14
+ - 'update/candidates/**'
15
+
16
+ permissions:
17
+ contents: read
18
+ pull-requests: write # for the summary comment
19
+
20
+ jobs:
21
+ smoke:
22
+ name: Validate + smoke-test changed candidates
23
+ runs-on: ubuntu-latest
24
+
25
+ steps:
26
+ - name: Checkout PR head
27
+ uses: actions/checkout@v4
28
+ with:
29
+ fetch-depth: 2 # the PR merge commit plus its first parent (the base tip) are needed so `git diff HEAD^ HEAD` yields the PR's changes
30
+
31
+ - name: Identify changed candidate dirs
32
+ id: diff
33
+ run: |
34
+ # Discover which YYYY-MM dirs under update/candidates/ were added to or modified.
35
+ # We pass each to `bioflow update auto --candidates-dir`; deleted YAMLs are skipped (--diff-filter=d) because their directory may no longer exist.
36
+ changed=$(git diff --name-only --diff-filter=d HEAD^ HEAD -- 'update/candidates/**/*.yaml' \
37
+ | xargs -I{} dirname {} \
38
+ | sort -u \
39
+ | tr '\n' ' ')
40
+ echo "dirs=$changed" >> "$GITHUB_OUTPUT"
41
+ if [ -z "$changed" ]; then
42
+ echo "No candidate YAMLs changed; nothing to do."
43
+ else
44
+ echo "Changed candidate dirs: $changed"
45
+ fi
46
+
47
+ - name: Set up Python
48
+ if: steps.diff.outputs.dirs != ''
49
+ uses: actions/setup-python@v5
50
+ with:
51
+ python-version: "3.12"
52
+
53
+ - name: Install bioflow
54
+ if: steps.diff.outputs.dirs != ''
55
+ run: pip install -e ".[dev]"
56
+
57
+ - name: Run smoke test on each changed candidate dir
58
+ if: steps.diff.outputs.dirs != ''
59
+ id: smoke
60
+ run: |
61
+ set -euo pipefail
62
+ mkdir -p _ci_reports
63
+ rc=0
64
+ for d in ${{ steps.diff.outputs.dirs }}; do
65
+ echo "==> Smoke-testing $d"
66
+ base=$(basename "$d")
67
+ bioflow update auto --candidates-dir "$d" \
68
+ --report "_ci_reports/$base.json" || rc=$?
69
+ done
70
+ echo "exitcode=$rc" >> "$GITHUB_OUTPUT"
71
+
72
+ - name: Upload reports
73
+ if: steps.diff.outputs.dirs != ''
74
+ uses: actions/upload-artifact@v4
75
+ with:
76
+ name: candidate-smoke-reports
77
+ path: _ci_reports/
78
+
79
+ - name: Comment summary on PR
80
+ if: steps.diff.outputs.dirs != ''
81
+ uses: actions/github-script@v7
82
+ with:
83
+ script: |
84
+ const fs = require('fs');
85
+ const path = require('path');
86
+ const dir = '_ci_reports';
87
+ if (!fs.existsSync(dir)) return;
88
+ let body = '## bioflow candidate smoke test\n\n';
89
+ for (const f of fs.readdirSync(dir)) {
90
+ const data = JSON.parse(fs.readFileSync(path.join(dir, f), 'utf-8'));
91
+ body += `### \`${f.replace(/\.json$/, '')}\`\n\n`;
92
+ body += `- scanned: **${data.candidates_scanned}**\n`;
93
+ body += `- auto_approve: ${data.auto_approve}\n`;
94
+ body += `- real docker: ${data.real_docker}\n\n`;
95
+ body += '| candidate | passed | error |\n|---|---|---|\n';
96
+ for (const r of (data.results || [])) {
97
+ const ok = r.passed ? '✅' : '❌';
98
+ body += `| \`${path.basename(r.candidate)}\` | ${ok} | ${String(r.error || '').replace(/[\r\n|]+/g, ' ').slice(0, 80)} |\n`; // flatten newlines and pipes so a multi-line error cannot break the Markdown table row
99
+ }
100
+ body += '\n';
101
+ }
102
+ github.rest.issues.createComment({
103
+ issue_number: context.issue.number,
104
+ owner: context.repo.owner,
105
+ repo: context.repo.repo,
106
+ body,
107
+ });
108
+
109
+ - name: Fail if any candidate failed
110
+ if: steps.diff.outputs.dirs != '' && steps.smoke.outputs.exitcode != '0'
111
+ run: exit ${{ steps.smoke.outputs.exitcode }}
@@ -0,0 +1,130 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ unit-tests:
11
+ name: Unit tests (Python ${{ matrix.python-version }})
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ matrix:
15
+ python-version: ["3.9", "3.11", "3.12"]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set up Python ${{ matrix.python-version }}
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Install dependencies
26
+ run: pip install -e ".[dev]"
27
+
28
+ - name: Run unit tests
29
+ run: python -m pytest tests/unit -q --tb=short
30
+
31
+ lint:
32
+ name: Lint (ruff)
33
+ runs-on: ubuntu-latest
34
+
35
+ steps:
36
+ - uses: actions/checkout@v4
37
+
38
+ - name: Set up Python
39
+ uses: actions/setup-python@v5
40
+ with:
41
+ python-version: "3.12"
42
+
43
+ - name: Install ruff
44
+ run: pip install ruff
45
+
46
+ - name: Run ruff
47
+ run: ruff check .
48
+
49
+ typecheck:
50
+ # Advisory only — mypy issues do not block merging. Provides
51
+ # visibility while we gradually add type annotations. Will be
52
+ # upgraded to a blocking check when the error count reaches 0.
53
+ name: Type check (mypy, advisory)
54
+ runs-on: ubuntu-latest
55
+ continue-on-error: true
56
+
57
+ steps:
58
+ - uses: actions/checkout@v4
59
+
60
+ - name: Set up Python
61
+ uses: actions/setup-python@v5
62
+ with:
63
+ python-version: "3.12"
64
+
65
+ - name: Install mypy
66
+ run: pip install -e ".[dev]" && pip install types-PyYAML types-requests
67
+
68
+ - name: Run mypy
69
+ run: mypy bioflow --ignore-missing-imports || true
70
+
71
+ registry-schema:
72
+ name: Validate registry YAMLs
73
+ runs-on: ubuntu-latest
74
+
75
+ steps:
76
+ - uses: actions/checkout@v4
77
+
78
+ - name: Set up Python
79
+ uses: actions/setup-python@v5
80
+ with:
81
+ python-version: "3.12"
82
+
83
+ - name: Install dependencies
84
+ run: pip install -e ".[dev]"
85
+
86
+ - name: Validate all tool YAMLs against schema
87
+ run: |
88
+ python - <<'EOF'
89
+ import yaml, jsonschema, pathlib, sys
90
+
91
+ schema = yaml.safe_load(
92
+ pathlib.Path("registry/schema.yaml").read_text(encoding="utf-8")
93
+ )
94
+ errors = []
95
+ for p in pathlib.Path("registry/tools").rglob("*.yaml"):
96
+ doc = yaml.safe_load(p.read_text(encoding="utf-8"))
97
+ # strip update_meta before validating
98
+ doc.pop("update_meta", None)
99
+ try:
100
+ jsonschema.validate(doc, schema)
101
+ except jsonschema.ValidationError as e:
102
+ errors.append(f"{p}: {e.message}")
103
+ if errors:
104
+ for err in errors:
105
+ print("FAIL:", err, file=sys.stderr)
106
+ sys.exit(1)
107
+ print(f"OK — {len(list(pathlib.Path('registry/tools').rglob('*.yaml')))} tool YAMLs validated")
108
+ EOF
109
+
110
+ digest-audit:
111
+ # Advisory until enough tools are pinned — prints the missing-digest
112
+ # count and lists the first ~30 unpinned tools. Upgrade to a blocking
113
+ # check (drop `|| true`) once the bulk of the registry is pinned.
114
+ name: Container digest pin audit (advisory)
115
+ runs-on: ubuntu-latest
116
+ continue-on-error: true
117
+
118
+ steps:
119
+ - uses: actions/checkout@v4
120
+
121
+ - name: Set up Python
122
+ uses: actions/setup-python@v5
123
+ with:
124
+ python-version: "3.12"
125
+
126
+ - name: Install dependencies
127
+ run: pip install pyyaml
128
+
129
+ - name: Count missing image_digest entries
130
+ run: python scripts/pin_digests.py --audit || true
@@ -0,0 +1,62 @@
1
+ name: Docs
2
+
3
+ # Build the MkDocs site and deploy it to GitHub Pages on every push to
4
+ # main. The tools/recipes reference pages are regenerated from the
5
+ # registry first so the published docs always match the code.
6
+
7
+ on:
8
+ push:
9
+ branches: [main]
10
+ paths:
11
+ - 'docs/**'
12
+ - 'mkdocs.yml'
13
+ - 'registry/**'
14
+ - 'bioflow/recipes/**'
15
+ - 'scripts/gen_docs.py'
16
+ - '.github/workflows/docs.yml'
17
+ workflow_dispatch:
18
+
19
+ permissions:
20
+ contents: read
21
+ pages: write
22
+ id-token: write
23
+
24
+ # Allow one concurrent deployment; cancel in-progress runs of this group
25
+ concurrency:
26
+ group: pages
27
+ cancel-in-progress: true
28
+
29
+ jobs:
30
+ build:
31
+ runs-on: ubuntu-latest
32
+ steps:
33
+ - uses: actions/checkout@v4
34
+
35
+ - uses: actions/setup-python@v5
36
+ with:
37
+ python-version: "3.12"
38
+
39
+ - name: Install bioflow + docs deps
40
+ run: pip install -e ".[docs]"
41
+
42
+ - name: Regenerate reference pages from the registry
43
+ run: python scripts/gen_docs.py
44
+
45
+ - name: Build site (strict)
46
+ run: mkdocs build --strict
47
+
48
+ - name: Upload Pages artifact
49
+ uses: actions/upload-pages-artifact@v3
50
+ with:
51
+ path: site
52
+
53
+ deploy:
54
+ needs: build
55
+ runs-on: ubuntu-latest
56
+ environment:
57
+ name: github-pages
58
+ url: ${{ steps.deployment.outputs.page_url }}
59
+ steps:
60
+ - name: Deploy to GitHub Pages
61
+ id: deployment
62
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,58 @@
1
+ name: Nightly recipe smoke
2
+
3
+ # Real-container smoke tests for the recipe matrix. Runs once a day at
4
+ # 03:00 UTC and on manual dispatch. Failures are loud (workflow goes red
5
+ # in the README badge) but never block PR merges — see ci.yml for the
6
+ # unit-test gate.
7
+
8
+ on:
9
+ schedule:
10
+ - cron: "0 3 * * *"
11
+ workflow_dispatch:
12
+
13
+ jobs:
14
+ smoke-matrix:
15
+ name: Recipe smoke matrix
16
+ runs-on: ubuntu-latest
17
+ # Each recipe pulls 50–600 MB and runs for ~30s; ~10 entries fits well
18
+ # inside this limit even on a cold runner.
19
+ timeout-minutes: 45
20
+
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+
24
+ - name: Set up Python
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: "3.12"
28
+
29
+ - name: Confirm Docker is up on the runner
30
+ run: |
31
+ docker --version
32
+ docker info | head -20
33
+
34
+ - name: Install bioflow with dev extras
35
+ run: pip install -e ".[dev]"
36
+
37
+ - name: Run smoke matrix (real BioContainers)
38
+ env:
39
+ # The smoke matrix is Docker-gated; reduce log noise.
40
+ BIOFLOW_LOG_LEVEL: INFO
41
+ run: |
42
+ python -m pytest tests/integration/test_recipe_smoke_matrix.py \
43
+ -v -m docker \
44
+ --junitxml=reports/smoke.xml
45
+
46
+ - name: Upload junit report
47
+ if: always()
48
+ uses: actions/upload-artifact@v4
49
+ with:
50
+ name: smoke-junit
51
+ path: reports/smoke.xml
52
+
53
+ - name: Fail the job on red results
54
+ if: failure()
55
+ run: |
56
+ echo "::error::Nightly recipe smoke matrix failed — inspect the junit"
57
+ echo " artifact and the per-recipe stderr in the log above."
58
+ exit 1
@@ -0,0 +1,179 @@
1
+ name: Release to PyPI
2
+
3
+ # Triggered by pushing a v* tag. Pipeline:
4
+ # build → sdist + wheel + twine check, artifacts uploaded
5
+ # testpypi → publish to TestPyPI for one final smoke check
6
+ # pypi → publish to PyPI
7
+ # github → cut a GitHub Release with the artifacts attached
8
+ #
9
+ # Authentication uses PyPI Trusted Publishing (OIDC) — no long-lived
10
+ # tokens. Setup steps live in docs/MAINTAINER.md.
11
+ #
12
+ # Manual override: workflow_dispatch lets the maintainer re-run a stage
13
+ # without recreating the tag (e.g. retry pypi after fixing a TestPyPI
14
+ # issue).
15
+
16
+ on:
17
+ push:
18
+ tags:
19
+ - "v*.*.*" # v0.2.0, v0.2.1, v1.0.0, …
20
+ workflow_dispatch:
21
+
22
+ permissions:
23
+ contents: read
24
+
25
+ jobs:
26
+ build:
27
+ name: Build sdist + wheel
28
+ runs-on: ubuntu-latest
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+ with:
32
+ fetch-depth: 0 # for setuptools_scm-style version lookups (future)
33
+
34
+ - uses: actions/setup-python@v5
35
+ with:
36
+ python-version: "3.12"
37
+
38
+ - name: Install build + twine
39
+ run: pip install --upgrade build twine
40
+
41
+ - name: Sanity-check version match
42
+ # Refuse to build if pyproject.toml and bioflow.__version__ have
43
+ # drifted. Caught a real bug in 0.1.13.
44
+ run: |
45
+ python - <<'PY'
46
+ import tomllib, pathlib, re, sys
47
+ pp = tomllib.loads(pathlib.Path("pyproject.toml").read_text())
48
+ py_version = pp["project"]["version"]
49
+ init = pathlib.Path("bioflow/__init__.py").read_text(encoding="utf-8")
50
+ m = re.search(r'__version__\s*=\s*"([^"]+)"', init)
51
+ if not m or m.group(1) != py_version:
52
+ print(f"FAIL: pyproject={py_version} but __version__={m.group(1) if m else 'missing'}", file=sys.stderr)
53
+ sys.exit(1)
54
+ print(f"OK — both report {py_version}")
55
+ PY
56
+
57
+ - name: Build sdist + wheel
58
+ run: python -m build
59
+
60
+ - name: twine check
61
+ run: python -m twine check dist/*
62
+
63
+ - name: Verify wheel ships sdk/, cli/, _bundled_registry/
64
+ # Guard against pyproject.toml regressions that would drop the
65
+ # bundled registry or one of the freshly-split packages. The
66
+ # glob matches whatever PyPI distribution name we ship under
67
+ # (`bioflowkit`, `bioflow`, etc.) — there is only ever one
68
+ # wheel per release in the artifact directory.
69
+ run: |
70
+ python - <<'PY'
71
+ import zipfile, glob, sys
72
+ wheels = glob.glob("dist/*.whl")
73
+ if not wheels:
74
+ print(f"FAIL: no wheel under dist/ (have: {sorted(glob.glob('dist/*'))})",
75
+ file=sys.stderr)
76
+ sys.exit(1)
77
+ whl = wheels[0]
78
+ names = zipfile.ZipFile(whl).namelist()
79
+ required_prefixes = (
80
+ "bioflow/sdk/",
81
+ "bioflow/cli/",
82
+ "bioflow/_bundled_registry/tools/",
83
+ )
84
+ missing = [p for p in required_prefixes
85
+ if not any(n.startswith(p) for n in names)]
86
+ if missing:
87
+ print(f"FAIL: {whl} missing prefixes {missing}", file=sys.stderr)
88
+ sys.exit(1)
89
+ print(f"OK — {whl} ships {len(names)} entries with all required prefixes")
90
+ PY
91
+
92
+ - name: Upload dist as artifact
93
+ uses: actions/upload-artifact@v4
94
+ with:
95
+ name: python-dist
96
+ path: dist/
97
+ if-no-files-found: error
98
+
99
+ testpypi:
100
+ name: Publish to TestPyPI
101
+ needs: build
102
+ runs-on: ubuntu-latest
103
+ environment:
104
+ name: testpypi
105
+ url: https://test.pypi.org/p/bioflowkit
106
+ permissions:
107
+ id-token: write # OIDC token for trusted publishing
108
+ steps:
109
+ - name: Download dist artifact
110
+ uses: actions/download-artifact@v4
111
+ with:
112
+ name: python-dist
113
+ path: dist/
114
+
115
+ - name: Publish to TestPyPI
116
+ uses: pypa/gh-action-pypi-publish@release/v1
117
+ with:
118
+ repository-url: https://test.pypi.org/legacy/
119
+ skip-existing: true # a manual re-run must not fail on files TestPyPI already holds
120
+ pypi:
121
+ name: Publish to PyPI
122
+ needs: testpypi
123
+ runs-on: ubuntu-latest
124
+ environment:
125
+ name: pypi
126
+ url: https://pypi.org/p/bioflowkit
127
+ permissions:
128
+ id-token: write
129
+ steps:
130
+ - name: Download dist artifact
131
+ uses: actions/download-artifact@v4
132
+ with:
133
+ name: python-dist
134
+ path: dist/
135
+
136
+ - name: Publish to PyPI
137
+ uses: pypa/gh-action-pypi-publish@release/v1
138
+
139
+ github:
140
+ name: Cut GitHub Release
141
+ needs: pypi
142
+ runs-on: ubuntu-latest
143
+ permissions:
144
+ contents: write # required to create a release
145
+ steps:
146
+ - uses: actions/checkout@v4
147
+
148
+ - name: Download dist artifact
149
+ uses: actions/download-artifact@v4
150
+ with:
151
+ name: python-dist
152
+ path: dist/
153
+
154
+ - name: Extract release notes for this tag
155
+ # Slice the CHANGELOG section whose header matches the tag (v0.2.0
156
+ # → look for "## [0.2.0]"). Falls back to "(see CHANGELOG.md)".
157
+ id: notes
158
+ run: |
159
+ TAG="${GITHUB_REF_NAME}"
160
+ export VERSION="${TAG#v}" # exported so the Python below reads it as data
161
+ python - <<'PY' > release_notes.md
162
+ import os, pathlib, re
163
+ src = pathlib.Path("CHANGELOG.md").read_text(encoding="utf-8")
164
+ pat = re.compile(rf"^## \[{re.escape(os.environ['VERSION'])}\][^\n]*\n(.*?)(?=^## \[|\Z)",
165
+ re.MULTILINE | re.DOTALL)
166
+ m = pat.search(src)
167
+ print(m.group(1).strip() if m else "(see CHANGELOG.md for details)")
168
+ PY
169
+ echo "RELEASE_NOTES_PATH=release_notes.md" >> "$GITHUB_OUTPUT"
170
+
171
+ - name: Create GitHub Release
172
+ uses: softprops/action-gh-release@v2
173
+ with:
174
+ name: ${{ github.ref_name }}
175
+ body_path: release_notes.md
176
+ files: dist/*
177
+ generate_release_notes: false # we provided our own
178
+ draft: false
179
+ prerelease: ${{ contains(github.ref_name, '-') }} # e.g. v0.2.0-rc1
@@ -0,0 +1,100 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .venv/
6
+ .pytest_cache/
7
+ .mypy_cache/
8
+ .ruff_cache/
9
+
10
+ # Bioflow runtime
11
+ .bioflow_state.json
12
+ data/test/**/out/
13
+ data/references/dbs/**
14
+ data/references/genomes/**
15
+ !data/references/.gitkeep
16
+ !data/references/dbs/.gitkeep
17
+ !data/references/genomes/.gitkeep
18
+
19
+ # OS
20
+ .DS_Store
21
+ Thumbs.db
22
+
23
+ # Comparative genomics analysis outputs (regeneratable)
24
+ analysis/*/genomes/
25
+ analysis/*/genomes_full/
26
+ analysis/*/inputs/
27
+ analysis/*/prokka/
28
+ analysis/*/roary/
29
+ analysis/*/fastani/
30
+ analysis/*/fastani_full/ani_full.tsv
31
+ analysis/*/scoary/results/
32
+ analysis/*/run.log
33
+ analysis/*/full_ani.log
34
+ analysis/*/download_full.log
35
+ analysis/*/fastani_full/genomes.txt
36
+ # IQ-TREE intermediate files (keep only the *.treefile)
37
+ analysis/*/phylogeny/*.bionj
38
+ analysis/*/phylogeny/*.iqtree
39
+ analysis/*/phylogeny/*.log
40
+ analysis/*/phylogeny/*.mldist
41
+ analysis/*/phylogeny/*.splits.nex
42
+ analysis/*/phylogeny/*.uniqueseq.phy
43
+ analysis/*/phylogeny/*.contree
44
+ analysis/*/phylogeny/*.ckp.gz
45
+ analysis/*/phylogeny/*.model.gz
46
+ analysis/*/prokka_full/
47
+ analysis/*/roary_full/
48
+ analysis/*/full_pangenome.log
49
+ analysis/*/scoary_full/results/
50
+ analysis/*/eggnog/cog_db/
51
+ analysis/*/eggnog/results/pangenome_vs_cog.tsv
52
+ analysis/*/eggnog/db/
53
+ analysis/*/eggnog/run.log
54
+ analysis/*/eggnog/pangenome_reps.faa
55
+ analysis/*/eggnog/pangenome_reps.tsv
56
+ analysis/*/abricate_full/
57
+ analysis/*/abricate_full.log
58
+ analysis/*/phylogeny_full/_genes_unaligned/
59
+ analysis/*/phylogeny_full/_genes_aligned/
60
+ analysis/*/phylogeny_full/core_supermatrix.fna
61
+ analysis/*/phylogeny_full/iqtree.log
62
+ analysis/*/phylogeny_full/iqtree_full.iqtree
63
+ analysis/*/phylogeny_full/iqtree_full.log
64
+ analysis/*/phylogeny_full/iqtree_full.mldist
65
+ analysis/*/phylogeny_full/iqtree_full.splits.nex
66
+ analysis/*/phylogeny_full/iqtree_full.uniqueseq.phy
67
+ analysis/*/phylogeny_full/iqtree_full.bionj
68
+ analysis/*/phylogeny_full/iqtree_full.contree
69
+ analysis/*/phylogeny_full/iqtree_full.ckp.gz
70
+ analysis/*/cafe/results/Base_asr.tre
71
+ analysis/*/cafe/results/Base_branch_probabilities.tab
72
+ analysis/*/cafe/results/Base_change.tab
73
+ analysis/*/cafe/results/Base_count.tab
74
+ analysis/*/cafe/results/Base_report.cafe
75
+ analysis/*/cafe/tree_ultrametric.nwk
76
+ examples/_stage_demo_ws/
77
+ examples/_cache_demo_ws/
78
+ examples/_parallel_demo_ws/
79
+ examples/_pipeline_demo_ws/
80
+ analysis/*/pangenome_via_sdk/
81
+ analysis/*/pangenome_sdk_13/
82
+ analysis/*/sdk_13_run.log
83
+ analysis/*/.cache/
84
+ examples/_pectobacterium_ws/
85
+ examples/_pectobacterium_run.log
86
+
87
+ # Update scheduler outputs (per-machine, not for version control)
88
+ update/notifications/
89
+ update/last_run.json
90
+ update/release_watch_state.json
91
+ update/notifications/cron.log
92
+ update/notifications/release_watch.log
93
+
94
+ # Build artifacts
95
+ dist/
96
+ build/
97
+ *.egg-info/
98
+
99
+ # MkDocs build output
100
+ site/