ma-agents 3.12.2 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. package/bin/cli.js +11 -6
  2. package/docs/architecture.md +18 -0
  3. package/lib/bmad-cache/bmb/.claude-plugin/marketplace.json +1 -1
  4. package/lib/bmad-cache/bmb/_git_preserved/hooks/commit-msg.sample +52 -2
  5. package/lib/bmad-cache/bmb/_git_preserved/hooks/fsmonitor-watchman.sample +2 -8
  6. package/lib/bmad-cache/bmb/_git_preserved/index +0 -0
  7. package/lib/bmad-cache/bmb/_git_preserved/objects/pack/pack-8f8b045fef5af6911495cf3b2a89f1ed75e120f7.idx +0 -0
  8. package/lib/bmad-cache/bmb/_git_preserved/objects/pack/pack-8f8b045fef5af6911495cf3b2a89f1ed75e120f7.pack +0 -0
  9. package/lib/bmad-cache/bmb/_git_preserved/objects/pack/pack-8f8b045fef5af6911495cf3b2a89f1ed75e120f7.rev +0 -0
  10. package/lib/bmad-cache/bmb/_git_preserved/packed-refs +1 -1
  11. package/lib/bmad-cache/bmb/_git_preserved/refs/heads/main +1 -1
  12. package/lib/bmad-cache/bmb/_git_preserved/shallow +1 -1
  13. package/lib/bmad-cache/bmb/package-lock.json +2 -2
  14. package/lib/bmad-cache/bmb/package.json +1 -1
  15. package/lib/bmad-cache/bmb/samples/bmad-agent-dream-weaver/assets/module-help.csv +1 -1
  16. package/lib/bmad-cache/bmb/samples/bmad-agent-dream-weaver/scripts/merge-config.py +33 -0
  17. package/lib/bmad-cache/bmb/samples/bmad-agent-dream-weaver/scripts/merge-help-csv.py +28 -0
  18. package/lib/bmad-cache/bmb/samples/sample-module-setup/assets/module-help.csv +1 -1
  19. package/lib/bmad-cache/bmb/samples/sample-module-setup/scripts/cleanup-legacy.py +28 -0
  20. package/lib/bmad-cache/bmb/samples/sample-module-setup/scripts/merge-config.py +33 -0
  21. package/lib/bmad-cache/bmb/samples/sample-module-setup/scripts/merge-help-csv.py +28 -0
  22. package/lib/bmad-cache/bmb/skills/bmad-bmb-setup/assets/module-help.csv +1 -1
  23. package/lib/bmad-cache/bmb/skills/bmad-bmb-setup/scripts/cleanup-legacy.py +28 -0
  24. package/lib/bmad-cache/bmb/skills/bmad-bmb-setup/scripts/merge-config.py +33 -0
  25. package/lib/bmad-cache/bmb/skills/bmad-bmb-setup/scripts/merge-help-csv.py +28 -0
  26. package/lib/bmad-cache/bmb/skills/bmad-eval-runner/assets/Dockerfile +29 -0
  27. package/lib/bmad-cache/bmb/skills/bmad-eval-runner/scripts/docker_setup.py +115 -0
  28. package/lib/bmad-cache/bmb/skills/bmad-eval-runner/scripts/generate_report.py +184 -0
  29. package/lib/bmad-cache/bmb/skills/bmad-eval-runner/scripts/pty_runner.py +171 -0
  30. package/lib/bmad-cache/bmb/skills/bmad-eval-runner/scripts/run_evals.py +492 -0
  31. package/lib/bmad-cache/bmb/skills/bmad-eval-runner/scripts/run_triggers.py +366 -0
  32. package/lib/bmad-cache/bmb/skills/bmad-eval-runner/scripts/utils.py +260 -0
  33. package/lib/bmad-cache/bmb/skills/bmad-module-builder/assets/setup-skill-template/assets/module-help.csv +1 -1
  34. package/lib/bmad-cache/bmb/skills/bmad-module-builder/assets/setup-skill-template/scripts/cleanup-legacy.py +28 -0
  35. package/lib/bmad-cache/bmb/skills/bmad-module-builder/assets/setup-skill-template/scripts/merge-config.py +33 -0
  36. package/lib/bmad-cache/bmb/skills/bmad-module-builder/assets/setup-skill-template/scripts/merge-help-csv.py +28 -0
  37. package/lib/bmad-cache/bmb/skills/bmad-module-builder/assets/standalone-module-template/merge-config.py +33 -0
  38. package/lib/bmad-cache/bmb/skills/bmad-module-builder/assets/standalone-module-template/merge-help-csv.py +28 -0
  39. package/lib/bmad-cache/bmb/skills/bmad-module-builder/scripts/tests/test-validate-module.py +74 -1
  40. package/lib/bmad-cache/bmb/skills/bmad-module-builder/scripts/validate-module.py +24 -13
  41. package/lib/bmad-cache/bmb/skills/bmad-workflow-builder/assets/sample-customize-product-brief.toml +48 -33
  42. package/lib/bmad-cache/bmb/skills/bmad-workflow-builder/scripts/extract-report-json.py +287 -0
  43. package/lib/bmad-cache/bmb/skills/bmad-workflow-builder/scripts/generate-html-report.py +57 -8
  44. package/lib/bmad-cache/bmb/skills/bmad-workflow-builder/scripts/prepass-prompt-metrics.py +7 -7
  45. package/lib/bmad-cache/bmb/skills/module-help.csv +1 -1
  46. package/lib/bmad-cache/bmb/website/public/img/eval-test-types.png +0 -0
  47. package/lib/bmad-cache/cache-manifest.json +17 -18
  48. package/lib/bmad-cache/cis/_git_preserved/hooks/commit-msg.sample +52 -2
  49. package/lib/bmad-cache/cis/_git_preserved/hooks/fsmonitor-watchman.sample +2 -8
  50. package/lib/bmad-cache/cis/_git_preserved/index +0 -0
  51. package/lib/bmad-cache/cis/_git_preserved/objects/pack/pack-18c8290560a98bcb7bf0676e6cc9b2ac5ca2823e.idx +0 -0
  52. package/lib/bmad-cache/cis/_git_preserved/objects/pack/{pack-42ffc048f54e58ce94c6331bc6be97ebbb7936f2.pack → pack-18c8290560a98bcb7bf0676e6cc9b2ac5ca2823e.pack} +0 -0
  53. package/lib/bmad-cache/cis/_git_preserved/objects/pack/pack-18c8290560a98bcb7bf0676e6cc9b2ac5ca2823e.rev +0 -0
  54. package/lib/bmad-cache/cis/_git_preserved/packed-refs +1 -1
  55. package/lib/bmad-cache/cis/_git_preserved/refs/heads/main +1 -1
  56. package/lib/bmad-cache/cis/_git_preserved/refs/tags/v0.2.1 +1 -0
  57. package/lib/bmad-cache/cis/_git_preserved/shallow +1 -1
  58. package/lib/bmad-cache/cis/package-lock.json +2 -2
  59. package/lib/bmad-cache/cis/package.json +1 -1
  60. package/lib/bmad-cache/cis/src/module-help.csv +1 -1
  61. package/lib/bmad-cache/gds/.claude-plugin/marketplace.json +4 -7
  62. package/lib/bmad-cache/gds/README.md +3 -1
  63. package/lib/bmad-cache/gds/_git_preserved/hooks/commit-msg.sample +52 -2
  64. package/lib/bmad-cache/gds/_git_preserved/hooks/fsmonitor-watchman.sample +2 -8
  65. package/lib/bmad-cache/gds/_git_preserved/index +0 -0
  66. package/lib/bmad-cache/gds/_git_preserved/objects/pack/pack-dcb7c556d9bb6b6b70d2301e094eaac6d7300552.idx +0 -0
  67. package/lib/bmad-cache/gds/_git_preserved/objects/pack/{pack-9427a146a90c00bb542cba038874bf9671ba4dc0.pack → pack-dcb7c556d9bb6b6b70d2301e094eaac6d7300552.pack} +0 -0
  68. package/lib/bmad-cache/gds/_git_preserved/objects/pack/pack-dcb7c556d9bb6b6b70d2301e094eaac6d7300552.rev +0 -0
  69. package/lib/bmad-cache/gds/_git_preserved/packed-refs +1 -1
  70. package/lib/bmad-cache/gds/_git_preserved/refs/heads/main +1 -1
  71. package/lib/bmad-cache/gds/_git_preserved/shallow +1 -1
  72. package/lib/bmad-cache/gds/package.json +1 -1
  73. package/lib/bmad-cache/gds/src/agents/gds-agent-game-designer/customize.toml +5 -5
  74. package/lib/bmad-cache/gds/src/agents/gds-agent-game-dev/customize.toml +5 -5
  75. package/lib/bmad-cache/gds/src/agents/gds-agent-game-solo-dev/customize.toml +0 -5
  76. package/lib/bmad-cache/gds/src/module-help.csv +6 -12
  77. package/lib/bmad-cache/gds/src/module.yaml +1 -1
  78. package/lib/bmad-cache/gds/src/workflows/1-preproduction/gds-create-game-brief/customize.toml +97 -22
  79. package/lib/bmad-cache/gds/src/workflows/2-design/gds-gdd/assets/validation-report-template.html +190 -0
  80. package/lib/bmad-cache/gds/src/workflows/2-design/gds-gdd/customize.toml +99 -0
  81. package/lib/bmad-cache/gds/src/workflows/2-design/gds-gdd/scripts/render-validation-html.py +290 -0
  82. package/lib/bmad-cache/gds/src/workflows/2-design/gds-prd/assets/validation-report-template.html +190 -0
  83. package/lib/bmad-cache/gds/src/workflows/2-design/gds-prd/customize.toml +84 -0
  84. package/lib/bmad-cache/gds/src/workflows/2-design/gds-ux/assets/validation-report-template.html +319 -0
  85. package/lib/bmad-cache/gds/src/workflows/2-design/gds-ux/customize.toml +101 -0
  86. package/lib/bmad-cache/gds/src/workflows/3-technical/gds-game-architecture/architecture-patterns.yaml +1 -0
  87. package/lib/bmad-cache/gds/src/workflows/3-technical/gds-game-architecture/decision-catalog.yaml +88 -0
  88. package/lib/bmad-cache/gds/src/workflows/3-technical/gds-game-architecture/engine-mcps.yaml +124 -2
  89. package/lib/bmad-cache/gds/src/workflows/4-production/gds-investigate/customize.toml +62 -0
  90. package/lib/bmad-cache/tea/.claude-plugin/marketplace.json +1 -1
  91. package/lib/bmad-cache/tea/.github/workflows/docs.yaml +3 -3
  92. package/lib/bmad-cache/tea/.github/workflows/quality.yaml +10 -10
  93. package/lib/bmad-cache/tea/AGENTS.md +31 -0
  94. package/lib/bmad-cache/tea/CHANGELOG.md +42 -1
  95. package/lib/bmad-cache/tea/README.md +8 -5
  96. package/lib/bmad-cache/tea/_git_preserved/hooks/commit-msg.sample +52 -2
  97. package/lib/bmad-cache/tea/_git_preserved/hooks/fsmonitor-watchman.sample +2 -8
  98. package/lib/bmad-cache/tea/_git_preserved/index +0 -0
  99. package/lib/bmad-cache/tea/_git_preserved/objects/pack/pack-9e4197e37df7763dd7a05c2965ee921dfd2eb617.idx +0 -0
  100. package/lib/bmad-cache/tea/_git_preserved/objects/pack/{pack-f0df537f2649464ff6c5aee241165eb9c8664227.pack → pack-9e4197e37df7763dd7a05c2965ee921dfd2eb617.pack} +0 -0
  101. package/lib/bmad-cache/tea/_git_preserved/objects/pack/pack-9e4197e37df7763dd7a05c2965ee921dfd2eb617.rev +0 -0
  102. package/lib/bmad-cache/tea/_git_preserved/packed-refs +1 -1
  103. package/lib/bmad-cache/tea/_git_preserved/refs/heads/main +1 -1
  104. package/lib/bmad-cache/tea/_git_preserved/refs/tags/v1.19.0 +1 -0
  105. package/lib/bmad-cache/tea/_git_preserved/shallow +1 -1
  106. package/lib/bmad-cache/tea/docs/explanation/engagement-models.md +15 -16
  107. package/lib/bmad-cache/tea/docs/explanation/knowledge-base-system.md +2 -0
  108. package/lib/bmad-cache/tea/docs/explanation/risk-based-testing.md +1 -1
  109. package/lib/bmad-cache/tea/docs/explanation/tea-overview.md +88 -52
  110. package/lib/bmad-cache/tea/docs/explanation/testing-as-engineering.md +13 -12
  111. package/lib/bmad-cache/tea/docs/glossary/index.md +2 -2
  112. package/lib/bmad-cache/tea/docs/how-to/brownfield/use-tea-for-enterprise.md +19 -18
  113. package/lib/bmad-cache/tea/docs/how-to/brownfield/use-tea-with-existing-tests.md +1 -1
  114. package/lib/bmad-cache/tea/docs/how-to/workflows/run-nfr-assess.md +32 -26
  115. package/lib/bmad-cache/tea/docs/how-to/workflows/run-test-design.md +20 -14
  116. package/lib/bmad-cache/tea/docs/how-to/workflows/run-trace.md +3 -3
  117. package/lib/bmad-cache/tea/docs/index.md +13 -11
  118. package/lib/bmad-cache/tea/docs/reference/commands.md +37 -13
  119. package/lib/bmad-cache/tea/docs/reference/knowledge-base.md +2 -2
  120. package/lib/bmad-cache/tea/package-lock.json +2 -2
  121. package/lib/bmad-cache/tea/package.json +1 -1
  122. package/lib/bmad-cache/tea/src/agents/bmad-tea/customize.toml +20 -15
  123. package/lib/bmad-cache/tea/src/agents/bmad-tea/resources/knowledge/confidence-gate.md +73 -0
  124. package/lib/bmad-cache/tea/src/agents/bmad-tea/resources/knowledge/test-quality.md +1 -0
  125. package/lib/bmad-cache/tea/src/agents/bmad-tea/resources/tea-index.csv +2 -1
  126. package/lib/bmad-cache/tea/src/module-help.csv +2 -2
  127. package/lib/bmad-cache/tea/src/workflows/testarch/README.md +5 -4
  128. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-teach-me-testing/data/role-paths.yaml +1 -1
  129. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-teach-me-testing/data/tea-resources-index.yaml +1 -1
  130. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-teach-me-testing/steps-c/step-04-session-01.md +2 -2
  131. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-teach-me-testing/steps-c/step-04-session-07.md +1 -1
  132. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-teach-me-testing/templates/certificate-template.md +1 -1
  133. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-atdd/resources/tea-index.csv +1 -1
  134. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-automate/resources/tea-index.csv +1 -1
  135. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-ci/resources/tea-index.csv +1 -1
  136. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-framework/resources/tea-index.csv +1 -1
  137. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/SKILL.md +3 -3
  138. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/checklist.md +11 -11
  139. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/instructions.md +4 -2
  140. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/nfr-report-template.md +5 -5
  141. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/resources/tea-index.csv +1 -1
  142. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-01-load-context.md +1 -1
  143. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-01b-resume.md +1 -1
  144. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-02-define-thresholds.md +14 -3
  145. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-04-evaluate-and-score.md +7 -7
  146. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-04a-subagent-security.md +4 -4
  147. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-04b-subagent-performance.md +4 -4
  148. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-04c-subagent-reliability.md +4 -4
  149. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-04d-subagent-scalability.md +4 -4
  150. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-04e-aggregate-nfr.md +4 -4
  151. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/steps-c/step-05-generate-report.md +1 -1
  152. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/workflow-plan.md +1 -1
  153. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-nfr/workflow.yaml +3 -3
  154. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-design/checklist.md +23 -3
  155. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-design/resources/tea-index.csv +1 -1
  156. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-design/steps-c/step-02-load-context.md +7 -0
  157. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-design/steps-c/step-03-risk-and-testability.md +16 -2
  158. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-design/steps-c/step-04-coverage-plan.md +20 -4
  159. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-design/steps-c/step-05-generate-output.md +2 -0
  160. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-design/test-design-architecture-template.md +17 -0
  161. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-design/test-design-qa-template.md +15 -0
  162. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-design/test-design-template.md +16 -0
  163. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-test-review/resources/tea-index.csv +1 -1
  164. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-trace/checklist.md +1 -1
  165. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-trace/resources/tea-index.csv +1 -1
  166. package/lib/bmad-cache/tea/src/workflows/testarch/bmad-testarch-trace/trace-template.md +1 -1
  167. package/lib/bmad-cache/tea/test/test-installation-components.js +49 -0
  168. package/lib/bmad-cache/tea/website/astro.config.mjs +2 -2
  169. package/lib/bmad-cache/wds/README.md +1 -1
  170. package/lib/bmad-cache/wds/_git_preserved/hooks/commit-msg.sample +52 -2
  171. package/lib/bmad-cache/wds/_git_preserved/hooks/fsmonitor-watchman.sample +2 -8
  172. package/lib/bmad-cache/wds/_git_preserved/index +0 -0
  173. package/lib/bmad-cache/wds/_git_preserved/objects/pack/pack-656c3d8d5426e73043b6a7f45eedaab74e3c419e.idx +0 -0
  174. package/lib/bmad-cache/wds/_git_preserved/objects/pack/{pack-96877c1c09123cccb1f91c1412184b11d2b492ad.pack → pack-656c3d8d5426e73043b6a7f45eedaab74e3c419e.pack} +0 -0
  175. package/lib/bmad-cache/wds/_git_preserved/objects/pack/pack-656c3d8d5426e73043b6a7f45eedaab74e3c419e.rev +0 -0
  176. package/lib/bmad-cache/wds/_git_preserved/packed-refs +1 -1
  177. package/lib/bmad-cache/wds/_git_preserved/refs/heads/main +1 -1
  178. package/lib/bmad-cache/wds/_git_preserved/refs/tags/v0.4.3 +1 -0
  179. package/lib/bmad-cache/wds/_git_preserved/shallow +1 -1
  180. package/lib/bmad-cache/wds/eslint.config.mjs +1 -1
  181. package/lib/bmad-cache/wds/package.json +1 -1
  182. package/lib/bmad-cache/wds/src/agents/wds-agent-freya-ux/customize.toml +80 -0
  183. package/lib/bmad-cache/wds/src/agents/wds-agent-mimir-builder/customize.toml +52 -0
  184. package/lib/bmad-cache/wds/src/agents/wds-agent-saga-analyst/customize.toml +70 -0
  185. package/lib/bmad-cache/wds/src/module-help.csv +19 -19
  186. package/lib/bmad-cache/wds/src/module.yaml +28 -0
  187. package/lib/bmad-cache/wds/src/scripts/README.md +155 -0
  188. package/lib/bmad-cache/wds/src/scripts/wds-add-object.js +202 -0
  189. package/lib/bmad-cache/wds/src/scripts/wds-add-spacing.js +158 -0
  190. package/lib/bmad-cache/wds/src/scripts/wds-init-page.js +229 -0
  191. package/lib/bmad-cache/wds/src/scripts/wds-init-scenario.js +120 -0
  192. package/lib/bmad-cache/wds/src/scripts/wds-nav.js +201 -0
  193. package/lib/bmad-cache/wds/src/scripts/wds-validate.js +301 -0
  194. package/lib/bmad-cache/wds/src/workflows/wds-3-scenarios/workflow.xml +450 -0
  195. package/lib/bmad-cache/wds/src/workflows/wds-4-ux-design/workflow-specify.xml +387 -0
  196. package/lib/bmad-extension/.claude-plugin/marketplace.json.template +1 -1
  197. package/lib/bmad-extension-plugin/.claude-plugin/marketplace.json +2 -2
  198. package/lib/bmad.js +91 -7
  199. package/lib/installer.js +28 -6
  200. package/lib/mil498-templates/OCD.md +169 -169
  201. package/lib/mil498-templates/README.md +4 -4
  202. package/lib/mil498-templates/SDD.md +163 -163
  203. package/lib/mil498-templates/SDP.md +307 -307
  204. package/lib/mil498-templates/SRS.md +219 -219
  205. package/lib/mil498-templates/SSDD.md +154 -154
  206. package/lib/mil498-templates/SSS.md +225 -225
  207. package/lib/mil498-templates/STD.md +188 -188
  208. package/lib/templates/instruction-block-git.template.md +25 -0
  209. package/package.json +5 -4
  210. package/scripts/build-bmad-cache.js +143 -42
  211. package/skills/git-workflow-skill/skill.json +21 -21
  212. package/lib/bmad-cache/bmb/_git_preserved/objects/pack/pack-6ecd9fc6445b1281449c5ec49a6c5794708e662e.idx +0 -0
  213. package/lib/bmad-cache/bmb/_git_preserved/objects/pack/pack-6ecd9fc6445b1281449c5ec49a6c5794708e662e.pack +0 -0
  214. package/lib/bmad-cache/bmb/_git_preserved/objects/pack/pack-6ecd9fc6445b1281449c5ec49a6c5794708e662e.rev +0 -0
  215. package/lib/bmad-cache/bmb/_git_preserved/refs/remotes/origin/HEAD +0 -1
  216. package/lib/bmad-cache/bmb/_git_preserved/refs/tags/v1.7.0 +0 -1
  217. package/lib/bmad-cache/bmb/skills/bmad-workflow-builder/scripts/generate-convert-report.py +0 -406
  218. package/lib/bmad-cache/bmb/skills/bmad-workflow-builder/scripts/tests/test_generate_convert_report.py +0 -243
  219. package/lib/bmad-cache/cis/_git_preserved/objects/pack/pack-42ffc048f54e58ce94c6331bc6be97ebbb7936f2.idx +0 -0
  220. package/lib/bmad-cache/cis/_git_preserved/objects/pack/pack-42ffc048f54e58ce94c6331bc6be97ebbb7936f2.rev +0 -0
  221. package/lib/bmad-cache/cis/_git_preserved/refs/remotes/origin/HEAD +0 -1
  222. package/lib/bmad-cache/gds/_git_preserved/objects/pack/pack-9427a146a90c00bb542cba038874bf9671ba4dc0.idx +0 -0
  223. package/lib/bmad-cache/gds/_git_preserved/objects/pack/pack-9427a146a90c00bb542cba038874bf9671ba4dc0.rev +0 -0
  224. package/lib/bmad-cache/gds/_git_preserved/refs/remotes/origin/HEAD +0 -1
  225. package/lib/bmad-cache/gds/src/workflows/2-design/gds-create-gdd/customize.toml +0 -41
  226. package/lib/bmad-cache/gds/src/workflows/2-design/gds-create-prd/customize.toml +0 -41
  227. package/lib/bmad-cache/gds/src/workflows/2-design/gds-create-prd/data/domain-complexity.csv +0 -15
  228. package/lib/bmad-cache/gds/src/workflows/2-design/gds-create-prd/data/project-types.csv +0 -11
  229. package/lib/bmad-cache/gds/src/workflows/2-design/gds-create-ux-design/customize.toml +0 -41
  230. package/lib/bmad-cache/gds/src/workflows/2-design/gds-edit-gdd/customize.toml +0 -41
  231. package/lib/bmad-cache/gds/src/workflows/2-design/gds-edit-prd/customize.toml +0 -41
  232. package/lib/bmad-cache/gds/src/workflows/2-design/gds-validate-gdd/customize.toml +0 -41
  233. package/lib/bmad-cache/gds/src/workflows/2-design/gds-validate-prd/customize.toml +0 -41
  234. package/lib/bmad-cache/gds/src/workflows/2-design/gds-validate-prd/data/domain-complexity.csv +0 -15
  235. package/lib/bmad-cache/gds/src/workflows/2-design/gds-validate-prd/data/project-types.csv +0 -11
  236. package/lib/bmad-cache/tea/_git_preserved/objects/pack/pack-f0df537f2649464ff6c5aee241165eb9c8664227.idx +0 -0
  237. package/lib/bmad-cache/tea/_git_preserved/objects/pack/pack-f0df537f2649464ff6c5aee241165eb9c8664227.rev +0 -0
  238. package/lib/bmad-cache/tea/_git_preserved/refs/remotes/origin/HEAD +0 -1
  239. package/lib/bmad-cache/wds/_git_preserved/objects/pack/pack-96877c1c09123cccb1f91c1412184b11d2b492ad.idx +0 -0
  240. package/lib/bmad-cache/wds/_git_preserved/objects/pack/pack-96877c1c09123cccb1f91c1412184b11d2b492ad.rev +0 -0
  241. package/lib/bmad-cache/wds/_git_preserved/refs/remotes/origin/HEAD +0 -1
  242. package/lib/bmad-cache/wds/src/agents/wds-agent-freya-ux/bmad-skill-manifest.yaml +0 -12
  243. package/lib/bmad-cache/wds/src/agents/wds-agent-saga-analyst/bmad-skill-manifest.yaml +0 -12
  244. package/lib/bmad-cache/wds/src/workflows/wds-0-alignment-signoff/bmad-skill-manifest.yaml +0 -1
  245. package/lib/bmad-cache/wds/src/workflows/wds-0-project-setup/bmad-skill-manifest.yaml +0 -1
  246. package/lib/bmad-cache/wds/src/workflows/wds-1-project-brief/bmad-skill-manifest.yaml +0 -1
  247. package/lib/bmad-cache/wds/src/workflows/wds-2-trigger-mapping/bmad-skill-manifest.yaml +0 -1
  248. package/lib/bmad-cache/wds/src/workflows/wds-3-scenarios/bmad-skill-manifest.yaml +0 -1
  249. package/lib/bmad-cache/wds/src/workflows/wds-4-ux-design/bmad-skill-manifest.yaml +0 -1
  250. package/lib/bmad-cache/wds/src/workflows/wds-5-agentic-development/bmad-skill-manifest.yaml +0 -1
  251. package/lib/bmad-cache/wds/src/workflows/wds-6-asset-generation/bmad-skill-manifest.yaml +0 -1
  252. package/lib/bmad-cache/wds/src/workflows/wds-7-design-system/bmad-skill-manifest.yaml +0 -1
  253. package/lib/bmad-cache/wds/src/workflows/wds-8-product-evolution/bmad-skill-manifest.yaml +0 -1
  254. /package/lib/bmad-cache/gds/src/workflows/2-design/{gds-create-gdd → gds-gdd/assets}/game-types.csv +0 -0
  255. /package/lib/bmad-cache/gds/src/workflows/2-design/{gds-validate-gdd/data → gds-gdd/assets}/genre-complexity.csv +0 -0
@@ -0,0 +1,492 @@
1
+ #!/usr/bin/env python3
2
+ # /// script
3
+ # requires-python = ">=3.9"
4
+ # ///
5
+ """Run a skill's artifact evals in isolated workspaces.
6
+
7
+ For each eval, the runner:
8
+ 1. Stages a fresh workspace (Docker container or local tmp dir under ~/bmad-evals).
9
+ 2. Applies the setup overlay (base then per-eval) so _bmad/ config and dependency
10
+ skills land in the workspace BEFORE the skill is staged — the skill's own copy
11
+ always wins over overlay content.
12
+ 3. Copies the skill into .claude/skills/ so it is discoverable by claude.
13
+ 4. Stages any fixture files declared in the eval's `files` list.
14
+ 5. Runs `claude -p '<prompt>' --output-format stream-json --verbose`, capturing
15
+ the transcript. The Skill tool is available in -p mode and fires for installed
16
+ skills, so dependency skills provided by the setup overlay are properly invokable.
17
+ 6. Rsyncs any files claude wrote into `<run-dir>/<eval-id>/artifacts/`.
18
+ 7. Writes `metrics.json` (tool-call counts, timing, output sizes).
19
+
20
+ Grading is performed separately by the parent skill's grader subagents.
21
+
22
+ Usage:
23
+ python3 run_evals.py \\
24
+ --skill-path PATH \\
25
+ --evals-file PATH/evals.json \\
26
+ --project-root PATH \\
27
+ --output-dir PATH \\
28
+ --isolation docker|local \\
29
+ [--workers N] [--timeout SECS] [--eval-ids A1,B3] [--quiet]
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ import argparse
35
+ import json
36
+ import os
37
+ import shutil
38
+ import subprocess
39
+ import sys
40
+ import time
41
+ from concurrent.futures import ThreadPoolExecutor, as_completed
42
+ from pathlib import Path
43
+
44
+ SCRIPT_DIR = Path(__file__).resolve().parent
45
+ sys.path.insert(0, str(SCRIPT_DIR))
46
+
47
+ from utils import ( # noqa: E402
48
+ apply_setup_overlay,
49
+ discover_setup_dirs,
50
+ new_run_id,
51
+ parse_skill_md,
52
+ read_json,
53
+ read_macos_keychain_credentials,
54
+ stage_credentials,
55
+ utc_now_iso,
56
+ write_json,
57
+ )
58
+
59
# Image that run_eval_docker passes to `docker run`.
DOCKER_IMAGE = "bmad-eval-runner:latest"

# Looked up once at import time and shared by every eval in this process:
# handed to stage_credentials() for local runs and written to a read-only
# /creds mount for Docker runs. None/empty means nothing is staged for Docker.
_KEYCHAIN_CREDS: str | None = read_macos_keychain_credentials()

# Names and glob patterns left out when the project is copied into a local
# workspace (used for both the rsync --exclude flags and the copytree
# fallback's ignore patterns).
RSYNC_EXCLUDES = (
    ".git",
    ".bare",
    "node_modules",
    ".venv",
    "__pycache__",
    ".pytest_cache",
    ".next",
    "dist",
    "build",
    ".cache",
    ".DS_Store",
    "*.pyc",
)
66
+
67
+
68
def stage_workspace_local(
    workspace: Path,
    project_root: Path,
    skill_path: Path,
    fixtures: list[tuple[Path, str]],
    setup_dirs: list[Path] | None = None,
) -> Path:
    """Build a clean local workspace and return the project root inside it.

    Layout created under ``workspace``:

    * ``project/`` - copy of ``project_root`` minus ``RSYNC_EXCLUDES``
    * ``.home/.claude/`` - empty directory for the caller to use as an
      isolated home / config dir

    Staging order matters: project copy, then the setup overlay, then the
    skill, then fixtures. The skill is staged after the overlay so that the
    skill under test always wins over anything the project copy or overlay
    placed at ``.claude/skills/<skill name>``.

    Args:
        workspace: Directory to create (parents included) for this eval.
        project_root: Source project to copy into ``workspace/project``.
        skill_path: Skill directory to expose as
            ``project/.claude/skills/<skill_path.name>``.
        fixtures: ``(source file, destination relative to project)`` pairs.
        setup_dirs: Overlay directories passed to ``apply_setup_overlay``;
            skipped when empty or ``None``.

    Returns:
        Path of ``workspace/project``.

    Raises:
        subprocess.CalledProcessError: If the rsync copy exits non-zero.
    """
    workspace.mkdir(parents=True, exist_ok=True)
    project_dest = workspace / "project"
    home_dir = workspace / ".home"
    (home_dir / ".claude").mkdir(parents=True, exist_ok=True)

    if shutil.which("rsync"):
        excludes = [arg for pat in RSYNC_EXCLUDES for arg in ("--exclude", pat)]
        subprocess.run(
            ["rsync", "-a", *excludes, f"{project_root}/", f"{project_dest}/"],
            check=True,
        )
    else:
        # No rsync on PATH: fall back to a pure-Python copy with the same
        # exclusion patterns.
        shutil.copytree(
            project_root,
            project_dest,
            dirs_exist_ok=True,
            ignore=shutil.ignore_patterns(*RSYNC_EXCLUDES),
        )

    # Apply setup overlay before staging the skill - the skill's own copy wins.
    if setup_dirs:
        apply_setup_overlay(setup_dirs, project_dest)

    skill_link_dir = project_dest / ".claude" / "skills"
    skill_link_dir.mkdir(parents=True, exist_ok=True)
    skill_dest = skill_link_dir / skill_path.name
    skill_src = skill_path.resolve()

    # Leave the entry alone only if it already IS the skill source (e.g. a
    # link left by a previous staging of this workspace). Deleting in that
    # case could remove the source itself.
    already_staged = skill_dest.exists() and skill_dest.resolve() == skill_src
    if not already_staged:
        # Anything else at this path came from the project copy or the
        # overlay (or is a dangling link); remove it so the skill under test
        # takes precedence.
        if skill_dest.is_symlink() or skill_dest.is_file():
            skill_dest.unlink()
        elif skill_dest.is_dir():
            shutil.rmtree(skill_dest)
        try:
            # Link to the resolved path so a relative skill_path does not
            # produce a link that dangles relative to skill_link_dir.
            os.symlink(skill_src, skill_dest)
        except OSError:
            # Symlinks unavailable or not permitted: copy instead.
            shutil.copytree(skill_src, skill_dest, dirs_exist_ok=True)

    # NOTE(review): dest_rel is assumed to be a relative path that stays
    # inside the project; confirm resolve_fixtures enforces that.
    for src, dest_rel in fixtures:
        dest = project_dest / dest_rel
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)

    return project_dest
113
+
114
+
115
def run_eval_local(
    eval_item: dict,
    run_dir: Path,
    skill_path: Path,
    project_root: Path,
    timeout: int,
    setup_dirs: list[Path] | None = None,
) -> dict:
    """Run a single eval in a local workspace and return its summary.

    Creates ``<run_dir>/<eval id>/`` containing ``workspace/``,
    ``artifacts/``, ``prompt.txt``, ``transcript.jsonl`` and
    ``metrics.json``. The workspace is staged with
    ``stage_workspace_local``, ``claude -p`` is run inside it with an
    isolated HOME / CLAUDE_CONFIG_DIR, and files that differ from the
    pre-run snapshot are synced into ``artifacts/``.

    Args:
        eval_item: Eval definition. Reads ``prompt`` (required), ``id``
            (defaults to ``"unnamed"``) and ``files`` (defaults to ``[]``).
        run_dir: Root directory of the current run.
        skill_path: Skill directory under test.
        project_root: Project copied into the workspace; also passed to
            ``resolve_fixtures`` together with ``files``.
        timeout: Seconds allowed for the ``claude`` process.
        setup_dirs: Optional overlay directories forwarded to staging.

    Returns:
        Dict with ``eval_id``, ``elapsed_s``, ``return_code`` (``-1`` on
        timeout), ``transcript`` and ``artifacts_dir`` (both relative to
        ``run_dir``) and the computed ``metrics``.

    Raises:
        KeyError: If ``eval_item`` has no ``prompt``.
    """
    eval_id = str(eval_item.get("id", "unnamed"))
    eval_dir = run_dir / eval_id
    workspace_root = eval_dir / "workspace"
    artifacts_dir = eval_dir / "artifacts"
    transcript_path = eval_dir / "transcript.jsonl"

    eval_dir.mkdir(parents=True, exist_ok=True)
    artifacts_dir.mkdir(parents=True, exist_ok=True)

    fixtures = resolve_fixtures(eval_item.get("files", []), project_root)
    workspace_project = stage_workspace_local(
        workspace_root, project_root, skill_path, fixtures, setup_dirs
    )

    prompt = eval_item["prompt"]
    (eval_dir / "prompt.txt").write_text(prompt, encoding="utf-8")
    # Snapshot after staging so only files touched during the run count as
    # artifacts.
    workspace_snapshot_before = snapshot_files(workspace_project)

    home_dir = workspace_root / ".home"
    config_dir = home_dir / ".claude"
    stage_credentials(config_dir, _KEYCHAIN_CREDS)

    # Minimal environment: the child sees only the isolated home/config dir
    # and PATH, not the rest of the host environment.
    env = {
        "HOME": str(home_dir),
        "CLAUDE_CONFIG_DIR": str(config_dir),
        "PATH": os.environ.get("PATH", ""),
    }
    # Forward the API key only when the host actually has one. This matches
    # the Docker runner's `-e ANTHROPIC_API_KEY` passthrough and avoids
    # exporting an empty value alongside the staged credentials.
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if api_key:
        env["ANTHROPIC_API_KEY"] = api_key

    cmd = [
        "claude",
        "-p", prompt,
        "--output-format", "stream-json",
        "--verbose",
        "--dangerously-skip-permissions",
    ]

    start = time.time()
    try:
        # stdout streams straight into the transcript file; stderr is
        # captured so its tail can be recorded in the metrics.
        with transcript_path.open("wb") as out:
            proc = subprocess.run(
                cmd,
                stdout=out,
                stderr=subprocess.PIPE,
                cwd=str(workspace_project),
                env=env,
                timeout=timeout,
            )
    except subprocess.TimeoutExpired as e:
        elapsed = time.time() - start
        return_code = -1
        stderr_tail = f"TIMEOUT after {timeout}s"
        if e.stderr:
            stderr_tail += "\n" + e.stderr.decode("utf-8", errors="replace")[-2000:]
    else:
        elapsed = time.time() - start
        return_code = proc.returncode
        stderr_tail = (proc.stderr or b"").decode("utf-8", errors="replace")[-2000:]

    # Artifacts are collected even after a timeout so partial output is kept.
    new_files = diff_workspace(workspace_project, workspace_snapshot_before)
    sync_artifacts(workspace_project, new_files, artifacts_dir)

    metrics = compute_metrics(transcript_path, artifacts_dir, elapsed, return_code, stderr_tail)
    write_json(eval_dir / "metrics.json", metrics)

    return {
        "eval_id": eval_id,
        "elapsed_s": elapsed,
        "return_code": return_code,
        "transcript": str(transcript_path.relative_to(run_dir)),
        "artifacts_dir": str(artifacts_dir.relative_to(run_dir)),
        "metrics": metrics,
    }
192
+
193
+
194
+ def run_eval_docker(
195
+ eval_item: dict,
196
+ run_dir: Path,
197
+ skill_path: Path,
198
+ project_root: Path,
199
+ timeout: int,
200
+ setup_dirs: list[Path] | None = None,
201
+ ) -> dict:
202
+ eval_id = str(eval_item.get("id", "unnamed"))
203
+ eval_dir = run_dir / eval_id
204
+ artifacts_dir = eval_dir / "artifacts"
205
+ transcript_path = eval_dir / "transcript.jsonl"
206
+
207
+ eval_dir.mkdir(parents=True, exist_ok=True)
208
+ artifacts_dir.mkdir(parents=True, exist_ok=True)
209
+ fixtures_staging = eval_dir / "fixtures_in"
210
+ fixtures_staging.mkdir(parents=True, exist_ok=True)
211
+
212
+ fixtures = resolve_fixtures(eval_item.get("files", []), project_root)
213
+ for src, dest_rel in fixtures:
214
+ dest = fixtures_staging / dest_rel
215
+ dest.parent.mkdir(parents=True, exist_ok=True)
216
+ shutil.copy2(src, dest)
217
+
218
+ (eval_dir / "prompt.txt").write_text(eval_item["prompt"], encoding="utf-8")
219
+
220
+ # Pre-merge setup overlay dirs on the host; mount as /setup:ro in the container.
221
+ setup_merged: Path | None = None
222
+ if setup_dirs:
223
+ setup_merged = eval_dir / "setup_merged"
224
+ apply_setup_overlay(setup_dirs, setup_merged)
225
+ if not any(setup_merged.iterdir()):
226
+ setup_merged = None
227
+
228
+ creds_dir: Path | None = None
229
+ if _KEYCHAIN_CREDS:
230
+ creds_dir = eval_dir / "creds"
231
+ creds_dir.mkdir(parents=True, exist_ok=True)
232
+ (creds_dir / ".credentials.json").write_text(_KEYCHAIN_CREDS, encoding="utf-8")
233
+
234
+ container_script = r"""
235
+ set -e
236
+ mkdir -p /workspace
237
+ rsync -a \
238
+ --exclude=.git --exclude=.bare --exclude=node_modules --exclude=.venv \
239
+ --exclude=__pycache__ --exclude=.pytest_cache --exclude=.next \
240
+ --exclude=dist --exclude=build --exclude=.cache --exclude=.DS_Store \
241
+ /project/ /workspace/
242
+ if [ -d /setup ]; then
243
+ rsync -a /setup/ /workspace/
244
+ fi
245
+ mkdir -p /workspace/.claude/skills
246
+ cp -R "$SKILL_SRC" "/workspace/.claude/skills/$SKILL_NAME"
247
+ if [ -d /fixtures ]; then
248
+ cp -R /fixtures/. /workspace/
249
+ fi
250
+ if [ -f /creds/.credentials.json ]; then
251
+ mkdir -p /home/evaluator/.claude
252
+ cp /creds/.credentials.json /home/evaluator/.claude/.credentials.json
253
+ fi
254
+ cd /workspace
255
+ claude -p "$EVAL_PROMPT" \
256
+ --output-format stream-json --verbose \
257
+ --dangerously-skip-permissions \
258
+ > /output/transcript.jsonl 2> /output/stderr.log || true
259
+ mkdir -p /output/artifacts
260
+ rsync -a --exclude=.claude --exclude=node_modules --exclude=.git \
261
+ --filter='+ */' --filter='+ *' \
262
+ /workspace/ /output/artifacts/
263
+ """
264
+
265
+ skill_name = skill_path.name
266
+ cmd = [
267
+ "docker", "run", "--rm",
268
+ "-v", f"{project_root}:/project:ro",
269
+ "-v", f"{skill_path}:/skill_src:ro",
270
+ "-v", f"{eval_dir}:/output",
271
+ "-e", "ANTHROPIC_API_KEY",
272
+ "-e", f"EVAL_PROMPT={eval_item['prompt']}",
273
+ "-e", f"SKILL_SRC=/skill_src",
274
+ "-e", f"SKILL_NAME={skill_name}",
275
+ ]
276
+ if creds_dir:
277
+ cmd += ["-v", f"{creds_dir}:/creds:ro"]
278
+ if fixtures:
279
+ cmd += ["-v", f"{fixtures_staging}:/fixtures:ro"]
280
+ if setup_merged:
281
+ cmd += ["-v", f"{setup_merged}:/setup:ro"]
282
+ cmd += [DOCKER_IMAGE, "bash", "-c", container_script]
283
+
284
+ start = time.time()
285
+ try:
286
+ proc = subprocess.run(
287
+ cmd,
288
+ capture_output=True,
289
+ timeout=timeout + 30,
290
+ )
291
+ elapsed = time.time() - start
292
+ return_code = proc.returncode
293
+ stderr_tail = proc.stderr.decode("utf-8", errors="replace")[-2000:]
294
+ if proc.stdout:
295
+ (eval_dir / "docker.stdout.log").write_bytes(proc.stdout)
296
+ except subprocess.TimeoutExpired as e:
297
+ elapsed = time.time() - start
298
+ return_code = -1
299
+ stderr_tail = f"TIMEOUT after {timeout}s"
300
+ if e.stderr:
301
+ stderr_tail += "\n" + e.stderr.decode("utf-8", errors="replace")[-2000:]
302
+
303
+ metrics = compute_metrics(transcript_path, artifacts_dir, elapsed, return_code, stderr_tail)
304
+ write_json(eval_dir / "metrics.json", metrics)
305
+ shutil.rmtree(fixtures_staging, ignore_errors=True)
306
+
307
+ return {
308
+ "eval_id": eval_id,
309
+ "elapsed_s": elapsed,
310
+ "return_code": return_code,
311
+ "transcript": str(transcript_path.relative_to(run_dir)),
312
+ "artifacts_dir": str(artifacts_dir.relative_to(run_dir)),
313
+ "metrics": metrics,
314
+ }
315
+
316
+
317
def resolve_fixtures(files: list[str], project_root: Path) -> list[tuple[Path, str]]:
    """Locate fixture files and pair each with a staging-relative destination.

    Every entry is looked up under ``project_root`` first and, failing that,
    relative to the current working directory. Entries that match no file are
    reported on stderr and skipped.

    Args:
        files: Fixture paths as listed in the eval definition.
        project_root: Directory that relative entries are resolved against.

    Returns:
        ``(source, dest_rel)`` tuples. ``source`` is the resolved file.
        ``dest_rel`` is the entry itself for plain relative entries; for
        absolute entries or entries containing ``..`` it is the path relative
        to ``project_root`` when the file lives inside it, otherwise just the
        file name. It is therefore always safe to join onto a staging
        directory.
    """
    root = project_root.resolve()
    out: list[tuple[Path, str]] = []
    for entry in files:
        candidate = (project_root / entry).resolve()
        if not candidate.is_file():
            candidate = Path(entry).resolve()
            if not candidate.is_file():
                print(f"Warning: fixture not found: {entry}", file=sys.stderr)
                continue

        requested = Path(entry)
        if requested.anchor or ".." in requested.parts:
            # Joining an anchored path onto the staging dir would discard the
            # staging dir entirely (copying the file onto itself), and ".."
            # would climb out of it, so derive a contained destination.
            if candidate.is_relative_to(root):
                dest_rel = candidate.relative_to(root).as_posix()
            else:
                dest_rel = candidate.name
        else:
            dest_rel = entry
        out.append((candidate, dest_rel))
    return out
330
+
331
+
332
def snapshot_files(root: Path) -> set[str]:
    """Collect the ``root``-relative path of every file found beneath ``root``.

    Directories themselves are not included; only entries for which
    ``Path.is_file()`` is true are recorded.
    """
    return {
        str(entry.relative_to(root))
        for entry in root.rglob("*")
        if entry.is_file()
    }
338
+
339
+
340
def diff_workspace(root: Path, before: set[str]) -> list[str]:
    """Return, sorted, the files now under ``root`` that ``before`` lacks.

    ``before`` is an earlier ``snapshot_files`` result for the same root;
    files that were removed since then are not reported.
    """
    return sorted(snapshot_files(root) - before)
343
+
344
+
345
def sync_artifacts(workspace: Path, new_files: list[str], dest: Path) -> None:
    """Copy newly created workspace files into ``dest``, preserving layout.

    Args:
        workspace: Directory the relative paths in ``new_files`` refer to.
        new_files: Workspace-relative paths of files to copy.
        dest: Directory that receives the copies; parents are created as
            needed.

    Entries that are no longer regular files are skipped, as is anything
    located inside a ``.claude``, ``node_modules``, ``.git`` or ``.venv``
    directory of the workspace.
    """
    excluded = {".claude", "node_modules", ".git", ".venv"}
    for rel in new_files:
        src = workspace / rel
        if not src.is_file():
            continue
        # Test only the workspace-relative components: the workspace itself
        # may legitimately live under e.g. a ".claude" directory, and that
        # must not cause every artifact to be dropped.
        if not excluded.isdisjoint(Path(rel).parts):
            continue
        target = dest / rel
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, target)
355
+
356
+
357
def compute_metrics(transcript: Path, artifacts: Path, elapsed: float,
                    rc: int, stderr_tail: str) -> dict:
    """Summarise one eval run from its transcript and artifact directory.

    Args:
        transcript: JSON-lines transcript file; may be absent.
        artifacts: Directory whose files are measured for output size.
        elapsed: Wall-clock duration of the run in seconds.
        rc: Return code recorded for the run.
        stderr_tail: Trailing stderr text, stored verbatim.

    Returns:
        A JSON-serialisable dict with per-tool call counts, the number of
        ``assistant`` events (``total_steps``), the summed byte size of all
        artifact files (``output_chars``), the transcript's byte size
        (``transcript_chars``), and the pass-through run values.
    """
    tool_calls: dict[str, int] = {}
    total_steps = 0
    if transcript.is_file():
        # Iterate the file rather than str.splitlines(): splitlines also
        # breaks on U+2028/U+2029/\x0b/\x1c etc., which may occur unescaped
        # inside JSON strings and would tear a valid event apart.
        with transcript.open(encoding="utf-8", errors="replace") as stream:
            for raw in stream:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    evt = json.loads(raw)
                except json.JSONDecodeError:
                    continue
                # Only object-shaped assistant events count as steps.
                if not isinstance(evt, dict) or evt.get("type") != "assistant":
                    continue
                total_steps += 1
                message = evt.get("message")
                content = message.get("content") if isinstance(message, dict) else None
                if not isinstance(content, list):
                    # Content can also be a plain string or missing; neither
                    # carries tool_use items.
                    continue
                for item in content:
                    if isinstance(item, dict) and item.get("type") == "tool_use":
                        name = item.get("name", "?")
                        tool_calls[name] = tool_calls.get(name, 0) + 1

    output_chars = 0
    for f in artifacts.rglob("*"):
        if f.is_file():
            try:
                output_chars += f.stat().st_size
            except OSError:
                # Best effort: a file that vanished or is unreadable simply
                # does not contribute to the total.
                pass

    return {
        "elapsed_s": round(elapsed, 2),
        "return_code": rc,
        "tool_calls": tool_calls,
        "total_tool_calls": sum(tool_calls.values()),
        "total_steps": total_steps,
        "output_chars": output_chars,
        "transcript_chars": transcript.stat().st_size if transcript.is_file() else 0,
        "stderr_tail": stderr_tail,
    }
395
+
396
+
397
def main() -> int:
    """Parse CLI arguments, run the selected evals in parallel, write the summary.

    Writes ``run.json`` before execution and ``execution-summary.json``
    afterwards into a fresh run directory under ``--output-dir``, and prints
    the summary as JSON on stdout. Progress lines go to stderr unless
    ``--quiet`` is given.

    Returns:
        ``2`` when the evals file does not exist, otherwise ``0``. Individual
        eval failures are reported through ``exec_failures`` and each
        result's ``return_code`` in the summary, not through the exit code.
        Invalid arguments (including ``--workers`` below 1) exit via
        ``argparse`` with its usual usage error.
    """
    parser = argparse.ArgumentParser(description="Run a skill's artifact evals in isolation")
    parser.add_argument("--skill-path", required=True, type=Path)
    parser.add_argument("--evals-file", required=True, type=Path)
    parser.add_argument("--project-root", required=True, type=Path)
    parser.add_argument("--output-dir", required=True, type=Path)
    parser.add_argument("--isolation", choices=("docker", "local"), required=True)
    parser.add_argument("--workers", type=int, default=8)
    parser.add_argument("--timeout", type=int, default=600)
    parser.add_argument("--eval-ids", default=None, help="Comma-separated subset of eval ids to run")
    parser.add_argument("--quiet", action="store_true")
    args = parser.parse_args()

    if args.workers < 1:
        # ThreadPoolExecutor raises ValueError for max_workers <= 0; report it
        # as a usage error instead of a traceback after the run dir exists.
        parser.error("--workers must be at least 1")

    skill_path = args.skill_path.resolve()
    project_root = args.project_root.resolve()
    evals_file = args.evals_file.resolve()
    if not evals_file.is_file():
        print(f"evals file not found: {evals_file}", file=sys.stderr)
        return 2

    skill_name, _, _ = parse_skill_md(skill_path)
    data = read_json(evals_file)
    # The evals file is either {"evals": [...]} or a bare list of evals.
    evals = data["evals"] if isinstance(data, dict) and "evals" in data else data

    if args.eval_ids:
        wanted = {x.strip() for x in args.eval_ids.split(",") if x.strip()}
        evals = [e for e in evals if str(e.get("id")) in wanted]

    run_id = new_run_id(skill_name)
    run_dir = (args.output_dir / run_id).resolve()
    run_dir.mkdir(parents=True, exist_ok=True)

    write_json(run_dir / "run.json", {
        "run_id": run_id,
        "skill_name": skill_name,
        "skill_path": str(skill_path),
        "project_root": str(project_root),
        "evals_file": str(evals_file),
        "isolation": args.isolation,
        "started_at": utc_now_iso(),
        "eval_count": len(evals),
    })

    runner = run_eval_docker if args.isolation == "docker" else run_eval_local

    def timeout_for(item: dict) -> int:
        # An explicit null timeout in the eval falls back to the CLI default
        # rather than crashing in int(None).
        override = item.get("timeout")
        return args.timeout if override is None else int(override)

    results: list[dict] = []
    if not args.quiet:
        print(
            f"[run_evals] {len(evals)} evals, isolation={args.isolation}, run_dir={run_dir}",
            file=sys.stderr,
        )

    with ThreadPoolExecutor(max_workers=args.workers) as pool:
        future_to_eval = {
            pool.submit(
                runner,
                item,
                run_dir,
                skill_path,
                project_root,
                timeout_for(item),
                discover_setup_dirs(evals_file, str(item.get("id", ""))),
            ): item
            for item in evals
        }
        for fut in as_completed(future_to_eval):
            item = future_to_eval[fut]
            try:
                res = fut.result()
            except Exception as e:
                # Boundary handler: one crashing eval must not abort the
                # others; record it as a failed result instead.
                res = {"eval_id": str(item.get("id")), "error": str(e), "return_code": -1}
            results.append(res)
            if not args.quiet:
                rc = res.get("return_code")
                status = "ok" if rc == 0 else f"rc={rc}"
                print(
                    f"  [{status}] eval {res.get('eval_id')} ({res.get('elapsed_s', 0):.1f}s)",
                    file=sys.stderr,
                )

    summary = {
        "run_id": run_id,
        "completed_at": utc_now_iso(),
        "total": len(evals),
        "executed": len(results),
        "exec_failures": sum(1 for r in results if r.get("return_code") != 0),
        "run_dir": str(run_dir),
        "results": results,
    }
    write_json(run_dir / "execution-summary.json", summary)
    print(json.dumps(summary, indent=2))
    return 0
489
+
490
+
491
if __name__ == "__main__":
    # Script entry point: propagate main()'s return value as the exit status.
    raise SystemExit(main())