@kontourai/flow-agents 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. package/.githooks/pre-push +11 -0
  2. package/.github/workflows/ci.yml +210 -0
  3. package/.github/workflows/docs-pages.yml +52 -0
  4. package/.github/workflows/publish-npm.yml +104 -0
  5. package/AGENTS.md +26 -0
  6. package/CHANGELOG.md +66 -0
  7. package/CODE_OF_CONDUCT.md +25 -0
  8. package/CONTEXT.md +300 -0
  9. package/CONTRIBUTING.md +44 -0
  10. package/LICENSE +201 -0
  11. package/README.md +129 -0
  12. package/SECURITY.md +33 -0
  13. package/agent-cards/dev.json +19 -0
  14. package/agents/dev.json +127 -0
  15. package/agents/tool-code-reviewer.json +61 -0
  16. package/agents/tool-dependencies-updater.json +118 -0
  17. package/agents/tool-explore-config.json +92 -0
  18. package/agents/tool-explore-deps.json +92 -0
  19. package/agents/tool-explore-entry.json +92 -0
  20. package/agents/tool-explore-patterns.json +92 -0
  21. package/agents/tool-explore-structure.json +92 -0
  22. package/agents/tool-explore-tests.json +92 -0
  23. package/agents/tool-planner.json +57 -0
  24. package/agents/tool-playwright.json +145 -0
  25. package/agents/tool-security-reviewer.json +56 -0
  26. package/agents/tool-verifier.json +61 -0
  27. package/agents/tool-worker.json +58 -0
  28. package/build/src/cli/console-learning-projection.js +123 -0
  29. package/build/src/cli/docs-preview.js +39 -0
  30. package/build/src/cli/effective-backlog-settings.js +102 -0
  31. package/build/src/cli/export-bookmarks.js +38 -0
  32. package/build/src/cli/fixture-retirement-audit.js +140 -0
  33. package/build/src/cli/flow-kit.js +138 -0
  34. package/build/src/cli/import-bookmarks.js +50 -0
  35. package/build/src/cli/init.js +239 -0
  36. package/build/src/cli/instinct-cli.js +93 -0
  37. package/build/src/cli/promote-workflow-artifact.js +63 -0
  38. package/build/src/cli/publish-change-helper.js +154 -0
  39. package/build/src/cli/pull-work-provider.js +469 -0
  40. package/build/src/cli/runtime-adapter.js +23 -0
  41. package/build/src/cli/telemetry-doctor.js +221 -0
  42. package/build/src/cli/usage-feedback.js +443 -0
  43. package/build/src/cli/validate-hook-influence.js +152 -0
  44. package/build/src/cli/validate-source-tree.js +31 -0
  45. package/build/src/cli/validate-workflow-artifacts.js +486 -0
  46. package/build/src/cli/veritas-governance.js +262 -0
  47. package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
  48. package/build/src/cli/workflow-sidecar.js +816 -0
  49. package/build/src/cli.js +89 -0
  50. package/build/src/flow-kit/validate.js +75 -0
  51. package/build/src/lib/args.js +45 -0
  52. package/build/src/lib/fs.js +62 -0
  53. package/build/src/lib/workflow-learning-projection.js +334 -0
  54. package/build/src/runtime-adapters.js +146 -0
  55. package/build/src/tools/build-universal-bundles.js +397 -0
  56. package/build/src/tools/common.js +56 -0
  57. package/build/src/tools/filter-installed-packs.js +132 -0
  58. package/build/src/tools/generate-context-map.js +198 -0
  59. package/build/src/tools/validate-package.js +64 -0
  60. package/build/src/tools/validate-source-tree.js +622 -0
  61. package/console.telemetry.json +176 -0
  62. package/context/base-rules.md +17 -0
  63. package/context/code-review-standards.md +62 -0
  64. package/context/coding-standards.md +42 -0
  65. package/context/common/orchestrators.md +12 -0
  66. package/context/common/subagents.md +28 -0
  67. package/context/contracts/artifact-contract.md +182 -0
  68. package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
  69. package/context/contracts/delivery-contract.md +69 -0
  70. package/context/contracts/execution-contract.md +53 -0
  71. package/context/contracts/governance-adapter-contract.md +67 -0
  72. package/context/contracts/planning-contract.md +85 -0
  73. package/context/contracts/review-contract.md +104 -0
  74. package/context/contracts/sandbox-policy.md +52 -0
  75. package/context/contracts/verification-contract.md +134 -0
  76. package/context/contracts/work-item-contract.md +215 -0
  77. package/context/deferred/demo-mode.md +33 -0
  78. package/context/deferred/languages/go.md +31 -0
  79. package/context/deferred/languages/python.md +31 -0
  80. package/context/deferred/languages/typescript.md +34 -0
  81. package/context/deferred/parallelization.md +35 -0
  82. package/context/deferred/worktree-isolation.md +24 -0
  83. package/context/development-workflow.md +50 -0
  84. package/context/scripts/context-budget/budget-scan.sh +166 -0
  85. package/context/scripts/detect-tools.sh +3 -0
  86. package/context/scripts/discover-agents.sh +28 -0
  87. package/context/scripts/git-status.sh +49 -0
  88. package/context/scripts/hooks/config-protection.js +79 -0
  89. package/context/scripts/hooks/desktop-notify.sh +39 -0
  90. package/context/scripts/hooks/governance-audit.sh +135 -0
  91. package/context/scripts/hooks/lib/audit-transport.sh +40 -0
  92. package/context/scripts/hooks/lib/hook-flags.js +49 -0
  93. package/context/scripts/hooks/lib/patterns.sh +57 -0
  94. package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
  95. package/context/scripts/hooks/post-edit-accumulator.js +66 -0
  96. package/context/scripts/hooks/pre-commit-quality.js +194 -0
  97. package/context/scripts/hooks/quality-gate.js +93 -0
  98. package/context/scripts/hooks/report-only-guard.js +21 -0
  99. package/context/scripts/hooks/run-hook.js +136 -0
  100. package/context/scripts/hooks/stop-format-typecheck.js +141 -0
  101. package/context/scripts/hooks/stop-goal-fit.js +337 -0
  102. package/context/scripts/hooks/workflow-steering.js +250 -0
  103. package/context/scripts/telemetry/console-presets.sh +14 -0
  104. package/context/scripts/telemetry/install-console-config.sh +214 -0
  105. package/context/scripts/telemetry/lib/config.sh +85 -0
  106. package/context/scripts/telemetry/lib/enrich.sh +115 -0
  107. package/context/scripts/telemetry/lib/redact.sh +22 -0
  108. package/context/scripts/telemetry/lib/session.sh +63 -0
  109. package/context/scripts/telemetry/lib/transport.sh +183 -0
  110. package/context/scripts/telemetry/lib/usage.sh +29 -0
  111. package/context/scripts/telemetry/sync-agents.sh +173 -0
  112. package/context/scripts/telemetry/telemetry.conf +23 -0
  113. package/context/scripts/telemetry/telemetry.sh +387 -0
  114. package/context/scripts/validate-package.sh +89 -0
  115. package/context/settings/backlog-provider-settings.json +54 -0
  116. package/context/templates/core/identity.md +26 -0
  117. package/context/templates/core/user.md +15 -0
  118. package/docs/_config.yml +15 -0
  119. package/docs/_layouts/default.html +87 -0
  120. package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
  121. package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
  122. package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
  123. package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
  124. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
  125. package/docs/adr/0006-typescript-first-source-policy.md +98 -0
  126. package/docs/agent-system-guidebook.md +391 -0
  127. package/docs/agent-usage-feedback-loop.md +351 -0
  128. package/docs/assets/favicon.svg +13 -0
  129. package/docs/assets/og-image.png +0 -0
  130. package/docs/assets/site.css +774 -0
  131. package/docs/assets/site.js +139 -0
  132. package/docs/configurable-workflow-routing.md +174 -0
  133. package/docs/context-map.md +145 -0
  134. package/docs/developer-architecture.md +145 -0
  135. package/docs/developer-hook-setup.md +61 -0
  136. package/docs/fixture-ownership.md +44 -0
  137. package/docs/flow-kit-repository-contract.md +180 -0
  138. package/docs/index.md +129 -0
  139. package/docs/kontour-resource-contract.md +358 -0
  140. package/docs/migrations.md +64 -0
  141. package/docs/north-star.md +322 -0
  142. package/docs/operating-layers.md +110 -0
  143. package/docs/repository-structure.md +132 -0
  144. package/docs/sandbox-policy.md +56 -0
  145. package/docs/skills-map.md +203 -0
  146. package/docs/standards-register.md +96 -0
  147. package/docs/veritas-integration.md +165 -0
  148. package/docs/work-item-adapters.md +72 -0
  149. package/docs/workflow-artifact-lifecycle.md +141 -0
  150. package/docs/workflow-eval-strategy.md +295 -0
  151. package/docs/workflow-shared-contracts.md +51 -0
  152. package/docs/workflow-usage-guide.md +443 -0
  153. package/evals/ARCHITECTURE.md +143 -0
  154. package/evals/CONVENTIONS.md +58 -0
  155. package/evals/README.md +128 -0
  156. package/evals/acceptance/run.sh +29 -0
  157. package/evals/acceptance/test_claude_harness.sh +242 -0
  158. package/evals/acceptance/test_codex_harness.sh +108 -0
  159. package/evals/acceptance/test_kiro_harness.sh +128 -0
  160. package/evals/cases/dev/404.html +97 -0
  161. package/evals/cases/dev/code-review.yaml +44 -0
  162. package/evals/cases/dev/dashboard.html +300 -0
  163. package/evals/cases/dev/deliver.yaml +66 -0
  164. package/evals/cases/dev/dependency-update.yaml +16 -0
  165. package/evals/cases/dev/explore.yaml +20 -0
  166. package/evals/cases/dev/index.html +370 -0
  167. package/evals/cases/dev/package-lock.json +28 -0
  168. package/evals/cases/dev/package.json +16 -0
  169. package/evals/cases/dev/plan-work.yaml +20 -0
  170. package/evals/cases/dev/promptfooconfig.yaml +666 -0
  171. package/evals/cases/dev/search-first.yaml +20 -0
  172. package/evals/cases/dev/tdd-workflow.yaml +48 -0
  173. package/evals/cases/dev/verify-work.yaml +44 -0
  174. package/evals/cases/dev/workflow.yaml +34 -0
  175. package/evals/ci/run-baseline.sh +283 -0
  176. package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
  177. package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
  178. package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
  179. package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
  180. package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
  181. package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
  182. package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
  183. package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
  184. package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
  185. package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
  186. package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
  187. package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
  188. package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
  189. package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
  190. package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
  191. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
  192. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
  193. package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
  194. package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
  195. package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
  196. package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
  197. package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
  198. package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
  199. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
  200. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
  201. package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
  202. package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
  203. package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
  204. package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
  205. package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
  206. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
  207. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
  208. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
  209. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
  210. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
  211. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
  212. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
  213. package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
  214. package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
  215. package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
  216. package/evals/fixtures/hook-influence/cases.json +336 -0
  217. package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
  218. package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
  219. package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
  220. package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
  221. package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
  222. package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
  223. package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
  224. package/evals/fixtures/surface-trust/provider-absent.json +19 -0
  225. package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
  226. package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
  227. package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
  228. package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
  229. package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
  230. package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
  231. package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
  232. package/evals/integration/test_bundle_install.sh +541 -0
  233. package/evals/integration/test_console_learning_projection.sh +192 -0
  234. package/evals/integration/test_context_map.sh +65 -0
  235. package/evals/integration/test_effective_backlog_settings.sh +58 -0
  236. package/evals/integration/test_fixture_retirement_audit.sh +58 -0
  237. package/evals/integration/test_flow_agents_statusline.sh +93 -0
  238. package/evals/integration/test_flow_kit_repository.sh +90 -0
  239. package/evals/integration/test_goal_fit_hook.sh +482 -0
  240. package/evals/integration/test_hook_category_behaviors.sh +190 -0
  241. package/evals/integration/test_hook_influence_cases.sh +69 -0
  242. package/evals/integration/test_local_flow_kit_install.sh +145 -0
  243. package/evals/integration/test_publish_change_helper.sh +176 -0
  244. package/evals/integration/test_pull_work_provider.sh +140 -0
  245. package/evals/integration/test_runtime_adapter_activation.sh +106 -0
  246. package/evals/integration/test_telemetry.sh +485 -0
  247. package/evals/integration/test_telemetry_doctor.sh +193 -0
  248. package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
  249. package/evals/integration/test_usage_feedback_global.sh +117 -0
  250. package/evals/integration/test_usage_feedback_import.sh +227 -0
  251. package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
  252. package/evals/integration/test_usage_feedback_report.sh +263 -0
  253. package/evals/integration/test_veritas_governance_adapter.sh +235 -0
  254. package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
  255. package/evals/integration/test_workflow_artifacts.sh +1247 -0
  256. package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
  257. package/evals/integration/test_workflow_steering_hook.sh +337 -0
  258. package/evals/lib/assertions/delegated-to.js +40 -0
  259. package/evals/lib/assertions/max-tool-calls.js +15 -0
  260. package/evals/lib/assertions/no-write-tools.js +27 -0
  261. package/evals/lib/assertions/pass-at-k.js +39 -0
  262. package/evals/lib/assertions/telemetry-utils.js +105 -0
  263. package/evals/lib/assertions/tool-called.js +39 -0
  264. package/evals/lib/assertions/verify-after-fix.js +61 -0
  265. package/evals/lib/claude-judge.sh +40 -0
  266. package/evals/lib/claude-provider.sh +74 -0
  267. package/evals/lib/codex-judge.sh +39 -0
  268. package/evals/lib/codex-provider.sh +81 -0
  269. package/evals/lib/eval-dev.sh +5 -0
  270. package/evals/lib/eval-judge.sh +22 -0
  271. package/evals/lib/eval-provider.sh +26 -0
  272. package/evals/lib/eval-report.sh +73 -0
  273. package/evals/lib/kiro-dev.sh +4 -0
  274. package/evals/lib/kiro-judge.sh +17 -0
  275. package/evals/lib/kiro-provider.sh +62 -0
  276. package/evals/lib/node.sh +111 -0
  277. package/evals/promptfooconfig.yaml +70 -0
  278. package/evals/run.sh +309 -0
  279. package/evals/static/test_evidence_refs.sh +141 -0
  280. package/evals/static/test_package.sh +407 -0
  281. package/evals/static/test_repo_hooks.sh +68 -0
  282. package/evals/static/test_universal_bundles.sh +274 -0
  283. package/evals/static/test_workflow_skills.sh +1207 -0
  284. package/install.sh +64 -0
  285. package/integrations/veritas/flow-agents.adapter.json +138 -0
  286. package/integrations/veritas/flow-agents.authority-settings.json +26 -0
  287. package/integrations/veritas/flow-agents.repo-standards.json +82 -0
  288. package/kits/builder/flows/build.flow.json +218 -0
  289. package/kits/builder/flows/shape.flow.json +127 -0
  290. package/kits/builder/kit.json +19 -0
  291. package/kits/catalog.json +11 -0
  292. package/package.json +130 -0
  293. package/packaging/README.md +60 -0
  294. package/packaging/manifest.json +173 -0
  295. package/packaging/packs.json +69 -0
  296. package/powers/dependency-checker/POWER.md +20 -0
  297. package/powers/dependency-checker/mcp.json +20 -0
  298. package/powers/playwright/POWER.md +25 -0
  299. package/powers/playwright/mcp.json +12 -0
  300. package/prompts/code-audit.md +123 -0
  301. package/prompts/kcommit.md +88 -0
  302. package/schemas/backlog-provider-settings.schema.json +138 -0
  303. package/schemas/workflow-acceptance.schema.json +216 -0
  304. package/schemas/workflow-critique.schema.json +113 -0
  305. package/schemas/workflow-evidence.schema.json +357 -0
  306. package/schemas/workflow-handoff.schema.json +52 -0
  307. package/schemas/workflow-learning.schema.json +223 -0
  308. package/schemas/workflow-release.schema.json +172 -0
  309. package/schemas/workflow-state.schema.json +80 -0
  310. package/scripts/README.md +111 -0
  311. package/scripts/build-universal-bundles.js +3 -0
  312. package/scripts/check-content-boundary.cjs +99 -0
  313. package/scripts/context-budget/budget-scan.sh +166 -0
  314. package/scripts/detect-tools.sh +3 -0
  315. package/scripts/discover-agents.sh +28 -0
  316. package/scripts/effective-backlog-settings.js +2 -0
  317. package/scripts/filter-installed-packs.js +2 -0
  318. package/scripts/flow-kit.js +2 -0
  319. package/scripts/generate-context-map.js +2 -0
  320. package/scripts/git-status.sh +49 -0
  321. package/scripts/hooks/claude-hook-adapter.js +174 -0
  322. package/scripts/hooks/claude-telemetry-hook.js +115 -0
  323. package/scripts/hooks/codex-hook-adapter.js +176 -0
  324. package/scripts/hooks/codex-telemetry-hook.js +95 -0
  325. package/scripts/hooks/config-protection.js +79 -0
  326. package/scripts/hooks/desktop-notify.sh +39 -0
  327. package/scripts/hooks/governance-audit.sh +135 -0
  328. package/scripts/hooks/lib/audit-transport.sh +40 -0
  329. package/scripts/hooks/lib/hook-flags.js +49 -0
  330. package/scripts/hooks/lib/patterns.sh +57 -0
  331. package/scripts/hooks/lib/resolve-formatter.js +80 -0
  332. package/scripts/hooks/post-edit-accumulator.js +66 -0
  333. package/scripts/hooks/pre-commit-quality.js +194 -0
  334. package/scripts/hooks/quality-gate.js +93 -0
  335. package/scripts/hooks/report-only-guard.js +21 -0
  336. package/scripts/hooks/run-hook.js +136 -0
  337. package/scripts/hooks/stop-format-typecheck.js +141 -0
  338. package/scripts/hooks/stop-goal-fit.js +337 -0
  339. package/scripts/hooks/workflow-steering.js +250 -0
  340. package/scripts/install-codex-home.sh +106 -0
  341. package/scripts/package.json +3 -0
  342. package/scripts/promote-workflow-artifact.js +2 -0
  343. package/scripts/publish-change-helper.js +2 -0
  344. package/scripts/pull-work-provider.js +2 -0
  345. package/scripts/setup-repo-hooks.sh +8 -0
  346. package/scripts/statusline/flow-agents-statusline.js +157 -0
  347. package/scripts/telemetry/console-presets.sh +14 -0
  348. package/scripts/telemetry/install-console-config.sh +214 -0
  349. package/scripts/telemetry/lib/config.sh +85 -0
  350. package/scripts/telemetry/lib/enrich.sh +115 -0
  351. package/scripts/telemetry/lib/redact.sh +22 -0
  352. package/scripts/telemetry/lib/session.sh +63 -0
  353. package/scripts/telemetry/lib/transport.sh +183 -0
  354. package/scripts/telemetry/lib/usage.sh +29 -0
  355. package/scripts/telemetry/sync-agents.sh +173 -0
  356. package/scripts/telemetry/telemetry.conf +23 -0
  357. package/scripts/telemetry/telemetry.sh +387 -0
  358. package/scripts/usage-feedback.js +2 -0
  359. package/scripts/validate-hook-influence-cases.js +2 -0
  360. package/scripts/validate-package.sh +89 -0
  361. package/scripts/validate-source-tree.js +9 -0
  362. package/skills/agentic-engineering/SKILL.md +62 -0
  363. package/skills/browser-test/SKILL.md +51 -0
  364. package/skills/builder-shape/SKILL.md +76 -0
  365. package/skills/context-budget/SKILL.md +40 -0
  366. package/skills/deliver/SKILL.md +241 -0
  367. package/skills/dependency-update/SKILL.md +68 -0
  368. package/skills/design-probe/SKILL.md +107 -0
  369. package/skills/eval-rebuild/SKILL.md +39 -0
  370. package/skills/evidence-gate/SKILL.md +186 -0
  371. package/skills/execute-plan/SKILL.md +110 -0
  372. package/skills/explore/SKILL.md +137 -0
  373. package/skills/feedback-loop/SKILL.md +87 -0
  374. package/skills/fix-bug/SKILL.md +133 -0
  375. package/skills/frontend-design/SKILL.md +80 -0
  376. package/skills/github-cli/SKILL.md +63 -0
  377. package/skills/idea-to-backlog/SKILL.md +267 -0
  378. package/skills/knowledge-capture/SKILL.md +55 -0
  379. package/skills/learning-review/SKILL.md +115 -0
  380. package/skills/pickup-probe/SKILL.md +114 -0
  381. package/skills/plan-work/SKILL.md +176 -0
  382. package/skills/pull-work/SKILL.md +309 -0
  383. package/skills/release-readiness/SKILL.md +121 -0
  384. package/skills/review-work/SKILL.md +161 -0
  385. package/skills/search-first/SKILL.md +66 -0
  386. package/skills/tdd-workflow/SKILL.md +140 -0
  387. package/skills/verify-work/SKILL.md +109 -0
  388. package/src/cli/console-learning-projection.ts +140 -0
  389. package/src/cli/effective-backlog-settings.ts +99 -0
  390. package/src/cli/fixture-retirement-audit.ts +154 -0
  391. package/src/cli/flow-kit.ts +139 -0
  392. package/src/cli/init.ts +248 -0
  393. package/src/cli/promote-workflow-artifact.ts +64 -0
  394. package/src/cli/publish-change-helper.ts +143 -0
  395. package/src/cli/pull-work-provider.ts +481 -0
  396. package/src/cli/runtime-adapter.ts +24 -0
  397. package/src/cli/telemetry-doctor.ts +243 -0
  398. package/src/cli/usage-feedback.ts +418 -0
  399. package/src/cli/validate-hook-influence.ts +119 -0
  400. package/src/cli/validate-source-tree.ts +30 -0
  401. package/src/cli/validate-workflow-artifacts.ts +411 -0
  402. package/src/cli/veritas-governance.ts +322 -0
  403. package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
  404. package/src/cli/workflow-sidecar.ts +676 -0
  405. package/src/cli.ts +95 -0
  406. package/src/flow-kit/validate.ts +74 -0
  407. package/src/lib/args.ts +43 -0
  408. package/src/lib/fs.ts +62 -0
  409. package/src/lib/workflow-learning-projection.ts +491 -0
  410. package/src/runtime-adapters.ts +154 -0
  411. package/src/tools/build-universal-bundles.ts +366 -0
  412. package/src/tools/common.ts +61 -0
  413. package/src/tools/filter-installed-packs.ts +129 -0
  414. package/src/tools/generate-context-map.ts +199 -0
  415. package/src/tools/validate-package.ts +57 -0
  416. package/src/tools/validate-source-tree.ts +488 -0
  417. package/tsconfig.json +19 -0
  418. package/veritas.claims.json +6 -0
@@ -0,0 +1,1247 @@
1
+ #!/usr/bin/env bash
2
+ # test_workflow_artifacts.sh - shared-contract artifact quality and E2E smoke tests
3
+ set -uo pipefail
4
+
5
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6
+ source "$ROOT/evals/lib/node.sh"
7
+
8
+ TMPDIR_EVAL="$(mktemp -d)"
9
+ errors=0
10
+
11
+ cleanup() {
12
+ rm -rf "$TMPDIR_EVAL"
13
+ }
14
+ trap cleanup EXIT
15
+
16
+ _pass() { echo " ✓ $1"; }
17
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
18
+
19
+ VALIDATOR="validate-workflow-artifacts"
20
+ REPO="$TMPDIR_EVAL/repo"
21
+ ARTIFACT_DIR="$REPO/.flow-agents/workflow-contract-e2e"
22
+ mkdir -p "$ARTIFACT_DIR"
23
+
24
+ cat > "$REPO/AGENTS.md" <<'MARKDOWN'
25
+ # Test Repo
26
+ MARKDOWN
27
+
28
+ cat > "$ARTIFACT_DIR/workflow-contract-e2e--deliver-plan.md" <<'MARKDOWN'
29
+ ---
30
+ role: plan
31
+ parent: workflow-contract-e2e--deliver
32
+ created: 2026-05-06T00:00:00Z
33
+ ---
34
+
35
+ ## Plan
36
+
37
+ Add deterministic artifact validation and wire it into integration evals.
38
+
39
+ ## Definition Of Done
40
+
41
+ - **User outcome:** Maintainers can run one local command and know whether delivery artifacts still satisfy the shared contracts.
42
+ - **Scope:** Validator, integration smoke test, eval runner wiring, and docs.
43
+ - **Acceptance criteria:**
44
+ - [ ] Valid artifact chains pass - Evidence: validator returns exit 0.
45
+ - [ ] Missing Goal Fit fails - Evidence: validator returns non-zero and names Goal Fit.
46
+ - [ ] Green-build-only artifacts fail - Evidence: validator reports acceptance evidence is missing.
47
+ - [ ] Hidden NOT_VERIFIED fails - Evidence: validator reports explicit acceptance is required.
48
+ - **Usefulness checks:**
49
+ - [ ] User-facing workflow is documented or discoverable
50
+ - [ ] Local and global/project scope are separated when relevant
51
+ - [ ] Dashboard/UI changes have visual evidence when relevant
52
+ - [ ] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
53
+ - **Stop-short risks:** Static checks could pass while generated artifacts hide missing evidence.
54
+ - **Durable docs target:** docs/workflow-eval-strategy.md
55
+ - **Sandbox mode:** local-edit
56
+
57
+ ### Wave 1 (parallel)
58
+
59
+ #### Task: Artifact validator
60
+ - **Files:** src/cli/validate-workflow-artifacts.ts
61
+ - **Changes:** Validate plan, delivery, and review artifact contracts.
62
+ - **Acceptance:** Good fixtures pass and bad fixtures fail with actionable messages.
63
+ - **Context:** Shared contracts in context/contracts/.
64
+ MARKDOWN
65
+
66
+ cat > "$ARTIFACT_DIR/workflow-contract-e2e--deliver-review.md" <<'MARKDOWN'
67
+ ---
68
+ role: review
69
+ parent: workflow-contract-e2e--deliver
70
+ created: 2026-05-06T00:00:00Z
71
+ verdict: PASS
72
+ ---
73
+
74
+ ## Verification Report
75
+
76
+ Build: [PASS] flow_agents_node validate-workflow-artifacts fixture, exit 0
77
+ Types: [SKIP] no type checker configured for shell fixtures
78
+ Lint: [SKIP] no linter configured for shell fixtures
79
+ Tests: [PASS] bash evals/integration/test_workflow_artifacts.sh, exit 0
80
+ Security: [SKIP] no production code path touched
81
+ Diff: [PASS] validator and integration fixtures reviewed
82
+
83
+ ### Acceptance Criteria
84
+ - [PASS] Valid artifact chains pass - Evidence: validator returned exit 0.
85
+ - [PASS] Missing Goal Fit fails - Evidence: validator returned non-zero and named Goal Fit.
86
+ - [PASS] Green-build-only artifacts fail - Evidence: validator reported missing acceptance evidence.
87
+ - [PASS] Hidden NOT_VERIFIED fails - Evidence: validator required explicit acceptance or routing.
88
+
89
+ ### Goal Fit
90
+ - [PASS] User outcome - Evidence: one local integration command covers the artifact chain.
91
+ - [PASS] User-facing workflow - Evidence: docs name the command.
92
+ - [PASS] Durable docs target - Evidence: docs/workflow-eval-strategy.md.
93
+ - [PASS] Stop-short risks - Evidence: negative fixtures cover green-only and hidden NOT_VERIFIED.
94
+
95
+ ### Verdict: PASS
96
+ Shared workflow artifacts satisfy the contract.
97
+ MARKDOWN
98
+
99
+ cat > "$ARTIFACT_DIR/workflow-contract-e2e--deliver.md" <<'MARKDOWN'
100
+ # Build workflow contract E2E tests
101
+
102
+ branch: main
103
+ worktree: main
104
+ created: 2026-05-06T00:00:00Z
105
+ status: delivered
106
+ type: deliver
107
+ iteration: 1
108
+
109
+ ## Plan
110
+
111
+ See workflow-contract-e2e--deliver-plan.md.
112
+
113
+ ## Definition Of Done
114
+
115
+ - **User outcome:** Maintainers can run one local command and know whether delivery artifacts still satisfy the shared contracts.
116
+ - **Scope:** Validator, integration smoke test, eval runner wiring, and docs.
117
+ - **Acceptance criteria:**
118
+ - [x] Valid artifact chains pass - Evidence: validator returns exit 0.
119
+ - [x] Missing Goal Fit fails - Evidence: validator returns non-zero and names Goal Fit.
120
+ - [x] Green-build-only artifacts fail - Evidence: validator reports acceptance evidence is missing.
121
+ - [x] Hidden NOT_VERIFIED fails - Evidence: validator reports explicit acceptance is required.
122
+ - **Usefulness checks:**
123
+ - [x] User-facing workflow is documented or discoverable
124
+ - [x] Local and global/project scope are separated when relevant
125
+ - [x] Dashboard/UI changes have visual evidence when relevant
126
+ - [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
127
+ - **Stop-short risks:** Static checks could pass while generated artifacts hide missing evidence.
128
+ - **Durable docs target:** docs/workflow-eval-strategy.md
129
+ - **Sandbox mode:** local-edit
130
+
131
+ ## Execution Progress
132
+
133
+ ### Wave 1 (completed)
134
+ - [x] Artifact validator - done
135
+ - [x] Integration fixtures - done
136
+
137
+ ## Verification Report
138
+
139
+ Build: [PASS] flow_agents_node validate-workflow-artifacts fixture, exit 0
140
+ Types: [SKIP] no type checker configured for shell fixtures
141
+ Lint: [SKIP] no linter configured for shell fixtures
142
+ Tests: [PASS] bash evals/integration/test_workflow_artifacts.sh, exit 0
143
+ Security: [SKIP] no production code path touched
144
+ Diff: [PASS] validator and integration fixtures reviewed
145
+
146
+ ### Acceptance Criteria
147
+ - [PASS] Valid artifact chains pass - Evidence: validator returned exit 0.
148
+ - [PASS] Missing Goal Fit fails - Evidence: validator returned non-zero and named Goal Fit.
149
+ - [PASS] Green-build-only artifacts fail - Evidence: validator reported missing acceptance evidence.
150
+ - [PASS] Hidden NOT_VERIFIED fails - Evidence: validator required explicit acceptance or routing.
151
+
152
+ ### Goal Fit
153
+ - [PASS] User outcome - Evidence: one local integration command covers the artifact chain.
154
+ - [PASS] User-facing workflow - Evidence: docs name the command.
155
+ - [PASS] Durable docs target - Evidence: docs/workflow-eval-strategy.md.
156
+ - [PASS] Stop-short risks - Evidence: negative fixtures cover green-only and hidden NOT_VERIFIED.
157
+
158
+ ### Verdict: PASS
159
+ Shared workflow artifacts satisfy the contract.
160
+
161
+ ## Goal Fit Gate
162
+
163
+ - [x] Original user goal restated
164
+ - [x] Every acceptance criterion has evidence
165
+ - [x] User-facing workflow was exercised or documented
166
+ - [x] Local/project and global scope are handled when relevant
167
+ - [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
168
+ - [x] Dashboard/UI changes have visual evidence when relevant
169
+ - [x] Durable docs target is updated, scheduled for final acceptance, or marked not needed with reason
170
+
171
+ ## Final Acceptance
172
+
173
+ - [x] CI/relevant checks passed
174
+ - [x] Merge/release decision recorded
175
+ - [x] Working artifacts archived or linked
176
+ - [x] Long-lived docs updated with why and how the feature was built
177
+ - [x] Follow-up issues or learning-review items created for deferred work
178
+ MARKDOWN
179
+
180
+ cat > "$ARTIFACT_DIR/state.json" <<'JSON'
181
+ {
182
+ "schema_version": "1.0",
183
+ "task_slug": "workflow-contract-e2e",
184
+ "status": "delivered",
185
+ "phase": "done",
186
+ "created_at": "2026-05-06T00:00:00Z",
187
+ "updated_at": "2026-05-06T00:00:00Z",
188
+ "artifact_paths": [
189
+ "workflow-contract-e2e--deliver.md",
190
+ "workflow-contract-e2e--deliver-plan.md",
191
+ "workflow-contract-e2e--deliver-review.md"
192
+ ],
193
+ "next_action": {
194
+ "status": "done",
195
+ "summary": "Workflow artifact contract fixtures pass validation."
196
+ }
197
+ }
198
+ JSON
199
+
200
+ cat > "$ARTIFACT_DIR/acceptance.json" <<'JSON'
201
+ {
202
+ "schema_version": "1.0",
203
+ "task_slug": "workflow-contract-e2e",
204
+ "source_request": "Build workflow contract E2E tests.",
205
+ "criteria": [
206
+ {
207
+ "id": "valid-chain-passes",
208
+ "description": "Valid artifact chains pass.",
209
+ "status": "pass",
210
+ "evidence_refs": [
211
+ {
212
+ "kind": "artifact",
213
+ "file": "workflow-contract-e2e--deliver-review.md",
214
+ "summary": "Verification artifact for the valid chain."
215
+ }
216
+ ]
217
+ },
218
+ {
219
+ "id": "missing-goal-fit-fails",
220
+ "description": "Missing Goal Fit fails.",
221
+ "status": "pass",
222
+ "evidence_refs": [
223
+ {
224
+ "kind": "source",
225
+ "file": "evals/integration/test_workflow_artifacts.sh",
226
+ "line_start": 1,
227
+ "line_end": 1,
228
+ "excerpt": "test_workflow_artifacts.sh - shared-contract artifact quality and E2E smoke tests"
229
+ }
230
+ ]
231
+ },
232
+ {
233
+ "id": "green-build-only-fails",
234
+ "description": "Green-build-only artifacts fail.",
235
+ "status": "pass",
236
+ "evidence_refs": [
237
+ {
238
+ "kind": "source",
239
+ "file": "evals/integration/test_workflow_artifacts.sh",
240
+ "line_start": 1,
241
+ "line_end": 1,
242
+ "excerpt": "test_workflow_artifacts.sh - shared-contract artifact quality and E2E smoke tests"
243
+ }
244
+ ]
245
+ },
246
+ {
247
+ "id": "hidden-not-verified-fails",
248
+ "description": "Hidden NOT_VERIFIED fails.",
249
+ "status": "pass",
250
+ "evidence_refs": [
251
+ {
252
+ "kind": "source",
253
+ "file": "evals/integration/test_workflow_artifacts.sh",
254
+ "line_start": 1,
255
+ "line_end": 1,
256
+ "excerpt": "test_workflow_artifacts.sh - shared-contract artifact quality and E2E smoke tests"
257
+ }
258
+ ]
259
+ }
260
+ ],
261
+ "goal_fit": {
262
+ "status": "pass",
263
+ "summary": "Maintainers can run one local command and validate workflow artifacts."
264
+ }
265
+ }
266
+ JSON
267
+
268
+ cat > "$ARTIFACT_DIR/evidence.json" <<'JSON'
269
+ {
270
+ "schema_version": "1.0",
271
+ "task_slug": "workflow-contract-e2e",
272
+ "verdict": "pass",
273
+ "checks": [
274
+ {
275
+ "id": "workflow-artifact-validator",
276
+ "kind": "test",
277
+ "status": "pass",
278
+ "command": "flow_agents_node validate-workflow-artifacts fixture",
279
+ "summary": "Valid Markdown artifacts and sidecars pass.",
280
+ "artifact_refs": [
281
+ {
282
+ "kind": "artifact",
283
+ "file": "workflow-contract-e2e--deliver.md",
284
+ "summary": "Delivery artifact validated by the workflow artifact validator."
285
+ }
286
+ ],
287
+ "standard_refs": [
288
+ {
289
+ "standard": "junit",
290
+ "ref": "reports/workflow-artifact-validator.xml",
291
+ "role": "mapping",
292
+ "summary": "JUnit-style test evidence can be linked without flattening it."
293
+ }
294
+ ]
295
+ }
296
+ ],
297
+ "external_evidence": [
298
+ {
299
+ "system": "veritas",
300
+ "ref": {
301
+ "kind": "external",
302
+ "url": "veritas://proof-lanes/workflow-contract-e2e",
303
+ "summary": "Optional Veritas proof-lane reference."
304
+ },
305
+ "summary": "Optional Veritas proof-lane reference.",
306
+ "standard": "veritas"
307
+ }
308
+ ],
309
+ "not_verified_gaps": []
310
+ }
311
+ JSON
312
+
313
+ cat > "$ARTIFACT_DIR/handoff.json" <<'JSON'
314
+ {
315
+ "schema_version": "1.0",
316
+ "task_slug": "workflow-contract-e2e",
317
+ "summary": "Workflow artifact validation is complete.",
318
+ "current_state_ref": "state.json",
319
+ "next_steps": [
320
+ "Keep sidecar schemas aligned with the Markdown workflow contracts."
321
+ ],
322
+ "blockers": [],
323
+ "warnings": []
324
+ }
325
+ JSON
326
+
327
+ cat > "$ARTIFACT_DIR/critique.json" <<'JSON'
328
+ {
329
+ "schema_version": "1.0",
330
+ "task_slug": "workflow-contract-e2e",
331
+ "status": "pass",
332
+ "required": true,
333
+ "updated_at": "2026-05-06T00:00:00Z",
334
+ "critiques": [
335
+ {
336
+ "id": "workflow-contract-review",
337
+ "reviewer": "tool-code-reviewer",
338
+ "reviewed_at": "2026-05-06T00:00:00Z",
339
+ "verdict": "pass",
340
+ "summary": "No blocking findings in the workflow artifact fixture.",
341
+ "artifact_refs": ["workflow-contract-e2e--deliver.md"],
342
+ "findings": []
343
+ }
344
+ ]
345
+ }
346
+ JSON
347
+
348
+ cat > "$ARTIFACT_DIR/learning.json" <<'JSON'
349
+ {
350
+ "schema_version": "1.0",
351
+ "task_slug": "workflow-contract-e2e",
352
+ "status": "learned",
353
+ "updated_at": "2026-05-06T00:00:00Z",
354
+ "records": [
355
+ {
356
+ "id": "workflow-contract-fixture",
357
+ "recorded_at": "2026-05-06T00:00:00Z",
358
+ "source_refs": ["workflow-contract-e2e--deliver.md", "evidence.json"],
359
+ "outcome": "success",
360
+ "facts": ["The workflow artifact validator accepted the complete fixture chain."],
361
+ "interpretation": "A passing learning record can route completed workflow evidence into durable system improvements.",
362
+ "routing": [
363
+ {
364
+ "target": "eval",
365
+ "action": "Keep workflow artifact fixture coverage in integration tests.",
366
+ "status": "completed",
367
+ "ref": "evals/integration/test_workflow_artifacts.sh"
368
+ }
369
+ ],
370
+ "correction": {
371
+ "needed": false,
372
+ "evidence": "The complete fixture chain matched intended workflow artifact behavior."
373
+ }
374
+ }
375
+ ]
376
+ }
377
+ JSON
378
+
379
+ cat > "$ARTIFACT_DIR/release.json" <<'JSON'
380
+ {
381
+ "schema_version": "1.0",
382
+ "task_slug": "workflow-contract-e2e",
383
+ "decision": "merge",
384
+ "updated_at": "2026-05-06T00:00:00Z",
385
+ "scope": "Workflow artifact validator fixtures and sidecar schemas.",
386
+ "evidence_ref": "evidence.json",
387
+ "gates": [
388
+ {
389
+ "name": "merge",
390
+ "status": "pass",
391
+ "summary": "Local static and integration checks passed.",
392
+ "evidence_refs": ["evidence.json"]
393
+ },
394
+ {
395
+ "name": "docs",
396
+ "status": "pass",
397
+ "summary": "Workflow docs are updated.",
398
+ "evidence_refs": ["docs/workflow-eval-strategy.md"]
399
+ }
400
+ ],
401
+ "rollback_plan": {
402
+ "status": "not_required",
403
+ "summary": "No deployment occurs for this fixture.",
404
+ "owner": "maintainer"
405
+ },
406
+ "observability_plan": {
407
+ "status": "not_required",
408
+ "summary": "No runtime surface changes."
409
+ },
410
+ "post_deploy_checks": [],
411
+ "docs": {
412
+ "status": "updated",
413
+ "summary": "Fixture coverage documents release readiness sidecar validation.",
414
+ "refs": ["evals/integration/test_workflow_artifacts.sh"]
415
+ }
416
+ }
417
+ JSON
418
+
419
# Positive case: run the validator over the complete artifact directory with
# both --require-sidecars and --require-critique. stdout and stderr are kept
# in separate files so both can be replayed in the failure message.
if ! flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$ARTIFACT_DIR" \
  >"$TMPDIR_EVAL/valid.out" 2>"$TMPDIR_EVAL/valid.err"; then
  _fail "valid artifact chain failed: $(cat "$TMPDIR_EVAL/valid.out" "$TMPDIR_EVAL/valid.err")"
else
  _pass "valid plan/review/delivery artifact chain and sidecars pass"
fi
424
+
425
+ BAD="$TMPDIR_EVAL/bad"
426
+ mkdir -p "$BAD"
427
+
428
+ cat > "$BAD/missing-goal-fit--deliver.md" <<'MARKDOWN'
429
+ # Missing Goal Fit
430
+
431
+ status: delivered
432
+ type: deliver
433
+
434
+ ## Plan
435
+ Plan exists.
436
+
437
+ ## Definition Of Done
438
+ - **User outcome:** User can inspect the result.
439
+ - **Acceptance criteria:**
440
+ - [x] It works - Evidence: test output
441
+ - **Stop-short risks:** Goal Fit could be missing.
442
+ - **Durable docs target:** docs/test.md
443
+
444
+ ## Verification Report
445
+ Build: [PASS] test
446
+
447
+ ### Acceptance Criteria
448
+ - [PASS] It works - Evidence: test output.
449
+
450
+ ### Verdict: PASS
451
+
452
+ ## Final Acceptance
453
+ - [x] CI/relevant checks passed
454
+ MARKDOWN
455
+
456
# Negative case: the delivery artifact written above has no Goal Fit section.
# The validator must exit non-zero AND its combined stdout/stderr must contain
# "Goal Fit". The captured output is appended to both failure messages so a
# red run can be diagnosed without re-running the validator by hand.
if flow_agents_node "$VALIDATOR" "$BAD/missing-goal-fit--deliver.md" >"$TMPDIR_EVAL/missing.out" 2>&1; then
  _fail "missing Goal Fit artifact should fail: $(cat "$TMPDIR_EVAL/missing.out")"
elif rg -q 'Goal Fit' "$TMPDIR_EVAL/missing.out"; then
  _pass "missing Goal Fit artifact fails with actionable message"
else
  _fail "missing Goal Fit failure did not mention Goal Fit: $(cat "$TMPDIR_EVAL/missing.out")"
fi
463
+
464
+ cat > "$BAD/missing-sandbox--deliver-plan.md" <<'MARKDOWN'
465
+ ---
466
+ role: plan
467
+ created: 2026-05-06T00:00:00Z
468
+ ---
469
+
470
+ ## Plan
471
+ Plan exists.
472
+
473
+ ## Definition Of Done
474
+ - **User outcome:** User can inspect the result.
475
+ - **Acceptance criteria:**
476
+ - [ ] It works - Evidence: test output
477
+ - **Stop-short risks:** Execution boundary could be ambiguous.
478
+ - **Durable docs target:** docs/test.md
479
+
480
+ ### Wave 1 (parallel)
481
+
482
+ #### Task: Test
483
+ - **Files:** test.txt
484
+ - **Changes:** Test fixture.
485
+ - **Acceptance:** Validator reports missing sandbox mode.
486
+ MARKDOWN
487
+
488
# Negative case: the plan fixture omits the "Sandbox mode" bullet entirely.
# Expect a non-zero exit and a message containing "Sandbox mode"; the captured
# validator output is included in every failure message for diagnosis.
if flow_agents_node "$VALIDATOR" "$BAD/missing-sandbox--deliver-plan.md" >"$TMPDIR_EVAL/missing-sandbox.out" 2>&1; then
  _fail "missing Sandbox mode artifact should fail: $(cat "$TMPDIR_EVAL/missing-sandbox.out")"
elif rg -q 'Sandbox mode' "$TMPDIR_EVAL/missing-sandbox.out"; then
  _pass "missing Sandbox mode artifact fails with actionable message"
else
  _fail "missing Sandbox mode failure did not mention Sandbox mode: $(cat "$TMPDIR_EVAL/missing-sandbox.out")"
fi
495
+
496
+ cat > "$BAD/invalid-sandbox--deliver-plan.md" <<'MARKDOWN'
497
+ ---
498
+ role: plan
499
+ created: 2026-05-06T00:00:00Z
500
+ ---
501
+
502
+ ## Plan
503
+ Plan exists.
504
+
505
+ ## Definition Of Done
506
+ - **User outcome:** User can inspect the result.
507
+ - **Acceptance criteria:**
508
+ - [ ] It works - Evidence: test output
509
+ - **Stop-short risks:** Execution boundary could be ambiguous.
510
+ - **Durable docs target:** docs/test.md
511
+ - **Sandbox mode:** global-admin
512
+
513
+ ### Wave 1 (parallel)
514
+
515
+ #### Task: Test
516
+ - **Files:** test.txt
517
+ - **Changes:** Test fixture.
518
+ - **Acceptance:** Validator reports invalid sandbox mode.
519
+ MARKDOWN
520
+
521
# Negative case: the plan fixture declares "Sandbox mode: global-admin".
# Expect a non-zero exit and a message containing "invalid Sandbox mode"; the
# captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/invalid-sandbox--deliver-plan.md" >"$TMPDIR_EVAL/invalid-sandbox.out" 2>&1; then
  _fail "invalid Sandbox mode artifact should fail: $(cat "$TMPDIR_EVAL/invalid-sandbox.out")"
elif rg -q 'invalid Sandbox mode' "$TMPDIR_EVAL/invalid-sandbox.out"; then
  _pass "invalid Sandbox mode artifact fails with actionable message"
else
  _fail "invalid Sandbox mode failure did not mention invalid Sandbox mode: $(cat "$TMPDIR_EVAL/invalid-sandbox.out")"
fi
528
+
529
+ cat > "$BAD/green-only--deliver.md" <<'MARKDOWN'
530
+ # Green Build Only
531
+
532
+ status: delivered
533
+ type: deliver
534
+
535
+ ## Plan
536
+ Plan exists.
537
+
538
+ ## Definition Of Done
539
+ - **User outcome:** User can act on the dashboard.
540
+ - **Acceptance criteria:**
541
+ - [x] Build passes - Evidence: build output
542
+ - **Stop-short risks:** Build could pass while the dashboard is not useful.
543
+ - **Durable docs target:** docs/test.md
544
+
545
+ ## Verification Report
546
+ Build: PASS
547
+ Verdict: PASS
548
+
549
+ ## Goal Fit Gate
550
+ - [x] Original user goal restated
551
+
552
+ ## Final Acceptance
553
+ - [x] CI/relevant checks passed
554
+ MARKDOWN
555
+
556
# Negative case: the delivery fixture reports only "Build: PASS" /
# "Verdict: PASS" with no per-criterion evidence. Expect a non-zero exit and
# the phrase "green build is not enough"; the captured validator output is
# included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/green-only--deliver.md" >"$TMPDIR_EVAL/green.out" 2>&1; then
  _fail "green-build-only artifact should fail: $(cat "$TMPDIR_EVAL/green.out")"
elif rg -q 'green build is not enough' "$TMPDIR_EVAL/green.out"; then
  _pass "green-build-only artifact fails usefulness gate"
else
  _fail "green-build-only failure did not mention usefulness gate: $(cat "$TMPDIR_EVAL/green.out")"
fi
563
+
564
+ cat > "$BAD/hidden-not-verified--deliver.md" <<'MARKDOWN'
565
+ # Hidden NOT_VERIFIED
566
+
567
+ status: delivered
568
+ type: deliver
569
+
570
+ ## Plan
571
+ Plan exists.
572
+
573
+ ## Definition Of Done
574
+ - **User outcome:** User can rely on verification.
575
+ - **Acceptance criteria:**
576
+ - [x] Browser checked - Evidence: attempted screenshot
577
+ - **Stop-short risks:** Browser check might be unavailable.
578
+ - **Durable docs target:** docs/test.md
579
+
580
+ ## Verification Report
581
+ Build: [PASS] test
582
+
583
+ ### Acceptance Criteria
584
+ - [NOT_VERIFIED] Browser checked - browser was unavailable.
585
+
586
+ ### Verdict: PASS
587
+
588
+ ## Goal Fit Gate
589
+ - [x] Original user goal restated
590
+ - [x] Every acceptance criterion has evidence
591
+
592
+ ## Final Acceptance
593
+ - [x] CI/relevant checks passed
594
+ MARKDOWN
595
+
596
# Negative case: the delivery fixture claims "Verdict: PASS" while one
# acceptance criterion is marked [NOT_VERIFIED]. Expect a non-zero exit and a
# message containing "NOT_VERIFIED"; the captured validator output is included
# in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/hidden-not-verified--deliver.md" >"$TMPDIR_EVAL/notverified.out" 2>&1; then
  _fail "hidden NOT_VERIFIED artifact should fail: $(cat "$TMPDIR_EVAL/notverified.out")"
elif rg -q 'NOT_VERIFIED' "$TMPDIR_EVAL/notverified.out"; then
  _pass "hidden NOT_VERIFIED artifact requires explicit decision"
else
  _fail "hidden NOT_VERIFIED failure did not mention NOT_VERIFIED: $(cat "$TMPDIR_EVAL/notverified.out")"
fi
603
+
604
+ mkdir -p "$BAD/bad-sidecar"
605
+ cat > "$BAD/bad-sidecar/evidence.json" <<'JSON'
606
+ {
607
+ "schema_version": "1.0",
608
+ "task_slug": "bad-sidecar",
609
+ "verdict": "maybe",
610
+ "checks": []
611
+ }
612
+ JSON
613
+
614
# Negative case: evidence.json carries verdict "maybe". Expect a non-zero exit
# and the enumerated-verdict message; the captured validator output is included
# in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/bad-sidecar" >"$TMPDIR_EVAL/bad-sidecar.out" 2>&1; then
  _fail "bad sidecar should fail: $(cat "$TMPDIR_EVAL/bad-sidecar.out")"
elif rg -q 'verdict must be one of: pass, partial, fail, not_verified' "$TMPDIR_EVAL/bad-sidecar.out"; then
  _pass "bad sidecar fails with actionable message"
else
  _fail "bad sidecar failure did not mention verdict: $(cat "$TMPDIR_EVAL/bad-sidecar.out")"
fi
621
+
622
+ mkdir -p "$BAD/contradictory-evidence"
623
+ cat > "$BAD/contradictory-evidence/evidence.json" <<'JSON'
624
+ {
625
+ "schema_version": "1.0",
626
+ "task_slug": "contradictory-evidence",
627
+ "verdict": "pass",
628
+ "checks": [
629
+ {
630
+ "id": "failing-check",
631
+ "kind": "test",
632
+ "status": "fail",
633
+ "summary": "A failing check cannot produce a pass verdict."
634
+ }
635
+ ]
636
+ }
637
+ JSON
638
+
639
# Negative case: evidence.json says verdict "pass" while its only check has
# status "fail". Expect a non-zero exit and the pass-verdict consistency
# message; the captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/contradictory-evidence" >"$TMPDIR_EVAL/contradictory-evidence.out" 2>&1; then
  _fail "contradictory evidence sidecar should fail: $(cat "$TMPDIR_EVAL/contradictory-evidence.out")"
elif rg -q 'pass verdict requires all non-skipped checks to pass' "$TMPDIR_EVAL/contradictory-evidence.out"; then
  _pass "contradictory evidence sidecar fails with actionable message"
else
  _fail "contradictory evidence failure did not mention pass verdict: $(cat "$TMPDIR_EVAL/contradictory-evidence.out")"
fi
646
+
647
+ mkdir -p "$BAD/empty-evidence"
648
+ cat > "$BAD/empty-evidence/evidence.json" <<'JSON'
649
+ {
650
+ "schema_version": "1.0",
651
+ "task_slug": "empty-evidence",
652
+ "verdict": "pass",
653
+ "checks": []
654
+ }
655
+ JSON
656
+
657
# Negative case: evidence.json says verdict "pass" with an empty checks array.
# Expect a non-zero exit and the minimum-items message; the captured validator
# output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/empty-evidence" >"$TMPDIR_EVAL/empty-evidence.out" 2>&1; then
  _fail "pass evidence with no checks should fail: $(cat "$TMPDIR_EVAL/empty-evidence.out")"
elif rg -q 'checks must contain at least 1 item' "$TMPDIR_EVAL/empty-evidence.out"; then
  _pass "pass evidence with no checks fails with actionable message"
else
  _fail "empty evidence failure did not mention checks: $(cat "$TMPDIR_EVAL/empty-evidence.out")"
fi
664
+
665
+ mkdir -p "$BAD/bad-standard-ref"
666
+ cat > "$BAD/bad-standard-ref/evidence.json" <<'JSON'
667
+ {
668
+ "schema_version": "1.0",
669
+ "task_slug": "bad-standard-ref",
670
+ "verdict": "pass",
671
+ "checks": [
672
+ {
673
+ "id": "unknown-standard",
674
+ "kind": "policy",
675
+ "status": "pass",
676
+ "summary": "Unknown standards should not pass validation.",
677
+ "standard_refs": [
678
+ {
679
+ "standard": "spreadsheet",
680
+ "ref": "proof.xlsx"
681
+ }
682
+ ]
683
+ }
684
+ ]
685
+ }
686
+ JSON
687
+
688
# Negative case: a check's standard_refs entry names the standard
# "spreadsheet". Expect a non-zero exit and a "standard must be one of"
# message; the captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/bad-standard-ref" >"$TMPDIR_EVAL/bad-standard-ref.out" 2>&1; then
  _fail "unknown evidence standard should fail: $(cat "$TMPDIR_EVAL/bad-standard-ref.out")"
elif rg -q 'standard must be one of' "$TMPDIR_EVAL/bad-standard-ref.out"; then
  _pass "evidence sidecar rejects unknown standard refs"
else
  _fail "bad standard ref failure did not mention standard: $(cat "$TMPDIR_EVAL/bad-standard-ref.out")"
fi
695
+
696
+ mkdir -p "$BAD/legacy-string-ref"
697
+ cat > "$BAD/legacy-string-ref/acceptance.json" <<'JSON'
698
+ {
699
+ "schema_version": "1.0",
700
+ "task_slug": "legacy-string-ref",
701
+ "source_request": "Legacy refs are rejected.",
702
+ "criteria": [
703
+ {
704
+ "id": "legacy-string-ref",
705
+ "description": "Legacy string evidence refs fail validation.",
706
+ "status": "pass",
707
+ "evidence_refs": ["evidence.json"]
708
+ }
709
+ ],
710
+ "goal_fit": {
711
+ "status": "pass",
712
+ "summary": "Legacy refs are rejected."
713
+ }
714
+ }
715
+ JSON
716
+
717
# Negative case: acceptance.json uses a bare string ("evidence.json") inside
# evidence_refs instead of an object. The brackets in the pattern are escaped
# because rg treats the pattern as a regex. The captured validator output is
# included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/legacy-string-ref" >"$TMPDIR_EVAL/legacy-string-ref.out" 2>&1; then
  _fail "legacy string evidence refs should fail: $(cat "$TMPDIR_EVAL/legacy-string-ref.out")"
elif rg -q 'evidence_refs\[0\] must be object' "$TMPDIR_EVAL/legacy-string-ref.out"; then
  _pass "custom validator rejects legacy string evidence refs"
else
  _fail "legacy string ref failure did not mention object refs: $(cat "$TMPDIR_EVAL/legacy-string-ref.out")"
fi
724
+
725
+ mkdir -p "$BAD/source-missing-required"
726
+ cat > "$BAD/source-missing-required/evidence.json" <<'JSON'
727
+ {
728
+ "schema_version": "1.0",
729
+ "task_slug": "source-missing-required",
730
+ "verdict": "pass",
731
+ "checks": [
732
+ {
733
+ "id": "source-missing-required",
734
+ "kind": "test",
735
+ "status": "pass",
736
+ "summary": "Source refs must include line and excerpt fields.",
737
+ "artifact_refs": [
738
+ {
739
+ "kind": "source",
740
+ "file": "src/index.ts"
741
+ }
742
+ ]
743
+ }
744
+ ]
745
+ }
746
+ JSON
747
+
748
# Negative case: a "source" artifact ref supplies only kind and file. Any one
# of the three "... is required" messages (line_start, line_end, excerpt)
# satisfies the check, hence the regex alternation. The captured validator
# output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/source-missing-required" >"$TMPDIR_EVAL/source-missing-required.out" 2>&1; then
  _fail "source ref missing required fields should fail: $(cat "$TMPDIR_EVAL/source-missing-required.out")"
elif rg -q 'line_start is required|line_end is required|excerpt is required' "$TMPDIR_EVAL/source-missing-required.out"; then
  _pass "custom validator rejects source refs missing required fields"
else
  _fail "source missing required failure did not mention source fields: $(cat "$TMPDIR_EVAL/source-missing-required.out")"
fi
755
+
756
+ mkdir -p "$BAD/empty-non-source-ref"
757
+ cat > "$BAD/empty-non-source-ref/evidence.json" <<'JSON'
758
+ {
759
+ "schema_version": "1.0",
760
+ "task_slug": "empty-non-source-ref",
761
+ "verdict": "pass",
762
+ "checks": [
763
+ {
764
+ "id": "empty-non-source-ref",
765
+ "kind": "test",
766
+ "status": "pass",
767
+ "summary": "Empty non-source refs must fail.",
768
+ "artifact_refs": [
769
+ {
770
+ "kind": "artifact"
771
+ },
772
+ {
773
+ "kind": "command"
774
+ }
775
+ ]
776
+ }
777
+ ],
778
+ "external_evidence": [
779
+ {
780
+ "system": "provider",
781
+ "ref": {
782
+ "kind": "provider"
783
+ },
784
+ "summary": "Provider refs need URLs.",
785
+ "standard": "custom"
786
+ }
787
+ ]
788
+ }
789
+ JSON
790
+
791
# Negative case: artifact/command/provider refs that carry only a "kind".
# Either the schema-mismatch message or "url is required" satisfies the check.
# The captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/empty-non-source-ref" >"$TMPDIR_EVAL/empty-non-source-ref.out" 2>&1; then
  _fail "empty non-source refs should fail: $(cat "$TMPDIR_EVAL/empty-non-source-ref.out")"
elif rg -q 'must match at least one allowed schema|url is required' "$TMPDIR_EVAL/empty-non-source-ref.out"; then
  _pass "custom validator rejects empty non-source evidence refs"
else
  _fail "empty non-source ref failure did not mention required ref detail: $(cat "$TMPDIR_EVAL/empty-non-source-ref.out")"
fi
798
+
799
+ mkdir -p "$BAD/open-critique"
800
+ cat > "$BAD/open-critique/critique.json" <<'JSON'
801
+ {
802
+ "schema_version": "1.0",
803
+ "task_slug": "open-critique",
804
+ "status": "pass",
805
+ "required": true,
806
+ "updated_at": "2026-05-06T00:00:00Z",
807
+ "critiques": [
808
+ {
809
+ "id": "blocking-review",
810
+ "reviewer": "tool-code-reviewer",
811
+ "reviewed_at": "2026-05-06T00:00:00Z",
812
+ "verdict": "fail",
813
+ "summary": "A medium severity finding is still open.",
814
+ "findings": [
815
+ {
816
+ "id": "medium-open",
817
+ "severity": "medium",
818
+ "status": "open",
819
+ "description": "Open findings must be resolved before critique can pass."
820
+ }
821
+ ]
822
+ }
823
+ ]
824
+ }
825
+ JSON
826
+
827
# Negative case: critique.json has top-level status "pass" while a
# medium-severity finding is still "open". Expect a non-zero exit and the
# open-findings message; the captured validator output is included in every
# failure message.
if flow_agents_node "$VALIDATOR" "$BAD/open-critique" >"$TMPDIR_EVAL/open-critique.out" 2>&1; then
  _fail "critique pass with open finding should fail: $(cat "$TMPDIR_EVAL/open-critique.out")"
elif rg -q 'critique pass cannot have open findings' "$TMPDIR_EVAL/open-critique.out"; then
  _pass "critique sidecar blocks open findings"
else
  _fail "open critique failure did not mention open findings: $(cat "$TMPDIR_EVAL/open-critique.out")"
fi
834
+
835
+ mkdir -p "$BAD/bad-learning"
836
+ cat > "$BAD/bad-learning/learning.json" <<'JSON'
837
+ {
838
+ "schema_version": "1.0",
839
+ "task_slug": "bad-learning",
840
+ "status": "learned",
841
+ "updated_at": "2026-05-06T00:00:00Z",
842
+ "records": [
843
+ {
844
+ "id": "missing-source",
845
+ "recorded_at": "2026-05-06T00:00:00Z",
846
+ "source_refs": [],
847
+ "outcome": "success",
848
+ "facts": ["A learning record without evidence should fail."],
849
+ "interpretation": "Learning must be traceable.",
850
+ "routing": [
851
+ {
852
+ "target": "eval",
853
+ "action": "Reject untraceable learning records.",
854
+ "status": "open"
855
+ }
856
+ ]
857
+ }
858
+ ]
859
+ }
860
+ JSON
861
+
862
# Negative case: a learning record with an empty source_refs array. Expect a
# non-zero exit and the minimum-items message for source_refs; the captured
# validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/bad-learning" >"$TMPDIR_EVAL/bad-learning.out" 2>&1; then
  _fail "learning record without source refs should fail: $(cat "$TMPDIR_EVAL/bad-learning.out")"
elif rg -q 'source_refs must contain at least 1 item' "$TMPDIR_EVAL/bad-learning.out"; then
  _pass "learning sidecar requires traceable source refs"
else
  _fail "bad learning failure did not mention source refs: $(cat "$TMPDIR_EVAL/bad-learning.out")"
fi
869
+
870
+ mkdir -p "$BAD/empty-learning"
871
+ cat > "$BAD/empty-learning/learning.json" <<'JSON'
872
+ {
873
+ "schema_version": "1.0",
874
+ "task_slug": "empty-learning",
875
+ "status": "learned",
876
+ "updated_at": "2026-05-06T00:00:00Z",
877
+ "records": []
878
+ }
879
+ JSON
880
+
881
# Negative case: learning.json has status "learned" with an empty records
# array. Expect a non-zero exit and the minimum-items message for records; the
# captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/empty-learning" >"$TMPDIR_EVAL/empty-learning.out" 2>&1; then
  _fail "learned status with no records should fail: $(cat "$TMPDIR_EVAL/empty-learning.out")"
elif rg -q 'records must contain at least 1 item' "$TMPDIR_EVAL/empty-learning.out"; then
  _pass "learning sidecar requires at least one record"
else
  _fail "empty learning failure did not mention records: $(cat "$TMPDIR_EVAL/empty-learning.out")"
fi
888
+
889
+ mkdir -p "$BAD/learned-missing-correction"
890
+ cat > "$BAD/learned-missing-correction/learning.json" <<'JSON'
891
+ {
892
+ "schema_version": "1.0",
893
+ "task_slug": "learned-missing-correction",
894
+ "status": "learned",
895
+ "updated_at": "2026-05-06T00:00:00Z",
896
+ "records": [
897
+ {
898
+ "id": "missing-correction",
899
+ "recorded_at": "2026-05-06T00:00:00Z",
900
+ "source_refs": ["evidence.json"],
901
+ "outcome": "success",
902
+ "facts": ["Terminal learned records must include a correction decision."],
903
+ "interpretation": "Learned closeout cannot omit correction.needed.",
904
+ "routing": [
905
+ {
906
+ "target": "none",
907
+ "action": "No follow-up.",
908
+ "status": "completed"
909
+ }
910
+ ]
911
+ }
912
+ ]
913
+ }
914
+ JSON
915
+
916
# Negative case: a "learned" record with no correction object. The pattern is
# a loose regex (correction ... needed ... required) rather than an exact
# phrase. The captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/learned-missing-correction" >"$TMPDIR_EVAL/learned-missing-correction.out" 2>&1; then
  _fail "learned status without correction should fail: $(cat "$TMPDIR_EVAL/learned-missing-correction.out")"
elif rg -q 'correction.*needed.*required' "$TMPDIR_EVAL/learned-missing-correction.out"; then
  _pass "learning sidecar requires correction decision for learned status"
else
  _fail "missing correction failure did not mention correction.needed: $(cat "$TMPDIR_EVAL/learned-missing-correction.out")"
fi
923
+
924
+ mkdir -p "$BAD/open-learning-routing"
925
+ cat > "$BAD/open-learning-routing/learning.json" <<'JSON'
926
+ {
927
+ "schema_version": "1.0",
928
+ "task_slug": "open-learning-routing",
929
+ "status": "learned",
930
+ "updated_at": "2026-05-06T00:00:00Z",
931
+ "records": [
932
+ {
933
+ "id": "open-routing",
934
+ "recorded_at": "2026-05-06T00:00:00Z",
935
+ "source_refs": ["evidence.json"],
936
+ "outcome": "mixed",
937
+ "facts": ["A follow-up remains open."],
938
+ "interpretation": "Open learning routing should keep the top-level status from being learned.",
939
+ "routing": [
940
+ {
941
+ "target": "backlog",
942
+ "action": "Create a follow-up issue.",
943
+ "status": "open"
944
+ }
945
+ ]
946
+ }
947
+ ]
948
+ }
949
+ JSON
950
+
951
# Negative case: learning.json has status "learned" while a routing entry is
# still "open". Expect a non-zero exit and the open-routing message; the
# captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/open-learning-routing" >"$TMPDIR_EVAL/open-learning-routing.out" 2>&1; then
  _fail "learned status with open routing should fail: $(cat "$TMPDIR_EVAL/open-learning-routing.out")"
elif rg -q 'learning status learned cannot have open routing' "$TMPDIR_EVAL/open-learning-routing.out"; then
  _pass "learning sidecar keeps open routing out of learned status"
else
  _fail "open learning routing failure did not mention status: $(cat "$TMPDIR_EVAL/open-learning-routing.out")"
fi
958
+
959
+ mkdir -p "$BAD/bad-release-gate"
960
+ cat > "$BAD/bad-release-gate/release.json" <<'JSON'
961
+ {
962
+ "schema_version": "1.0",
963
+ "task_slug": "bad-release-gate",
964
+ "decision": "merge",
965
+ "updated_at": "2026-05-06T00:00:00Z",
966
+ "scope": "Bad release fixture.",
967
+ "evidence_ref": "evidence.json",
968
+ "gates": [
969
+ {
970
+ "name": "merge",
971
+ "status": "not_verified",
972
+ "summary": "CI was not verified."
973
+ }
974
+ ],
975
+ "rollback_plan": {
976
+ "status": "not_required",
977
+ "summary": "No deploy.",
978
+ "owner": "maintainer"
979
+ },
980
+ "observability_plan": {
981
+ "status": "not_required",
982
+ "summary": "No runtime surface."
983
+ },
984
+ "post_deploy_checks": [],
985
+ "docs": {
986
+ "status": "updated",
987
+ "summary": "Docs are irrelevant for this negative fixture."
988
+ }
989
+ }
990
+ JSON
991
+
992
# Negative case: release.json decides "merge" while its merge gate is
# "not_verified". Expect a non-zero exit and the all-gates-must-pass message;
# the captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/bad-release-gate" >"$TMPDIR_EVAL/bad-release-gate.out" 2>&1; then
  _fail "positive release decision with non-pass gate should fail: $(cat "$TMPDIR_EVAL/bad-release-gate.out")"
elif rg -q 'positive release decision requires all required gates to pass' "$TMPDIR_EVAL/bad-release-gate.out"; then
  _pass "release sidecar blocks positive decisions with non-pass gates"
else
  _fail "bad release gate failure did not mention gate pass: $(cat "$TMPDIR_EVAL/bad-release-gate.out")"
fi
999
+
1000
+ mkdir -p "$BAD/bad-deploy-release"
1001
+ cat > "$BAD/bad-deploy-release/release.json" <<'JSON'
1002
+ {
1003
+ "schema_version": "1.0",
1004
+ "task_slug": "bad-deploy-release",
1005
+ "decision": "deploy",
1006
+ "updated_at": "2026-05-06T00:00:00Z",
1007
+ "scope": "Bad deploy fixture.",
1008
+ "evidence_ref": "evidence.json",
1009
+ "gates": [
1010
+ {
1011
+ "name": "deploy",
1012
+ "status": "pass",
1013
+ "summary": "Deploy gate claims pass."
1014
+ }
1015
+ ],
1016
+ "rollback_plan": {
1017
+ "status": "missing",
1018
+ "summary": "Rollback is missing.",
1019
+ "owner": "maintainer"
1020
+ },
1021
+ "observability_plan": {
1022
+ "status": "missing",
1023
+ "summary": "Observability is missing."
1024
+ },
1025
+ "post_deploy_checks": [],
1026
+ "docs": {
1027
+ "status": "updated",
1028
+ "summary": "Docs are irrelevant for this negative fixture."
1029
+ }
1030
+ }
1031
+ JSON
1032
+
1033
# Negative case: release.json decides "deploy" with rollback_plan "missing"
# and no post_deploy_checks. BOTH messages must appear in the output (two rg
# probes joined with &&). The captured validator output is included in every
# failure message.
if flow_agents_node "$VALIDATOR" "$BAD/bad-deploy-release" >"$TMPDIR_EVAL/bad-deploy-release.out" 2>&1; then
  _fail "deploy decision without operational plans should fail: $(cat "$TMPDIR_EVAL/bad-deploy-release.out")"
elif rg -q 'deploy decision requires rollback_plan status ready' "$TMPDIR_EVAL/bad-deploy-release.out" \
  && rg -q 'deploy decision requires post_deploy_checks' "$TMPDIR_EVAL/bad-deploy-release.out"; then
  _pass "release sidecar requires deploy rollback and post-deploy checks"
else
  _fail "bad deploy release failure did not mention operational plans: $(cat "$TMPDIR_EVAL/bad-deploy-release.out")"
fi
1040
+
1041
+ mkdir -p "$BAD/bad-deploy-missing-gate"
1042
+ cat > "$BAD/bad-deploy-missing-gate/release.json" <<'JSON'
1043
+ {
1044
+ "schema_version": "1.0",
1045
+ "task_slug": "bad-deploy-missing-gate",
1046
+ "decision": "deploy",
1047
+ "updated_at": "2026-05-06T00:00:00Z",
1048
+ "scope": "Bad deploy missing gate fixture.",
1049
+ "evidence_ref": "evidence.json",
1050
+ "gates": [
1051
+ {
1052
+ "name": "merge",
1053
+ "status": "pass",
1054
+ "summary": "Merge gate passed, but deploy gate is missing."
1055
+ }
1056
+ ],
1057
+ "rollback_plan": {
1058
+ "status": "ready",
1059
+ "summary": "Rollback is ready.",
1060
+ "owner": "maintainer"
1061
+ },
1062
+ "observability_plan": {
1063
+ "status": "ready",
1064
+ "summary": "Observability is ready."
1065
+ },
1066
+ "post_deploy_checks": [
1067
+ {
1068
+ "id": "smoke",
1069
+ "status": "planned",
1070
+ "summary": "Smoke test is planned."
1071
+ }
1072
+ ],
1073
+ "docs": {
1074
+ "status": "updated",
1075
+ "summary": "Docs are irrelevant for this negative fixture."
1076
+ }
1077
+ }
1078
+ JSON
1079
+
1080
# Negative case: release.json decides "deploy" but lists only a passing
# "merge" gate. Expect a non-zero exit and the deploy-gate message; the
# captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/bad-deploy-missing-gate" >"$TMPDIR_EVAL/bad-deploy-missing-gate.out" 2>&1; then
  _fail "deploy decision without deploy gate should fail: $(cat "$TMPDIR_EVAL/bad-deploy-missing-gate.out")"
elif rg -q 'positive release decision requires deploy gate to pass' "$TMPDIR_EVAL/bad-deploy-missing-gate.out"; then
  _pass "release sidecar requires matching gate for positive decisions"
else
  _fail "bad deploy missing gate failure did not mention matching gate: $(cat "$TMPDIR_EVAL/bad-deploy-missing-gate.out")"
fi
1087
+
1088
+ mkdir -p "$BAD/bad-deploy-check"
1089
+ cat > "$BAD/bad-deploy-check/release.json" <<'JSON'
1090
+ {
1091
+ "schema_version": "1.0",
1092
+ "task_slug": "bad-deploy-check",
1093
+ "decision": "deploy",
1094
+ "updated_at": "2026-05-06T00:00:00Z",
1095
+ "scope": "Bad deploy check fixture.",
1096
+ "evidence_ref": "evidence.json",
1097
+ "gates": [
1098
+ {
1099
+ "name": "deploy",
1100
+ "status": "pass",
1101
+ "summary": "Deploy gate passed."
1102
+ }
1103
+ ],
1104
+ "rollback_plan": {
1105
+ "status": "ready",
1106
+ "summary": "Rollback is ready.",
1107
+ "owner": "maintainer"
1108
+ },
1109
+ "observability_plan": {
1110
+ "status": "ready",
1111
+ "summary": "Observability is ready."
1112
+ },
1113
+ "post_deploy_checks": [
1114
+ {
1115
+ "id": "smoke",
1116
+ "status": "fail",
1117
+ "summary": "Smoke test failed."
1118
+ }
1119
+ ],
1120
+ "docs": {
1121
+ "status": "updated",
1122
+ "summary": "Docs are irrelevant for this negative fixture."
1123
+ }
1124
+ }
1125
+ JSON
1126
+
1127
# Negative case: release.json decides "deploy" while its post-deploy smoke
# check has status "fail". Expect a non-zero exit and the planned-or-pass
# message; the captured validator output is included in every failure message.
if flow_agents_node "$VALIDATOR" "$BAD/bad-deploy-check" >"$TMPDIR_EVAL/bad-deploy-check.out" 2>&1; then
  _fail "deploy decision with failed post-deploy check should fail: $(cat "$TMPDIR_EVAL/bad-deploy-check.out")"
elif rg -q 'deploy decision requires post_deploy_checks to be planned or pass' "$TMPDIR_EVAL/bad-deploy-check.out"; then
  _pass "release sidecar rejects failed deploy checks"
else
  _fail "bad deploy check failure did not mention post-deploy status: $(cat "$TMPDIR_EVAL/bad-deploy-check.out")"
fi
1134
+
1135
+ mkdir -p "$BAD/missing-sidecars"
1136
+ cp "$ARTIFACT_DIR/workflow-contract-e2e--deliver.md" "$BAD/missing-sidecars/missing-sidecars--deliver.md"
1137
+
1138
+ if flow_agents_node "$VALIDATOR" --require-sidecars "$BAD/missing-sidecars" >"$TMPDIR_EVAL/missing-sidecars.out" 2>&1; then
1139
+ _fail "missing required sidecars should fail"
1140
+ elif rg -q 'required sidecar is missing' "$TMPDIR_EVAL/missing-sidecars.out"; then
1141
+ _pass "missing required sidecars fail with actionable message"
1142
+ else
1143
+ _fail "missing sidecar failure did not mention required sidecar"
1144
+ fi
1145
+
1146
+ mkdir -p "$BAD/mismatched-sidecars"
1147
+ cat > "$BAD/mismatched-sidecars/state.json" <<'JSON'
1148
+ {
1149
+ "schema_version": "1.0",
1150
+ "task_slug": "left",
1151
+ "status": "planned",
1152
+ "phase": "planning",
1153
+ "updated_at": "2026-05-06T00:00:00Z",
1154
+ "next_action": {
1155
+ "status": "continue",
1156
+ "summary": "Continue."
1157
+ }
1158
+ }
1159
+ JSON
1160
+ cat > "$BAD/mismatched-sidecars/acceptance.json" <<'JSON'
1161
+ {
1162
+ "schema_version": "1.0",
1163
+ "task_slug": "right",
1164
+ "criteria": [
1165
+ {
1166
+ "id": "criterion",
1167
+ "description": "Criterion.",
1168
+ "status": "pending"
1169
+ }
1170
+ ],
1171
+ "goal_fit": {
1172
+ "status": "pending",
1173
+ "summary": "Pending."
1174
+ }
1175
+ }
1176
+ JSON
1177
+
1178
+ if flow_agents_node "$VALIDATOR" "$BAD/mismatched-sidecars" >"$TMPDIR_EVAL/mismatched-sidecars.out" 2>&1; then
1179
+ _fail "mismatched sidecar task slugs should fail"
1180
+ elif rg -q 'sidecar task_slug mismatch' "$TMPDIR_EVAL/mismatched-sidecars.out"; then
1181
+ _pass "mismatched sidecar task slugs fail with actionable message"
1182
+ else
1183
+ _fail "mismatched sidecar failure did not mention task_slug"
1184
+ fi
1185
+
1186
+ mkdir -p "$BAD/bad-date"
1187
+ cat > "$BAD/bad-date/state.json" <<'JSON'
1188
+ {
1189
+ "schema_version": "1.0",
1190
+ "task_slug": "bad-date",
1191
+ "status": "planned",
1192
+ "phase": "planning",
1193
+ "updated_at": "2026-05-06T00:00:00",
1194
+ "next_action": {
1195
+ "status": "continue",
1196
+ "summary": "Continue."
1197
+ }
1198
+ }
1199
+ JSON
1200
+
1201
+ if flow_agents_node "$VALIDATOR" "$BAD/bad-date" >"$TMPDIR_EVAL/bad-date.out" 2>&1; then
1202
+ _fail "date-time without timezone should fail"
1203
+ elif rg -q 'updated_at must be date-time' "$TMPDIR_EVAL/bad-date.out"; then
1204
+ _pass "date-time without timezone fails with actionable message"
1205
+ else
1206
+ _fail "bad date failure did not mention date-time"
1207
+ fi
1208
+
1209
+ mkdir -p "$BAD/extra-criteria"
1210
+ cp "$ARTIFACT_DIR/workflow-contract-e2e--deliver.md" "$BAD/extra-criteria/extra-criteria--deliver.md"
1211
+ cp "$ARTIFACT_DIR/state.json" "$BAD/extra-criteria/state.json"
1212
+ cp "$ARTIFACT_DIR/evidence.json" "$BAD/extra-criteria/evidence.json"
1213
+ cp "$ARTIFACT_DIR/handoff.json" "$BAD/extra-criteria/handoff.json"
1214
+ cp "$ARTIFACT_DIR/critique.json" "$BAD/extra-criteria/critique.json"
1215
+ cat > "$BAD/extra-criteria/acceptance.json" <<'JSON'
1216
+ {
1217
+ "schema_version": "1.0",
1218
+ "task_slug": "workflow-contract-e2e",
1219
+ "criteria": [
1220
+ {"id": "a", "description": "A.", "status": "pass"},
1221
+ {"id": "b", "description": "B.", "status": "pass"},
1222
+ {"id": "c", "description": "C.", "status": "pass"},
1223
+ {"id": "d", "description": "D.", "status": "pass"},
1224
+ {"id": "e", "description": "E.", "status": "pass"}
1225
+ ],
1226
+ "goal_fit": {
1227
+ "status": "pass",
1228
+ "summary": "Pass."
1229
+ }
1230
+ }
1231
+ JSON
1232
+
1233
+ if flow_agents_node "$VALIDATOR" --require-sidecars "$BAD/extra-criteria" >"$TMPDIR_EVAL/extra-criteria.out" 2>&1; then
1234
+ _fail "extra acceptance criteria should fail"
1235
+ elif rg -q 'acceptance.json has 5 criteria but Markdown defines 4' "$TMPDIR_EVAL/extra-criteria.out"; then
1236
+ _pass "extra acceptance criteria fail with actionable message"
1237
+ else
1238
+ _fail "extra criteria failure did not mention criteria mismatch"
1239
+ fi
1240
+
1241
+ if [[ "$errors" -eq 0 ]]; then
1242
+ echo "Workflow artifact integration passed."
1243
+ exit 0
1244
+ fi
1245
+
1246
+ echo "Workflow artifact integration failed: $errors issue(s)."
1247
+ exit 1