@kontourai/flow-agents 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. package/.githooks/pre-push +11 -0
  2. package/.github/workflows/ci.yml +210 -0
  3. package/.github/workflows/docs-pages.yml +52 -0
  4. package/.github/workflows/publish-npm.yml +104 -0
  5. package/AGENTS.md +26 -0
  6. package/CHANGELOG.md +66 -0
  7. package/CODE_OF_CONDUCT.md +25 -0
  8. package/CONTEXT.md +300 -0
  9. package/CONTRIBUTING.md +44 -0
  10. package/LICENSE +201 -0
  11. package/README.md +129 -0
  12. package/SECURITY.md +33 -0
  13. package/agent-cards/dev.json +19 -0
  14. package/agents/dev.json +127 -0
  15. package/agents/tool-code-reviewer.json +61 -0
  16. package/agents/tool-dependencies-updater.json +118 -0
  17. package/agents/tool-explore-config.json +92 -0
  18. package/agents/tool-explore-deps.json +92 -0
  19. package/agents/tool-explore-entry.json +92 -0
  20. package/agents/tool-explore-patterns.json +92 -0
  21. package/agents/tool-explore-structure.json +92 -0
  22. package/agents/tool-explore-tests.json +92 -0
  23. package/agents/tool-planner.json +57 -0
  24. package/agents/tool-playwright.json +145 -0
  25. package/agents/tool-security-reviewer.json +56 -0
  26. package/agents/tool-verifier.json +61 -0
  27. package/agents/tool-worker.json +58 -0
  28. package/build/src/cli/console-learning-projection.js +123 -0
  29. package/build/src/cli/docs-preview.js +39 -0
  30. package/build/src/cli/effective-backlog-settings.js +102 -0
  31. package/build/src/cli/export-bookmarks.js +38 -0
  32. package/build/src/cli/fixture-retirement-audit.js +140 -0
  33. package/build/src/cli/flow-kit.js +138 -0
  34. package/build/src/cli/import-bookmarks.js +50 -0
  35. package/build/src/cli/init.js +239 -0
  36. package/build/src/cli/instinct-cli.js +93 -0
  37. package/build/src/cli/promote-workflow-artifact.js +63 -0
  38. package/build/src/cli/publish-change-helper.js +154 -0
  39. package/build/src/cli/pull-work-provider.js +469 -0
  40. package/build/src/cli/runtime-adapter.js +23 -0
  41. package/build/src/cli/telemetry-doctor.js +221 -0
  42. package/build/src/cli/usage-feedback.js +443 -0
  43. package/build/src/cli/validate-hook-influence.js +152 -0
  44. package/build/src/cli/validate-source-tree.js +31 -0
  45. package/build/src/cli/validate-workflow-artifacts.js +486 -0
  46. package/build/src/cli/veritas-governance.js +262 -0
  47. package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
  48. package/build/src/cli/workflow-sidecar.js +816 -0
  49. package/build/src/cli.js +89 -0
  50. package/build/src/flow-kit/validate.js +75 -0
  51. package/build/src/lib/args.js +45 -0
  52. package/build/src/lib/fs.js +62 -0
  53. package/build/src/lib/workflow-learning-projection.js +334 -0
  54. package/build/src/runtime-adapters.js +146 -0
  55. package/build/src/tools/build-universal-bundles.js +397 -0
  56. package/build/src/tools/common.js +56 -0
  57. package/build/src/tools/filter-installed-packs.js +132 -0
  58. package/build/src/tools/generate-context-map.js +198 -0
  59. package/build/src/tools/validate-package.js +64 -0
  60. package/build/src/tools/validate-source-tree.js +622 -0
  61. package/console.telemetry.json +176 -0
  62. package/context/base-rules.md +17 -0
  63. package/context/code-review-standards.md +62 -0
  64. package/context/coding-standards.md +42 -0
  65. package/context/common/orchestrators.md +12 -0
  66. package/context/common/subagents.md +28 -0
  67. package/context/contracts/artifact-contract.md +182 -0
  68. package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
  69. package/context/contracts/delivery-contract.md +69 -0
  70. package/context/contracts/execution-contract.md +53 -0
  71. package/context/contracts/governance-adapter-contract.md +67 -0
  72. package/context/contracts/planning-contract.md +85 -0
  73. package/context/contracts/review-contract.md +104 -0
  74. package/context/contracts/sandbox-policy.md +52 -0
  75. package/context/contracts/verification-contract.md +134 -0
  76. package/context/contracts/work-item-contract.md +215 -0
  77. package/context/deferred/demo-mode.md +33 -0
  78. package/context/deferred/languages/go.md +31 -0
  79. package/context/deferred/languages/python.md +31 -0
  80. package/context/deferred/languages/typescript.md +34 -0
  81. package/context/deferred/parallelization.md +35 -0
  82. package/context/deferred/worktree-isolation.md +24 -0
  83. package/context/development-workflow.md +50 -0
  84. package/context/scripts/context-budget/budget-scan.sh +166 -0
  85. package/context/scripts/detect-tools.sh +3 -0
  86. package/context/scripts/discover-agents.sh +28 -0
  87. package/context/scripts/git-status.sh +49 -0
  88. package/context/scripts/hooks/config-protection.js +79 -0
  89. package/context/scripts/hooks/desktop-notify.sh +39 -0
  90. package/context/scripts/hooks/governance-audit.sh +135 -0
  91. package/context/scripts/hooks/lib/audit-transport.sh +40 -0
  92. package/context/scripts/hooks/lib/hook-flags.js +49 -0
  93. package/context/scripts/hooks/lib/patterns.sh +57 -0
  94. package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
  95. package/context/scripts/hooks/post-edit-accumulator.js +66 -0
  96. package/context/scripts/hooks/pre-commit-quality.js +194 -0
  97. package/context/scripts/hooks/quality-gate.js +93 -0
  98. package/context/scripts/hooks/report-only-guard.js +21 -0
  99. package/context/scripts/hooks/run-hook.js +136 -0
  100. package/context/scripts/hooks/stop-format-typecheck.js +141 -0
  101. package/context/scripts/hooks/stop-goal-fit.js +337 -0
  102. package/context/scripts/hooks/workflow-steering.js +250 -0
  103. package/context/scripts/telemetry/console-presets.sh +14 -0
  104. package/context/scripts/telemetry/install-console-config.sh +214 -0
  105. package/context/scripts/telemetry/lib/config.sh +85 -0
  106. package/context/scripts/telemetry/lib/enrich.sh +115 -0
  107. package/context/scripts/telemetry/lib/redact.sh +22 -0
  108. package/context/scripts/telemetry/lib/session.sh +63 -0
  109. package/context/scripts/telemetry/lib/transport.sh +183 -0
  110. package/context/scripts/telemetry/lib/usage.sh +29 -0
  111. package/context/scripts/telemetry/sync-agents.sh +173 -0
  112. package/context/scripts/telemetry/telemetry.conf +23 -0
  113. package/context/scripts/telemetry/telemetry.sh +387 -0
  114. package/context/scripts/validate-package.sh +89 -0
  115. package/context/settings/backlog-provider-settings.json +54 -0
  116. package/context/templates/core/identity.md +26 -0
  117. package/context/templates/core/user.md +15 -0
  118. package/docs/_config.yml +15 -0
  119. package/docs/_layouts/default.html +87 -0
  120. package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
  121. package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
  122. package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
  123. package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
  124. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
  125. package/docs/adr/0006-typescript-first-source-policy.md +98 -0
  126. package/docs/agent-system-guidebook.md +391 -0
  127. package/docs/agent-usage-feedback-loop.md +351 -0
  128. package/docs/assets/favicon.svg +13 -0
  129. package/docs/assets/og-image.png +0 -0
  130. package/docs/assets/site.css +774 -0
  131. package/docs/assets/site.js +139 -0
  132. package/docs/configurable-workflow-routing.md +174 -0
  133. package/docs/context-map.md +145 -0
  134. package/docs/developer-architecture.md +145 -0
  135. package/docs/developer-hook-setup.md +61 -0
  136. package/docs/fixture-ownership.md +44 -0
  137. package/docs/flow-kit-repository-contract.md +180 -0
  138. package/docs/index.md +129 -0
  139. package/docs/kontour-resource-contract.md +358 -0
  140. package/docs/migrations.md +64 -0
  141. package/docs/north-star.md +322 -0
  142. package/docs/operating-layers.md +110 -0
  143. package/docs/repository-structure.md +132 -0
  144. package/docs/sandbox-policy.md +56 -0
  145. package/docs/skills-map.md +203 -0
  146. package/docs/standards-register.md +96 -0
  147. package/docs/veritas-integration.md +165 -0
  148. package/docs/work-item-adapters.md +72 -0
  149. package/docs/workflow-artifact-lifecycle.md +141 -0
  150. package/docs/workflow-eval-strategy.md +295 -0
  151. package/docs/workflow-shared-contracts.md +51 -0
  152. package/docs/workflow-usage-guide.md +443 -0
  153. package/evals/ARCHITECTURE.md +143 -0
  154. package/evals/CONVENTIONS.md +58 -0
  155. package/evals/README.md +128 -0
  156. package/evals/acceptance/run.sh +29 -0
  157. package/evals/acceptance/test_claude_harness.sh +242 -0
  158. package/evals/acceptance/test_codex_harness.sh +108 -0
  159. package/evals/acceptance/test_kiro_harness.sh +128 -0
  160. package/evals/cases/dev/404.html +97 -0
  161. package/evals/cases/dev/code-review.yaml +44 -0
  162. package/evals/cases/dev/dashboard.html +300 -0
  163. package/evals/cases/dev/deliver.yaml +66 -0
  164. package/evals/cases/dev/dependency-update.yaml +16 -0
  165. package/evals/cases/dev/explore.yaml +20 -0
  166. package/evals/cases/dev/index.html +370 -0
  167. package/evals/cases/dev/package-lock.json +28 -0
  168. package/evals/cases/dev/package.json +16 -0
  169. package/evals/cases/dev/plan-work.yaml +20 -0
  170. package/evals/cases/dev/promptfooconfig.yaml +666 -0
  171. package/evals/cases/dev/search-first.yaml +20 -0
  172. package/evals/cases/dev/tdd-workflow.yaml +48 -0
  173. package/evals/cases/dev/verify-work.yaml +44 -0
  174. package/evals/cases/dev/workflow.yaml +34 -0
  175. package/evals/ci/run-baseline.sh +283 -0
  176. package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
  177. package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
  178. package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
  179. package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
  180. package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
  181. package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
  182. package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
  183. package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
  184. package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
  185. package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
  186. package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
  187. package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
  188. package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
  189. package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
  190. package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
  191. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
  192. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
  193. package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
  194. package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
  195. package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
  196. package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
  197. package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
  198. package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
  199. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
  200. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
  201. package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
  202. package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
  203. package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
  204. package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
  205. package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
  206. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
  207. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
  208. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
  209. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
  210. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
  211. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
  212. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
  213. package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
  214. package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
  215. package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
  216. package/evals/fixtures/hook-influence/cases.json +336 -0
  217. package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
  218. package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
  219. package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
  220. package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
  221. package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
  222. package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
  223. package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
  224. package/evals/fixtures/surface-trust/provider-absent.json +19 -0
  225. package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
  226. package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
  227. package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
  228. package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
  229. package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
  230. package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
  231. package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
  232. package/evals/integration/test_bundle_install.sh +541 -0
  233. package/evals/integration/test_console_learning_projection.sh +192 -0
  234. package/evals/integration/test_context_map.sh +65 -0
  235. package/evals/integration/test_effective_backlog_settings.sh +58 -0
  236. package/evals/integration/test_fixture_retirement_audit.sh +58 -0
  237. package/evals/integration/test_flow_agents_statusline.sh +93 -0
  238. package/evals/integration/test_flow_kit_repository.sh +90 -0
  239. package/evals/integration/test_goal_fit_hook.sh +482 -0
  240. package/evals/integration/test_hook_category_behaviors.sh +190 -0
  241. package/evals/integration/test_hook_influence_cases.sh +69 -0
  242. package/evals/integration/test_local_flow_kit_install.sh +145 -0
  243. package/evals/integration/test_publish_change_helper.sh +176 -0
  244. package/evals/integration/test_pull_work_provider.sh +140 -0
  245. package/evals/integration/test_runtime_adapter_activation.sh +106 -0
  246. package/evals/integration/test_telemetry.sh +485 -0
  247. package/evals/integration/test_telemetry_doctor.sh +193 -0
  248. package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
  249. package/evals/integration/test_usage_feedback_global.sh +117 -0
  250. package/evals/integration/test_usage_feedback_import.sh +227 -0
  251. package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
  252. package/evals/integration/test_usage_feedback_report.sh +263 -0
  253. package/evals/integration/test_veritas_governance_adapter.sh +235 -0
  254. package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
  255. package/evals/integration/test_workflow_artifacts.sh +1247 -0
  256. package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
  257. package/evals/integration/test_workflow_steering_hook.sh +337 -0
  258. package/evals/lib/assertions/delegated-to.js +40 -0
  259. package/evals/lib/assertions/max-tool-calls.js +15 -0
  260. package/evals/lib/assertions/no-write-tools.js +27 -0
  261. package/evals/lib/assertions/pass-at-k.js +39 -0
  262. package/evals/lib/assertions/telemetry-utils.js +105 -0
  263. package/evals/lib/assertions/tool-called.js +39 -0
  264. package/evals/lib/assertions/verify-after-fix.js +61 -0
  265. package/evals/lib/claude-judge.sh +40 -0
  266. package/evals/lib/claude-provider.sh +74 -0
  267. package/evals/lib/codex-judge.sh +39 -0
  268. package/evals/lib/codex-provider.sh +81 -0
  269. package/evals/lib/eval-dev.sh +5 -0
  270. package/evals/lib/eval-judge.sh +22 -0
  271. package/evals/lib/eval-provider.sh +26 -0
  272. package/evals/lib/eval-report.sh +73 -0
  273. package/evals/lib/kiro-dev.sh +4 -0
  274. package/evals/lib/kiro-judge.sh +17 -0
  275. package/evals/lib/kiro-provider.sh +62 -0
  276. package/evals/lib/node.sh +111 -0
  277. package/evals/promptfooconfig.yaml +70 -0
  278. package/evals/run.sh +309 -0
  279. package/evals/static/test_evidence_refs.sh +141 -0
  280. package/evals/static/test_package.sh +407 -0
  281. package/evals/static/test_repo_hooks.sh +68 -0
  282. package/evals/static/test_universal_bundles.sh +274 -0
  283. package/evals/static/test_workflow_skills.sh +1207 -0
  284. package/install.sh +64 -0
  285. package/integrations/veritas/flow-agents.adapter.json +138 -0
  286. package/integrations/veritas/flow-agents.authority-settings.json +26 -0
  287. package/integrations/veritas/flow-agents.repo-standards.json +82 -0
  288. package/kits/builder/flows/build.flow.json +218 -0
  289. package/kits/builder/flows/shape.flow.json +127 -0
  290. package/kits/builder/kit.json +19 -0
  291. package/kits/catalog.json +11 -0
  292. package/package.json +130 -0
  293. package/packaging/README.md +60 -0
  294. package/packaging/manifest.json +173 -0
  295. package/packaging/packs.json +69 -0
  296. package/powers/dependency-checker/POWER.md +20 -0
  297. package/powers/dependency-checker/mcp.json +20 -0
  298. package/powers/playwright/POWER.md +25 -0
  299. package/powers/playwright/mcp.json +12 -0
  300. package/prompts/code-audit.md +123 -0
  301. package/prompts/kcommit.md +88 -0
  302. package/schemas/backlog-provider-settings.schema.json +138 -0
  303. package/schemas/workflow-acceptance.schema.json +216 -0
  304. package/schemas/workflow-critique.schema.json +113 -0
  305. package/schemas/workflow-evidence.schema.json +357 -0
  306. package/schemas/workflow-handoff.schema.json +52 -0
  307. package/schemas/workflow-learning.schema.json +223 -0
  308. package/schemas/workflow-release.schema.json +172 -0
  309. package/schemas/workflow-state.schema.json +80 -0
  310. package/scripts/README.md +111 -0
  311. package/scripts/build-universal-bundles.js +3 -0
  312. package/scripts/check-content-boundary.cjs +99 -0
  313. package/scripts/context-budget/budget-scan.sh +166 -0
  314. package/scripts/detect-tools.sh +3 -0
  315. package/scripts/discover-agents.sh +28 -0
  316. package/scripts/effective-backlog-settings.js +2 -0
  317. package/scripts/filter-installed-packs.js +2 -0
  318. package/scripts/flow-kit.js +2 -0
  319. package/scripts/generate-context-map.js +2 -0
  320. package/scripts/git-status.sh +49 -0
  321. package/scripts/hooks/claude-hook-adapter.js +174 -0
  322. package/scripts/hooks/claude-telemetry-hook.js +115 -0
  323. package/scripts/hooks/codex-hook-adapter.js +176 -0
  324. package/scripts/hooks/codex-telemetry-hook.js +95 -0
  325. package/scripts/hooks/config-protection.js +79 -0
  326. package/scripts/hooks/desktop-notify.sh +39 -0
  327. package/scripts/hooks/governance-audit.sh +135 -0
  328. package/scripts/hooks/lib/audit-transport.sh +40 -0
  329. package/scripts/hooks/lib/hook-flags.js +49 -0
  330. package/scripts/hooks/lib/patterns.sh +57 -0
  331. package/scripts/hooks/lib/resolve-formatter.js +80 -0
  332. package/scripts/hooks/post-edit-accumulator.js +66 -0
  333. package/scripts/hooks/pre-commit-quality.js +194 -0
  334. package/scripts/hooks/quality-gate.js +93 -0
  335. package/scripts/hooks/report-only-guard.js +21 -0
  336. package/scripts/hooks/run-hook.js +136 -0
  337. package/scripts/hooks/stop-format-typecheck.js +141 -0
  338. package/scripts/hooks/stop-goal-fit.js +337 -0
  339. package/scripts/hooks/workflow-steering.js +250 -0
  340. package/scripts/install-codex-home.sh +106 -0
  341. package/scripts/package.json +3 -0
  342. package/scripts/promote-workflow-artifact.js +2 -0
  343. package/scripts/publish-change-helper.js +2 -0
  344. package/scripts/pull-work-provider.js +2 -0
  345. package/scripts/setup-repo-hooks.sh +8 -0
  346. package/scripts/statusline/flow-agents-statusline.js +157 -0
  347. package/scripts/telemetry/console-presets.sh +14 -0
  348. package/scripts/telemetry/install-console-config.sh +214 -0
  349. package/scripts/telemetry/lib/config.sh +85 -0
  350. package/scripts/telemetry/lib/enrich.sh +115 -0
  351. package/scripts/telemetry/lib/redact.sh +22 -0
  352. package/scripts/telemetry/lib/session.sh +63 -0
  353. package/scripts/telemetry/lib/transport.sh +183 -0
  354. package/scripts/telemetry/lib/usage.sh +29 -0
  355. package/scripts/telemetry/sync-agents.sh +173 -0
  356. package/scripts/telemetry/telemetry.conf +23 -0
  357. package/scripts/telemetry/telemetry.sh +387 -0
  358. package/scripts/usage-feedback.js +2 -0
  359. package/scripts/validate-hook-influence-cases.js +2 -0
  360. package/scripts/validate-package.sh +89 -0
  361. package/scripts/validate-source-tree.js +9 -0
  362. package/skills/agentic-engineering/SKILL.md +62 -0
  363. package/skills/browser-test/SKILL.md +51 -0
  364. package/skills/builder-shape/SKILL.md +76 -0
  365. package/skills/context-budget/SKILL.md +40 -0
  366. package/skills/deliver/SKILL.md +241 -0
  367. package/skills/dependency-update/SKILL.md +68 -0
  368. package/skills/design-probe/SKILL.md +107 -0
  369. package/skills/eval-rebuild/SKILL.md +39 -0
  370. package/skills/evidence-gate/SKILL.md +186 -0
  371. package/skills/execute-plan/SKILL.md +110 -0
  372. package/skills/explore/SKILL.md +137 -0
  373. package/skills/feedback-loop/SKILL.md +87 -0
  374. package/skills/fix-bug/SKILL.md +133 -0
  375. package/skills/frontend-design/SKILL.md +80 -0
  376. package/skills/github-cli/SKILL.md +63 -0
  377. package/skills/idea-to-backlog/SKILL.md +267 -0
  378. package/skills/knowledge-capture/SKILL.md +55 -0
  379. package/skills/learning-review/SKILL.md +115 -0
  380. package/skills/pickup-probe/SKILL.md +114 -0
  381. package/skills/plan-work/SKILL.md +176 -0
  382. package/skills/pull-work/SKILL.md +309 -0
  383. package/skills/release-readiness/SKILL.md +121 -0
  384. package/skills/review-work/SKILL.md +161 -0
  385. package/skills/search-first/SKILL.md +66 -0
  386. package/skills/tdd-workflow/SKILL.md +140 -0
  387. package/skills/verify-work/SKILL.md +109 -0
  388. package/src/cli/console-learning-projection.ts +140 -0
  389. package/src/cli/effective-backlog-settings.ts +99 -0
  390. package/src/cli/fixture-retirement-audit.ts +154 -0
  391. package/src/cli/flow-kit.ts +139 -0
  392. package/src/cli/init.ts +248 -0
  393. package/src/cli/promote-workflow-artifact.ts +64 -0
  394. package/src/cli/publish-change-helper.ts +143 -0
  395. package/src/cli/pull-work-provider.ts +481 -0
  396. package/src/cli/runtime-adapter.ts +24 -0
  397. package/src/cli/telemetry-doctor.ts +243 -0
  398. package/src/cli/usage-feedback.ts +418 -0
  399. package/src/cli/validate-hook-influence.ts +119 -0
  400. package/src/cli/validate-source-tree.ts +30 -0
  401. package/src/cli/validate-workflow-artifacts.ts +411 -0
  402. package/src/cli/veritas-governance.ts +322 -0
  403. package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
  404. package/src/cli/workflow-sidecar.ts +676 -0
  405. package/src/cli.ts +95 -0
  406. package/src/flow-kit/validate.ts +74 -0
  407. package/src/lib/args.ts +43 -0
  408. package/src/lib/fs.ts +62 -0
  409. package/src/lib/workflow-learning-projection.ts +491 -0
  410. package/src/runtime-adapters.ts +154 -0
  411. package/src/tools/build-universal-bundles.ts +366 -0
  412. package/src/tools/common.ts +61 -0
  413. package/src/tools/filter-installed-packs.ts +129 -0
  414. package/src/tools/generate-context-map.ts +199 -0
  415. package/src/tools/validate-package.ts +57 -0
  416. package/src/tools/validate-source-tree.ts +488 -0
  417. package/tsconfig.json +19 -0
  418. package/veritas.claims.json +6 -0
@@ -0,0 +1,2112 @@
1
+ #!/usr/bin/env bash
2
+ # test_workflow_sidecar_writer.sh - workflow sidecar writer integration tests
3
# Unset variables and pipeline failures are errors. -e is not enabled; the
# checks below report failures through _fail and keep running.
set -uo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# NOTE(review): presumably defines flow_agents_node, which the checks below
# call — confirm against evals/lib/node.sh.
source "$ROOT/evals/lib/node.sh"

# Scratch directory for every file this test writes. Abort if it cannot be
# created: with an empty TMPDIR_EVAL the paths derived from it would resolve
# under the filesystem root.
TMPDIR_EVAL="$(mktemp -d)" || {
  echo "mktemp -d failed; cannot create a scratch directory" >&2
  exit 1
}
# Number of failed checks; incremented by _fail.
errors=0
10
+
11
# Remove the scratch directory created for this test run.
# Globals: TMPDIR_EVAL (read)
# Returns: 0 when there is nothing to remove, otherwise the status of rm.
cleanup() {
  # Never hand rm an empty path, and stop option parsing with "--" so the
  # path can never be read as a flag.
  if [[ -n "${TMPDIR_EVAL:-}" ]]; then
    rm -rf -- "$TMPDIR_EVAL"
  fi
}
trap cleanup EXIT
15
+
16
# Print a success line for one check.
# Arguments: $1 - description of the check that passed
_pass() {
  printf ' ✓ %s\n' "$1"
}
17
# Print a failure line for one check and count it.
# Globals:   errors (incremented)
# Arguments: $1 - description of the check that failed
_fail() {
  printf ' ✗ %s\n' "$1"
  errors=$((errors + 1))
}
18
# Run a command in the background with a wall-clock limit.
# Arguments: $1   - limit in whole seconds
#            $2.. - command to run and its arguments
# Returns:   the command's own exit status if it finishes in time,
#            124 if the limit was reached and the command was terminated.
run_bounded() {
  local seconds="$1"
  shift
  "$@" &
  local pid=$!
  local deadline=$((SECONDS + seconds))
  # Polls (0.05s each) granted after SIGTERM before escalating to SIGKILL.
  local grace_polls=10
  while kill -0 "$pid" 2>/dev/null; do
    if [[ "$SECONDS" -ge "$deadline" ]]; then
      kill "$pid" 2>/dev/null || true
      # A command that ignores or is slow to handle SIGTERM would otherwise
      # keep the wait below blocked indefinitely, defeating the limit.
      while [[ "$grace_polls" -gt 0 ]] && kill -0 "$pid" 2>/dev/null; do
        sleep 0.05
        grace_polls=$((grace_polls - 1))
      done
      kill -KILL "$pid" 2>/dev/null || true
      # Reap the child; stderr is silenced to hide the shell's job notice.
      wait "$pid" 2>/dev/null || true
      return 124
    fi
    sleep 0.05
  done
  # The command ended on its own: hand its exit status back to the caller.
  wait "$pid"
}
34
+
35
# Command names handed to flow_agents_node as its first argument.
WRITER="workflow-sidecar"
VALIDATOR="validate-workflow-artifacts"
ARTIFACT_DIR="$TMPDIR_EVAL/repo/.flow-agents/auto-sidecars"
mkdir -p "$ARTIFACT_DIR"

# Check: ensure-session succeeds against a fresh artifact root.
# stdout and stderr are captured so a failure can show both.
SESSION_ROOT="$TMPDIR_EVAL/repo/.flow-agents"
if ! flow_agents_node "$WRITER" ensure-session \
  --artifact-root "$SESSION_ROOT" \
  --task-slug ensured-session \
  --source-request "Create a current workflow session automatically." \
  --title "Ensured Session" \
  --summary "Automatically create a durable session artifact and initial sidecars." \
  --criterion "Session artifact exists" \
  --criterion "Initial sidecars validate" \
  --next-action "Continue execution with durable state." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/ensure.out" 2>"$TMPDIR_EVAL/ensure.err"; then
  _fail "sidecar writer ensure-session failed: $(cat "$TMPDIR_EVAL/ensure.out" "$TMPDIR_EVAL/ensure.err")"
else
  _pass "sidecar writer ensures current session artifact"
fi
55
+
56
# Check: the session directory holds the markdown artifact and the three
# initial sidecars, and the artifact root holds current.json.
ENSURED_DIR="$SESSION_ROOT/ensured-session"
if [[ -f "$ENSURED_DIR/ensured-session--deliver.md" \
  && -f "$ENSURED_DIR/state.json" \
  && -f "$ENSURED_DIR/acceptance.json" \
  && -f "$ENSURED_DIR/handoff.json" \
  && -f "$SESSION_ROOT/current.json" ]]; then
  _pass "sidecar writer creates session markdown and initial sidecars"
else
  _fail "sidecar writer did not create expected session files"
fi
66
+
67
# Check: every sidecar records the same "repo" value, and it is not a local
# path. The inline node script throws (non-zero exit) on the first mismatch.
if ! node - "$ENSURED_DIR/state.json" "$ENSURED_DIR/acceptance.json" "$ENSURED_DIR/handoff.json" <<'NODE'
const fs = require("node:fs");
for (const file of process.argv.slice(2)) {
  const repo = JSON.parse(fs.readFileSync(file, "utf8")).repo;
  if (repo !== "kontourai/flow-agents") throw new Error(`${file} repo was ${JSON.stringify(repo)}`);
  if (repo.includes("/") && repo.startsWith("/")) throw new Error(`${file} repo is an absolute path`);
}
NODE
then
  _fail "sidecar writer did not record stable repository identity"
else
  _pass "sidecar writer records stable repository identity without local paths"
fi
80
+
81
# Check: path-like repository inputs (a file:// remote and a path in
# FLOW_AGENTS_REPO) must not end up in the recorded "repo" value.
# Everything runs in a subshell so the cd does not affect later checks.
UNSAFE_REPO_ROOT="$TMPDIR_EVAL/unsafe-repo"
mkdir -p "$UNSAFE_REPO_ROOT"
if ! (
  cd "$UNSAFE_REPO_ROOT" \
    && git init -q \
    && git remote add origin "file:///Users/alice/customer-secret.git" \
    && FLOW_AGENTS_REPO="/Users/alice/customer-secret" flow_agents_node "$WRITER" ensure-session \
      --artifact-root ".flow-agents" \
      --task-slug unsafe-repo \
      --title "Unsafe repo" \
      --summary "Unsafe repo fallback." \
      --timestamp "2026-05-09T00:00:00Z" >/dev/null 2>"$TMPDIR_EVAL/unsafe-repo.err" \
    && node - ".flow-agents/unsafe-repo/state.json" <<'NODE'
const fs = require("node:fs");
const repo = JSON.parse(fs.readFileSync(process.argv[2], "utf8")).repo;
if (repo !== "unsafe-repo") throw new Error(`unsafe repo fallback was ${JSON.stringify(repo)}`);
if (repo.includes("alice") || repo.includes("/") || repo.startsWith("/")) throw new Error(`unsafe repo leaked local path material: ${repo}`);
NODE
); then
  _fail "sidecar writer leaked or rejected path-like repository identity inputs: $(cat "$TMPDIR_EVAL/unsafe-repo.err" 2>/dev/null)"
else
  _pass "sidecar writer rejects path-like repository identity inputs"
fi
103
+
104
# Check: "current" reports the active slug, and the reported path resolves
# (relative to the repo directory) to the ensured session directory.
if flow_agents_node "$WRITER" current --artifact-root "$SESSION_ROOT" --format slug \
    >"$TMPDIR_EVAL/current-slug.out" 2>"$TMPDIR_EVAL/current-slug.err" \
  && [[ "$(cat "$TMPDIR_EVAL/current-slug.out")" == "ensured-session" ]] \
  && flow_agents_node "$WRITER" current --artifact-root "$SESSION_ROOT" --format path \
    >"$TMPDIR_EVAL/current-path.out" 2>"$TMPDIR_EVAL/current-path.err" \
  && [[ "$(cd "$TMPDIR_EVAL/repo" && realpath "$(cat "$TMPDIR_EVAL/current-path.out")")" == "$(realpath "$ENSURED_DIR")" ]]; then
  _pass "sidecar writer resolves current workflow identity"
else
  _fail "sidecar writer did not resolve current workflow identity: $(cat "$TMPDIR_EVAL/current-slug.out" "$TMPDIR_EVAL/current-slug.err" "$TMPDIR_EVAL/current-path.out" "$TMPDIR_EVAL/current-path.err")"
fi
112
+
113
# Check: record-agent-event writes the agent's events.jsonl and the agent id
# also shows up in current.json.
# NOTE(review): the split literal ag""ents looks deliberate (possibly to keep
# the joined path text out of this file) — confirm before merging the halves.
AGENT_EVENT_PATH="$ENSURED_DIR/ag""ents/tool-worker-1/events.jsonl"
if flow_agents_node "$WRITER" record-agent-event \
    --artifact-root "$SESSION_ROOT" \
    --agent-id tool-worker-1 \
    --kind evidence \
    --status active \
    --summary "Worker started a bounded implementation pass." \
    --ref wave-1 \
    --timestamp "2026-05-09T00:00:30Z" \
    >"$TMPDIR_EVAL/agent-event.out" 2>"$TMPDIR_EVAL/agent-event.err" \
  && [[ -f "$AGENT_EVENT_PATH" ]] \
  && rg -q '"agent_id": "tool-worker-1"' "$AGENT_EVENT_PATH" \
  && rg -q '"agent_id": "tool-worker-1"' "$SESSION_ROOT/current.json"; then
  _pass "sidecar writer records delegation-safe agent events"
else
  _fail "sidecar writer did not record delegation-safe agent event: $(cat "$TMPDIR_EVAL/agent-event.out" "$TMPDIR_EVAL/agent-event.err")"
fi
129
+
130
# Check: an agent id containing "../" must be refused. The command has to
# fail, print the expected diagnostic, create nothing at the traversal
# target, leave current.json byte-identical, and leave no lock directory.
# The call is limited to 5 seconds via run_bounded.
cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-traversal-agent.json"
TRAVERSAL_AGENT_OUTSIDE="$TMPDIR_EVAL/repo/.flow-agents/evil-agent-outside.jsonl"
if run_bounded 5 flow_agents_node "$WRITER" record-agent-event \
  --artifact-root "$SESSION_ROOT" \
  --agent-id ../evil-agent-outside \
  --kind evidence \
  --status active \
  --summary "This traversal agent id should fail before mutation." \
  >"$TMPDIR_EVAL/traversal-agent-event.out" 2>&1; then
  _fail "sidecar writer should reject traversal agent ids"
else
  if rg -q -- '--agent-id must not contain' "$TMPDIR_EVAL/traversal-agent-event.out" \
    && [[ ! -e "$TRAVERSAL_AGENT_OUTSIDE" ]] \
    && cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-traversal-agent.json" \
    && [[ ! -e "$SESSION_ROOT/.workflow-sidecar.lockdir" ]]; then
    _pass "sidecar writer rejects traversal agent ids without mutation or lock residue"
  else
    _fail "sidecar writer traversal agent rejection lacked diagnostics or left residue: $(cat "$TMPDIR_EVAL/traversal-agent-event.out")"
  fi
fi
147
+
148
# Point --artifact-dir at a directory name that is deliberately misspelled
# ("ensured-sessoin"); the writer must fail and current.json must not change.
cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-bad-agent.json"
bad_agent_args=(
  record-agent-event
  --artifact-root "$SESSION_ROOT"
  --artifact-dir "$SESSION_ROOT/ensured-sessoin"
  --agent-id typo-worker
  --kind evidence
  --status active
  --summary "This typo should not create a workflow."
)
if flow_agents_node "$WRITER" "${bad_agent_args[@]}" >"$TMPDIR_EVAL/bad-agent-event.out" 2>&1; then
  _fail "sidecar writer should reject missing explicit artifact dirs"
else
  if cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-bad-agent.json"; then
    _pass "sidecar writer rejects bad explicit artifact dirs without changing current"
  else
    _fail "sidecar writer changed current after bad explicit artifact dir"
  fi
fi
162
+
163
# Create a second session and verify it becomes the active slug with an empty
# active_agents list in current.json.
fresh_session_args=(
  ensure-session
  --artifact-root "$SESSION_ROOT"
  --task-slug fresh-session
  --source-request "Create a fresh session after worker activity."
  --title "Fresh Session"
  --summary "A new active workflow should not inherit agents from the prior slug."
  --criterion "Fresh session is active"
  --timestamp "2026-05-09T00:00:45Z"
)
fresh_session_ok=false
if flow_agents_node "$WRITER" "${fresh_session_args[@]}" \
  >"$TMPDIR_EVAL/ensure-fresh.out" 2>"$TMPDIR_EVAL/ensure-fresh.err"; then
  if rg -q '"active_slug": "fresh-session"' "$SESSION_ROOT/current.json" \
    && node -e 'const fs=require("fs"); const current=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); if (JSON.stringify(current.active_agents)!=="[]") process.exit(1);' "$SESSION_ROOT/current.json"; then
    fresh_session_ok=true
  fi
fi
if [[ "$fresh_session_ok" == true ]]; then
  _pass "sidecar writer resets active agents for a new current workflow"
else
  _fail "sidecar writer carried stale active agents into a new workflow: $(cat "$TMPDIR_EVAL/ensure-fresh.out" "$TMPDIR_EVAL/ensure-fresh.err")"
fi
178
+
179
# A task slug containing "../" must be rejected with a diagnostic and must not
# create a sibling "outside" directory next to the artifact root.
traversal_slug_args=(
  ensure-session
  --artifact-root "$SESSION_ROOT"
  --task-slug ../outside
  --source-request "Traversal should be rejected."
  --title "Traversal Fixture"
  --summary "This must not create artifacts outside the root."
  --timestamp "2026-05-09T00:00:50Z"
)
if flow_agents_node "$WRITER" "${traversal_slug_args[@]}" >"$TMPDIR_EVAL/ensure-traversal.out" 2>&1; then
  _fail "sidecar writer should reject traversal task slugs"
else
  if rg -q -- '--task-slug must not contain' "$TMPDIR_EVAL/ensure-traversal.out" \
    && [[ ! -d "$TMPDIR_EVAL/repo/.flow-agents/outside" ]]; then
    _pass "sidecar writer rejects traversal task slugs without creating outside dirs"
  else
    _fail "sidecar writer traversal rejection was not fail-closed: $(cat "$TMPDIR_EVAL/ensure-traversal.out")"
  fi
fi
193
+
194
# Record an event against the older session via an explicit --artifact-dir.
# The event must be written there, while current.json keeps "fresh-session"
# as the active slug and gains no entry for the late worker.
LATE_AGENT_EVENT_PATH="$ENSURED_DIR/ag""ents/late-worker/events.jsonl"
late_agent_args=(
  record-agent-event
  --artifact-root "$SESSION_ROOT"
  --artifact-dir "$ENSURED_DIR"
  --agent-id late-worker
  --kind completed
  --status done
  --summary "A late worker finished the old workflow after a newer session became active."
  --timestamp "2026-05-09T00:01:00Z"
)
late_agent_ok=false
if flow_agents_node "$WRITER" "${late_agent_args[@]}" \
  >"$TMPDIR_EVAL/late-agent-event.out" 2>"$TMPDIR_EVAL/late-agent-event.err"; then
  if [[ -f "$LATE_AGENT_EVENT_PATH" ]] \
    && rg -q '"agent_id": "late-worker"' "$LATE_AGENT_EVENT_PATH" \
    && rg -q '"active_slug": "fresh-session"' "$SESSION_ROOT/current.json" \
    && ! rg -q '"agent_id": "late-worker"' "$SESSION_ROOT/current.json"; then
    late_agent_ok=true
  fi
fi
if [[ "$late_agent_ok" == true ]]; then
  _pass "sidecar writer keeps late explicit agent events from stealing current workflow"
else
  _fail "sidecar writer let a late explicit agent event change current workflow: $(cat "$TMPDIR_EVAL/late-agent-event.out" "$TMPDIR_EVAL/late-agent-event.err")"
fi
211
+
212
# Copy the ensured session under a separate parent directory and record an
# event using only --artifact-dir. The event must land in the copy, the
# original current.json must be unchanged, and no lock dir may remain.
COPIED_ROOT="$TMPDIR_EVAL/copied-workflows"
COPIED_DIR="$COPIED_ROOT/ensured-session"
mkdir -p "$COPIED_ROOT"
cp -R "$ENSURED_DIR" "$COPIED_DIR"
cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-copied-agent.json"
COPIED_AGENT_EVENT_PATH="$COPIED_DIR/ag""ents/copied-worker/events.jsonl"
copied_agent_args=(
  record-agent-event
  --artifact-dir "$COPIED_DIR"
  --agent-id copied-worker
  --kind evidence
  --status done
  --summary "A copied workflow outside the default root records without hanging."
  --timestamp "2026-05-09T00:01:05Z"
)
copied_agent_ok=false
if run_bounded 5 flow_agents_node "$WRITER" "${copied_agent_args[@]}" \
  >"$TMPDIR_EVAL/copied-agent-event.out" 2>"$TMPDIR_EVAL/copied-agent-event.err"; then
  if [[ -f "$COPIED_AGENT_EVENT_PATH" ]] \
    && rg -q '"agent_id": "copied-worker"' "$COPIED_AGENT_EVENT_PATH" \
    && cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-copied-agent.json" \
    && [[ ! -e "$COPIED_ROOT/.workflow-sidecar.lockdir" \
      && ! -e "$COPIED_DIR/.workflow-sidecar.lockdir" \
      && ! -e "$SESSION_ROOT/.workflow-sidecar.lockdir" ]]; then
    copied_agent_ok=true
  fi
fi
if [[ "$copied_agent_ok" == true ]]; then
  _pass "sidecar writer records bounded explicit events in copied workflow dirs"
else
  _fail "sidecar writer copied explicit event failed or left residue: $(cat "$TMPDIR_EVAL/copied-agent-event.out" "$TMPDIR_EVAL/copied-agent-event.err")"
fi
235
+
236
# Pair --artifact-root with an --artifact-dir that lives under a different
# parent. The writer must refuse with a diagnostic and leave no event file,
# no current.json change, and no lock dir in any of the involved directories.
cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-mismatch-agent.json"
MISMATCH_AGENT_EVENT_PATH="$COPIED_DIR/ag""ents/mismatch-worker/events.jsonl"
mismatch_agent_args=(
  record-agent-event
  --artifact-root "$SESSION_ROOT"
  --artifact-dir "$COPIED_DIR"
  --agent-id mismatch-worker
  --kind evidence
  --status active
  --summary "This root mismatch should fail before mutation."
)
if run_bounded 5 flow_agents_node "$WRITER" "${mismatch_agent_args[@]}" \
  >"$TMPDIR_EVAL/mismatch-agent-event.out" 2>&1; then
  _fail "sidecar writer should reject explicit artifact-dir/root mismatches"
else
  if rg -q 'artifact directory must be under artifact root' "$TMPDIR_EVAL/mismatch-agent-event.out" \
    && [[ ! -e "$MISMATCH_AGENT_EVENT_PATH" ]] \
    && cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-mismatch-agent.json" \
    && [[ ! -e "$COPIED_ROOT/.workflow-sidecar.lockdir" \
      && ! -e "$COPIED_DIR/.workflow-sidecar.lockdir" \
      && ! -e "$SESSION_ROOT/.workflow-sidecar.lockdir" ]]; then
    _pass "sidecar writer rejects artifact-dir/root mismatches without mutation or lock residue"
  else
    _fail "sidecar writer mismatch rejection lacked diagnostics or left residue: $(cat "$TMPDIR_EVAL/mismatch-agent-event.out")"
  fi
fi
256
+
257
# An --artifact-dir that is a symlink inside the artifact root must be rejected
# without writing through the link. If the platform cannot create symlinks the
# case is reported as a (skipped) pass together with the ln error text.
SYMLINK_TARGET="$TMPDIR_EVAL/symlink-target-workflow"
SYMLINK_DIR="$SESSION_ROOT/symlink-session"
mkdir -p "$SYMLINK_TARGET"
if ! ln -s "$SYMLINK_TARGET" "$SYMLINK_DIR" 2>"$TMPDIR_EVAL/symlink-create.err"; then
  _pass "sidecar writer symlink artifact-dir coverage skipped because symlink creation is unavailable: $(cat "$TMPDIR_EVAL/symlink-create.err")"
else
  cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-symlink-agent.json"
  symlink_agent_args=(
    record-agent-event
    --artifact-root "$SESSION_ROOT"
    --artifact-dir "$SYMLINK_DIR"
    --agent-id symlink-worker
    --kind evidence
    --status active
    --summary "A symlink artifact dir should fail before mutation."
  )
  if run_bounded 5 flow_agents_node "$WRITER" "${symlink_agent_args[@]}" \
    >"$TMPDIR_EVAL/symlink-agent-event.out" 2>&1; then
    _fail "sidecar writer should reject symlink artifact dirs"
  elif rg -q 'artifact directory must not be a symlink' "$TMPDIR_EVAL/symlink-agent-event.out" \
    && [[ ! -e "$SYMLINK_TARGET/ag""ents/symlink-worker/events.jsonl" ]] \
    && cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-symlink-agent.json" \
    && [[ ! -e "$SESSION_ROOT/.workflow-sidecar.lockdir" && ! -e "$SYMLINK_TARGET/.workflow-sidecar.lockdir" ]]; then
    _pass "sidecar writer rejects symlink artifact dirs without mutation or lock residue"
  else
    _fail "sidecar writer symlink artifact-dir rejection lacked diagnostics or left residue: $(cat "$TMPDIR_EVAL/symlink-agent-event.out")"
  fi
fi
282
+
283
# Race fixture: start an explicit record-agent-event for session A in the
# background, wait until the artifact-root lock directory is visible, then
# switch the current session to B. Both commands must succeed, B must remain
# the active slug, and A must still receive the worker's event.
# NOTE(review): FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY looks like a writer test
# hook that keeps the lock held long enough to overlap — confirm in the writer.
if flow_agents_node "$WRITER" ensure-session \
  --artifact-root "$SESSION_ROOT" \
  --task-slug race-session-a \
  --source-request "Create a race fixture session." \
  --title "Race Session A" \
  --summary "Explicit agent events should serialize with current session switches." \
  --criterion "Race session A exists" \
  --timestamp "2026-05-09T00:01:10Z" >"$TMPDIR_EVAL/ensure-race-a.out" 2>"$TMPDIR_EVAL/ensure-race-a.err"; then
  RACE_A_DIR="$SESSION_ROOT/race-session-a"
  FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY=1.2 flow_agents_node "$WRITER" record-agent-event \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$RACE_A_DIR" \
    --agent-id race-worker \
    --kind evidence \
    --status active \
    --summary "This explicit event races with a session switch." \
    --timestamp "2026-05-09T00:01:12Z" >"$TMPDIR_EVAL/race-agent-event.out" 2>"$TMPDIR_EVAL/race-agent-event.err" &
  race_pid=$!

  # Poll for the lock directory. The probe's status is kept: if the lock was
  # never observed the overlap was not exercised and the case must not pass.
  # "cmd || status=$?" also keeps the capture safe should errexit be enabled.
  race_lock_status=0
  node -e 'const fs=require("fs"); const lock=process.argv[1]; const deadline=Date.now()+5000; (function wait(){ if (fs.existsSync(lock)) process.exit(0); if (Date.now()>deadline) { console.error("record-agent-event did not acquire root lock before timeout"); process.exit(1); } setTimeout(wait,20); })();' "$SESSION_ROOT/.workflow-sidecar.lockdir" \
    2>"$TMPDIR_EVAL/race-lock-wait.err" || race_lock_status=$?

  race_status_b=0
  flow_agents_node "$WRITER" ensure-session \
    --artifact-root "$SESSION_ROOT" \
    --task-slug race-session-b \
    --source-request "Switch current session during explicit event." \
    --title "Race Session B" \
    --summary "The current workflow switch should not be lost." \
    --criterion "Race session B remains current" \
    --timestamp "2026-05-09T00:01:15Z" >"$TMPDIR_EVAL/ensure-race-b.out" 2>"$TMPDIR_EVAL/ensure-race-b.err" \
    || race_status_b=$?

  # Always reap the background writer, even when an earlier step failed.
  race_status_event=0
  wait "$race_pid" || race_status_event=$?

  race_event_path="$RACE_A_DIR/agen""ts/race-worker/even""ts.jsonl"
  if [[ "$race_lock_status" -eq 0 && "$race_status_event" -eq 0 && "$race_status_b" -eq 0 ]] \
    && rg -q '"active_slug": "race-session-b"' "$SESSION_ROOT/current.json" \
    && [[ -f "$race_event_path" ]] \
    && rg -q '"agent_id": "race-worker"' "$race_event_path"
  then
    _pass "sidecar writer serializes explicit agent events with current workflow switches"
  else
    _fail "sidecar writer did not serialize explicit agent events with current workflow switches: $(cat "$TMPDIR_EVAL/race-agent-event.out" "$TMPDIR_EVAL/race-agent-event.err" "$TMPDIR_EVAL/ensure-race-b.out" "$TMPDIR_EVAL/ensure-race-b.err" "$TMPDIR_EVAL/race-lock-wait.err")"
  fi
else
  _fail "sidecar writer could not create race fixture: $(cat "$TMPDIR_EVAL/ensure-race-a.out" "$TMPDIR_EVAL/ensure-race-a.err")"
fi
326
+
327
# Run the validator with --require-sidecars over the ensured session directory.
if ! flow_agents_node "$VALIDATOR" --require-sidecars "$ENSURED_DIR" \
  >"$TMPDIR_EVAL/ensure-valid.out" 2>"$TMPDIR_EVAL/ensure-valid.err"; then
  _fail "ensured session artifacts failed validation: $(cat "$TMPDIR_EVAL/ensure-valid.out" "$TMPDIR_EVAL/ensure-valid.err")"
else
  _pass "ensured session artifacts validate"
fi
332
+
333
+ EXISTING_ONLY_DIR="$SESSION_ROOT/existing-session"
334
+ mkdir -p "$EXISTING_ONLY_DIR"
335
+ cat > "$EXISTING_ONLY_DIR/existing-session--deliver.md" <<'MARKDOWN'
336
+ # Existing Session
337
+
338
+ branch: main
339
+ worktree: main
340
+ created: 2026-05-09T00:00:00Z
341
+ status: planning
342
+ type: deliver
343
+ iteration: 1
344
+
345
+ ## Plan
346
+
347
+ Existing artifact should keep its own criteria when sidecars are filled in later.
348
+
349
+ ## Definition Of Done
350
+
351
+ - **User outcome:** Existing session remains the source of truth.
352
+ - **Scope:** Existing Markdown plus missing sidecars.
353
+ - **Acceptance criteria:**
354
+ - [ ] Existing artifact criterion - Evidence: existing Markdown.
355
+ - **Usefulness checks:**
356
+ - [ ] User-facing workflow is documented or discoverable
357
+ - **Stop-short risks:** Sidecars could drift from existing Markdown.
358
+ - **Durable docs target:** not needed
359
+ - **Sandbox mode:** local-edit
360
+
361
+ ## Execution Progress
362
+
363
+ - [ ] Session initialized.
364
+
365
+ ## Verification Report
366
+
367
+ Build: [FAIL] Verification has not run yet.
368
+
369
+ ### Acceptance Criteria
370
+ - [FAIL] Verification has not run yet - Evidence: pending workflow execution and checks.
371
+
372
+ ### Verdict: FAIL
373
+
374
+ ## Goal Fit Gate
375
+
376
+ - [ ] Original user goal restated
377
+
378
+ ## Final Acceptance
379
+
380
+ - [ ] CI/relevant checks passed or local equivalent recorded
381
+ MARKDOWN
382
+
383
# ensure-session on a directory that already has a Markdown artifact: the
# generated acceptance.json must carry the criterion from the Markdown and not
# the different one supplied on the command line.
ensure_existing_args=(
  ensure-session
  --artifact-root "$SESSION_ROOT"
  --task-slug existing-session
  --source-request "Select existing session."
  --summary "Fill missing sidecars for an existing artifact."
  --criterion "Different CLI criterion"
  --timestamp "2026-05-09T00:02:00Z"
)
ensure_existing_ok=false
if flow_agents_node "$WRITER" "${ensure_existing_args[@]}" \
  >"$TMPDIR_EVAL/ensure-existing.out" 2>"$TMPDIR_EVAL/ensure-existing.err"; then
  if rg -q '"description": "Existing artifact criterion"' "$EXISTING_ONLY_DIR/acceptance.json" \
    && ! rg -q 'Different CLI criterion' "$EXISTING_ONLY_DIR/acceptance.json"; then
    ensure_existing_ok=true
  fi
fi
if [[ "$ensure_existing_ok" == true ]]; then
  _pass "sidecar writer derives missing sidecars from existing session Markdown"
else
  _fail "sidecar writer drifted sidecars from existing session Markdown: $(cat "$TMPDIR_EVAL/ensure-existing.out" "$TMPDIR_EVAL/ensure-existing.err")"
fi
396
+
397
# Append a marker line to the existing artifact, call ensure-session for the
# same slug again, and require the marker to still be present afterwards.
printf '%s\n' 'DO NOT OVERWRITE' >> "$ENSURED_DIR/ensured-session--deliver.md"
ensure_again_args=(
  ensure-session
  --artifact-root "$SESSION_ROOT"
  --task-slug ensured-session
  --source-request "Create a current workflow session automatically."
  --summary "This second call should select the existing session."
  --criterion "Should not replace the artifact"
  --timestamp "2026-05-09T00:01:00Z"
)
ensure_again_ok=false
if flow_agents_node "$WRITER" "${ensure_again_args[@]}" \
  >"$TMPDIR_EVAL/ensure-again.out" 2>"$TMPDIR_EVAL/ensure-again.err"; then
  if rg -q 'DO NOT OVERWRITE' "$ENSURED_DIR/ensured-session--deliver.md"; then
    ensure_again_ok=true
  fi
fi
if [[ "$ensure_again_ok" == true ]]; then
  _pass "sidecar writer selects existing session without overwrite"
else
  _fail "sidecar writer overwrote existing ensured session: $(cat "$TMPDIR_EVAL/ensure-again.out" "$TMPDIR_EVAL/ensure-again.err")"
fi
410
+
411
+ cat > "$ARTIFACT_DIR/auto-sidecars--deliver.md" <<'MARKDOWN'
412
+ # Generate sidecars automatically
413
+
414
+ status: delivered
415
+ type: deliver
416
+
417
+ ## Plan
418
+
419
+ Use a writer utility to create machine-readable workflow sidecars.
420
+
421
+ ## Definition Of Done
422
+
423
+ - **User outcome:** Workflow agents can create sidecars without hand-writing JSON.
424
+ - **Scope:** Sidecar writer utility and integration tests.
425
+ - **Acceptance criteria:**
426
+ - [x] Planning sidecars are initialized - Evidence: writer creates state, acceptance, and handoff JSON.
427
+ - [x] Evidence sidecar is recorded - Evidence: writer records evidence JSON and updates acceptance state.
428
+ - [x] Critique sidecar is recorded - Evidence: writer records critique JSON and strict validation passes.
429
+ - [x] Release and learning sidecars are recorded - Evidence: writer records release and learning JSON and updates workflow state.
430
+ - **Usefulness checks:**
431
+ - [x] User-facing workflow is documented or discoverable
432
+ - [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
433
+ - **Stop-short risks:** Writer output could be syntactically valid but inconsistent with Markdown.
434
+ - **Durable docs target:** docs/workflow-usage-guide.md
435
+ - **Sandbox mode:** local-edit
436
+
437
+ ## Verification Report
438
+
439
+ Build: [PASS] sidecar writer fixture
440
+
441
+ ### Acceptance Criteria
442
+ - [PASS] Planning sidecars are initialized - Evidence: state, acceptance, and handoff JSON exist.
443
+ - [PASS] Evidence sidecar is recorded - Evidence: evidence JSON exists.
444
+ - [PASS] Critique sidecar is recorded - Evidence: critique JSON exists.
445
+
446
+ ### Verdict: PASS
447
+
448
+ ## Goal Fit Gate
449
+
450
+ - [x] Original user goal restated
451
+ - [x] Every acceptance criterion has evidence
452
+
453
+ ## Final Acceptance
454
+
455
+ - [x] CI/relevant checks passed
456
+ MARKDOWN
457
+
458
# Initialize planning sidecars from the auto-sidecars Markdown artifact.
init_plan_args=(
  init-plan "$ARTIFACT_DIR/auto-sidecars--deliver.md"
  --source-request "Generate workflow sidecars automatically."
  --summary "Planning sidecars were initialized from Markdown."
  --next-action "Record evidence after checks run."
  --timestamp "2026-05-09T00:00:00Z"
)
if ! flow_agents_node "$WRITER" "${init_plan_args[@]}" \
  >"$TMPDIR_EVAL/init.out" 2>"$TMPDIR_EVAL/init.err"; then
  _fail "sidecar writer init failed: $(cat "$TMPDIR_EVAL/init.out" "$TMPDIR_EVAL/init.err")"
else
  _pass "sidecar writer initializes planning sidecars"
fi

# The first acceptance criterion should surface in acceptance.json under this id.
if ! rg -q '"id": "planning-sidecars-are-initialized"' "$ARTIFACT_DIR/acceptance.json"; then
  _fail "sidecar writer did not extract expected acceptance criterion"
else
  _pass "sidecar writer extracts Definition Of Done criteria"
fi
473
+
474
# Record a single passing check as evidence for the auto-sidecars artifact.
writer_fixture_check='{"id":"writer-fixture","kind":"test","status":"pass","summary":"Writer fixture passed.","command":"test_workflow_sidecar_writer.sh"}'
if ! flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
  --verdict pass \
  --check-json "$writer_fixture_check" \
  --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/evidence.out" 2>"$TMPDIR_EVAL/evidence.err"; then
  _fail "sidecar writer evidence failed: $(cat "$TMPDIR_EVAL/evidence.out" "$TMPDIR_EVAL/evidence.err")"
else
  _pass "sidecar writer records evidence"
fi

# After evidence, state.json should read "verified" and acceptance.json "pass".
evidence_state_ok=false
if rg -q '"status": "verified"' "$ARTIFACT_DIR/state.json"; then
  if rg -q '"status": "pass"' "$ARTIFACT_DIR/acceptance.json"; then
    evidence_state_ok=true
  fi
fi
if [[ "$evidence_state_ok" == true ]]; then
  _pass "sidecar writer updates state and acceptance from evidence"
else
  _fail "sidecar writer did not update state and acceptance"
fi
488
+
489
# Fixture: a fresh workflow directory initialized from the auto-sidecars
# Markdown, then used to confirm that a check whose artifact_refs contains a
# plain string is rejected before evidence.json is written or state advances.
INVALID_REF_DIR="$TMPDIR_EVAL/repo/.flow-agents/invalid-evidence-ref"
mkdir -p "$INVALID_REF_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_REF_DIR/invalid-evidence-ref--deliver.md"

# The fixture init is checked explicitly so a broken fixture is reported as
# such instead of surfacing as a misleading "not fail-closed" failure below.
if ! flow_agents_node "$WRITER" init-plan "$INVALID_REF_DIR/invalid-evidence-ref--deliver.md" \
  --source-request "Reject invalid evidence refs." \
  --summary "Invalid evidence refs fixture." \
  --next-action "Try invalid evidence refs." \
  --timestamp "2026-05-09T00:01:01Z" >"$TMPDIR_EVAL/invalid-ref-init.out" 2>"$TMPDIR_EVAL/invalid-ref-init.err"; then
  _fail "sidecar writer could not create invalid evidence ref fixture: $(cat "$TMPDIR_EVAL/invalid-ref-init.out" "$TMPDIR_EVAL/invalid-ref-init.err")"
elif flow_agents_node "$WRITER" record-evidence "$INVALID_REF_DIR" \
  --verdict pass \
  --check-json '{"id":"legacy-ref-check","kind":"test","status":"pass","summary":"Should fail.","artifact_refs":["legacy-string-ref"]}' \
  --timestamp "2026-05-09T00:01:02Z" >"$TMPDIR_EVAL/legacy-ref.out" 2>"$TMPDIR_EVAL/legacy-ref.err"; then
  _fail "sidecar writer should reject legacy string artifact_refs"
elif rg -q 'legacy string refs are not supported' "$TMPDIR_EVAL/legacy-ref.out" "$TMPDIR_EVAL/legacy-ref.err" \
  && [[ ! -f "$INVALID_REF_DIR/evidence.json" ]] \
  && rg -q '"status": "planned"' "$INVALID_REF_DIR/state.json"; then
  _pass "sidecar writer rejects legacy string artifact_refs before mutation"
else
  _fail "legacy string artifact_refs rejection was not fail-closed: $(cat "$TMPDIR_EVAL/legacy-ref.out" "$TMPDIR_EVAL/legacy-ref.err")"
fi
510
+
511
# A structured artifact ref carrying only "kind" must be rejected before any
# evidence is written and while state.json still reads "planned".
incomplete_ref_check='{"id":"incomplete-ref-check","kind":"test","status":"pass","summary":"Should fail.","artifact_refs":[{"kind":"artifact"}]}'
if flow_agents_node "$WRITER" record-evidence "$INVALID_REF_DIR" \
  --verdict pass \
  --check-json "$incomplete_ref_check" \
  --timestamp "2026-05-09T00:01:03Z" >"$TMPDIR_EVAL/incomplete-ref.out" 2>"$TMPDIR_EVAL/incomplete-ref.err"; then
  _fail "sidecar writer should reject incomplete structured artifact_refs"
else
  if rg -q 'artifact refs require file or url' "$TMPDIR_EVAL/incomplete-ref.out" "$TMPDIR_EVAL/incomplete-ref.err" \
    && [[ ! -f "$INVALID_REF_DIR/evidence.json" ]] \
    && rg -q '"status": "planned"' "$INVALID_REF_DIR/state.json"; then
    _pass "sidecar writer rejects incomplete structured artifact_refs before mutation"
  else
    _fail "incomplete structured artifact_refs rejection was not fail-closed: $(cat "$TMPDIR_EVAL/incomplete-ref.out" "$TMPDIR_EVAL/incomplete-ref.err")"
  fi
fi
523
+
524
# Fixture: initialize a workflow, then rewrite the first acceptance criterion's
# evidence_refs to a plain string. A later, otherwise valid record-evidence
# call must be rejected before evidence.json is written or state advances.
INVALID_ACCEPTANCE_REF_DIR="$TMPDIR_EVAL/repo/.flow-agents/invalid-acceptance-ref"
mkdir -p "$INVALID_ACCEPTANCE_REF_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_ACCEPTANCE_REF_DIR/invalid-acceptance-ref--deliver.md"

# Both fixture steps (init-plan and the JSON rewrite) are checked so a broken
# fixture is reported directly; the rewrite's stderr is appended to the init
# log so it shows up in the failure message.
if ! flow_agents_node "$WRITER" init-plan "$INVALID_ACCEPTANCE_REF_DIR/invalid-acceptance-ref--deliver.md" \
    --source-request "Reject invalid existing acceptance refs." \
    --summary "Invalid acceptance refs fixture." \
    --next-action "Try invalid acceptance refs." \
    --timestamp "2026-05-09T00:01:04Z" >"$TMPDIR_EVAL/invalid-acceptance-ref-init.out" 2>"$TMPDIR_EVAL/invalid-acceptance-ref-init.err" \
  || ! node -e 'const fs=require("fs"); const file=process.argv[1]; const data=JSON.parse(fs.readFileSync(file,"utf8")); data.criteria[0].evidence_refs=["legacy-acceptance-ref.md"]; fs.writeFileSync(file, JSON.stringify(data, null, 2) + "\n");' "$INVALID_ACCEPTANCE_REF_DIR/acceptance.json" 2>>"$TMPDIR_EVAL/invalid-acceptance-ref-init.err"; then
  _fail "sidecar writer could not create invalid acceptance ref fixture: $(cat "$TMPDIR_EVAL/invalid-acceptance-ref-init.out" "$TMPDIR_EVAL/invalid-acceptance-ref-init.err")"
elif flow_agents_node "$WRITER" record-evidence "$INVALID_ACCEPTANCE_REF_DIR" \
  --verdict pass \
  --check-json '{"id":"valid-check","kind":"test","status":"pass","summary":"Valid check."}' \
  --timestamp "2026-05-09T00:01:05Z" >"$TMPDIR_EVAL/invalid-acceptance-ref.out" 2>"$TMPDIR_EVAL/invalid-acceptance-ref.err"; then
  _fail "sidecar writer should reject existing legacy acceptance evidence_refs"
elif rg -q 'acceptance\.criteria\[0\]\.evidence_refs entries must be structured evidence reference objects' "$TMPDIR_EVAL/invalid-acceptance-ref.out" "$TMPDIR_EVAL/invalid-acceptance-ref.err" \
  && [[ ! -f "$INVALID_ACCEPTANCE_REF_DIR/evidence.json" ]] \
  && rg -q '"status": "planned"' "$INVALID_ACCEPTANCE_REF_DIR/state.json"; then
  _pass "sidecar writer rejects existing invalid acceptance refs before mutation"
else
  _fail "existing invalid acceptance ref rejection was not fail-closed: $(cat "$TMPDIR_EVAL/invalid-acceptance-ref.out" "$TMPDIR_EVAL/invalid-acceptance-ref.err")"
fi
546
+
547
# Record a check that carries a surface_trust_refs entry and confirm the ref is
# persisted to evidence.json with no "veritas" text in the file.
SURFACE_CHECK='{"id":"surface-trust-fixture","kind":"policy","status":"pass","summary":"Surface trust evidence passed.","surface_trust_refs":[{"artifact_kind":"TrustReport","artifact_ref":"trust/report.json","gate_id":"builder.surface.claim","claim_type":"surface.claim","claim_status":"accepted","subject":"builder-kit","freshness":{"status":"fresh","summary":"Issued during this workflow."},"authority":{"producer":"surface-local","summary":"Local Surface trust producer."},"integrity":{"status":"matched","summary":"Artifact digest matched expected subject and gate.","digest":"sha256:abc123"},"status":"pass","summary":"Accepted Surface claim."}]}'
surface_evidence_ok=false
if flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
  --verdict pass \
  --check-json "$SURFACE_CHECK" \
  --timestamp "2026-05-09T00:01:05Z" >"$TMPDIR_EVAL/surface-evidence.out" 2>"$TMPDIR_EVAL/surface-evidence.err"; then
  if rg -q '"surface_trust_refs"' "$ARTIFACT_DIR/evidence.json" \
    && rg -q '"artifact_kind": "TrustReport"' "$ARTIFACT_DIR/evidence.json" \
    && ! rg -q 'veritas' "$ARTIFACT_DIR/evidence.json"; then
    surface_evidence_ok=true
  fi
fi
if [[ "$surface_evidence_ok" == true ]]; then
  _pass "sidecar writer records provider-neutral Surface trust refs"
else
  _fail "sidecar writer did not record Surface trust refs: $(cat "$TMPDIR_EVAL/surface-evidence.out" "$TMPDIR_EVAL/surface-evidence.err")"
fi
559
+
560
# A trust ref whose authority object carries an extra "veritas_policy" key must
# be rejected with an "unsupported field" diagnostic.
surface_native_field_check='{"id":"surface-trust-native-field","kind":"policy","status":"pass","summary":"Should fail.","surface_trust_refs":[{"artifact_kind":"Trust Snapshot","artifact_ref":"trust/snapshot.json","gate_id":"builder.surface.claim","claim_type":"surface.claim","claim_status":"accepted","subject":"builder-kit","freshness":{"status":"fresh","summary":"Fresh."},"authority":{"producer":"surface-local","summary":"Producer exists.","veritas_policy":"native-field"},"integrity":{"status":"matched","summary":"Matched."},"status":"pass"}]}'
if flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
  --verdict pass \
  --check-json "$surface_native_field_check" >"$TMPDIR_EVAL/surface-invalid.out" 2>&1; then
  _fail "sidecar writer should reject provider-specific Surface trust fields"
else
  if rg -q 'unsupported field' "$TMPDIR_EVAL/surface-invalid.out"; then
    _pass "sidecar writer rejects provider-specific Surface trust fields"
  else
    _fail "provider-specific Surface trust failure was not actionable: $(cat "$TMPDIR_EVAL/surface-invalid.out")"
  fi
fi
569
+
570
#######################################
# Assert that record-evidence rejects one contradictory Surface trust ref.
# Globals:   WRITER, ARTIFACT_DIR, TMPDIR_EVAL (read)
# Arguments: $1 - case name (used in the check id, the capture file name, and
#                 the reported messages)
#            $2 - JSON object text placed as the only surface_trust_refs entry
# Outputs:   writer stdout+stderr go to
#            $TMPDIR_EVAL/surface-contradictory-<name>.out; the result is
#            reported through _pass/_fail
#######################################
check_contradictory_surface_ref() {
  local case_name="$1"
  local ref_json="$2"
  local capture="$TMPDIR_EVAL/surface-contradictory-$case_name.out"
  local check_json
  # Build the check payload; both substitutions are inserted verbatim.
  printf -v check_json \
    '{"id":"surface-trust-%s","kind":"policy","status":"pass","summary":"Should fail.","surface_trust_refs":[%s]}' \
    "$case_name" "$ref_json"

  if flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
    --verdict pass \
    --check-json "$check_json" >"$capture" 2>&1; then
    _fail "sidecar writer should reject contradictory Surface trust ref: $case_name"
    return
  fi

  if rg -q 'contradicts Surface trust facts' "$capture"; then
    _pass "sidecar writer rejects contradictory Surface trust ref: $case_name"
  else
    _fail "contradictory Surface trust ref failure was not actionable for $case_name: $(cat "$capture")"
  fi
}
583
+
584
# Contradictory trust refs: every case claims "status":"pass" while exactly one
# underlying fact (claim status, freshness, authority, or integrity) disagrees.
# Row layout:
#   name|claim_status|freshness.status|freshness.summary|authority.producer|authority.summary|integrity.status|integrity.summary
contradictory_ref_template='{"artifact_kind":"TrustReport","artifact_ref":"trust/report.json","gate_id":"builder.surface.claim","claim_type":"surface.claim","claim_status":"%s","subject":"builder-kit","freshness":{"status":"%s","summary":"%s"},"authority":{"producer":"%s","summary":"%s"},"integrity":{"status":"%s","summary":"%s"},"status":"pass"}'
contradictory_ref_cases=(
  'rejected-pass|rejected|fresh|Fresh.|surface-local|Producer exists.|matched|Matched.'
  'stale-pass|accepted|stale|Stale.|surface-local|Producer exists.|matched|Matched.'
  'missing-authority-pass|accepted|fresh|Fresh.|unknown|Producer missing.|matched|Matched.'
  'integrity-mismatch-pass|accepted|fresh|Fresh.|surface-local|Producer exists.|mismatch|Mismatch.'
)
for contradictory_case in "${contradictory_ref_cases[@]}"; do
  IFS='|' read -r -a contradictory_fields <<<"$contradictory_case"
  # The template is a constant with exactly seven %s slots (fields 1..7).
  # shellcheck disable=SC2059
  printf -v contradictory_ref "$contradictory_ref_template" "${contradictory_fields[@]:1}"
  check_contradictory_surface_ref "${contradictory_fields[0]}" "$contradictory_ref"
done
588
+
589
+ SURFACE_FIXTURE_DIR="$ROOT/evals/fixtures/surface-trust"
590
#######################################
# Record evidence from one Surface trust fixture file and verify how it was
# mapped into evidence.json.
# Globals:   WRITER, TMPDIR_EVAL, SURFACE_FIXTURE_DIR (read)
# Arguments: $1 - case name (names the workflow dir and the capture files)
#            $2 - fixture file name under $SURFACE_FIXTURE_DIR
#            $3 - verdict passed to record-evidence
#            $4 - status expected on the single "surface-trust-*" check
#            $5 - text expected somewhere in that check's JSON
# Outputs:   writer stdout -> $TMPDIR_EVAL/surface-<name>.out
#            writer stderr and the verification script's stderr ->
#            $TMPDIR_EVAL/surface-<name>.err; result reported via _pass/_fail
#######################################
check_surface_fixture() {
  local name="$1"
  local fixture="$2"
  local verdict="$3"
  local expected_status="$4"
  local expected_text="$5"
  local dir="$TMPDIR_EVAL/repo/.flow-agents/surface-$name"
  local out_file="$TMPDIR_EVAL/surface-$name.out"
  local err_file="$TMPDIR_EVAL/surface-$name.err"
  mkdir -p "$dir"
  # The verification script reports mismatches by throwing; its stderr is
  # appended to the writer's stderr capture so the thrown reason appears in the
  # _fail message instead of being lost to the terminal.
  if flow_agents_node "$WRITER" record-evidence "$dir" \
    --task-slug "surface-$name" \
    --verdict "$verdict" \
    --check-json '{"id":"ordinary-builder-evidence","kind":"test","status":"pass","summary":"Ordinary Builder Kit evidence still records."}' \
    --surface-trust-json "$SURFACE_FIXTURE_DIR/$fixture" \
    --timestamp "2026-05-09T00:02:00Z" >"$out_file" 2>"$err_file" \
    && node -e 'const fs=require("fs"); const [file, expectedStatus, expectedText]=process.argv.slice(1); const data=JSON.parse(fs.readFileSync(file,"utf8")); const trustChecks=data.checks.filter((check)=>check.id.startsWith("surface-trust-")); if (trustChecks.length!==1) throw new Error(`expected one surface trust check, found ${trustChecks.length}`); const check=trustChecks[0]; if (check.status!==expectedStatus) throw new Error(`expected ${expectedStatus}, got ${check.status}`); const ref=check.surface_trust_refs[0]; const blob=JSON.stringify(check); if (!blob.includes(expectedText)) throw new Error(`missing expected text ${expectedText}: ${blob}`); if (blob.toLowerCase().includes("veritas")) throw new Error("surface trust output leaked a Veritas-specific field"); if (ref.gate_id==="unknown" || ref.claim_type==="unknown") throw new Error("surface trust ref did not map gate and claim metadata");' "$dir/evidence.json" "$expected_status" "$expected_text" 2>>"$err_file"
  then
    _pass "surface trust fixture maps $name to $expected_status evidence"
  else
    _fail "surface trust fixture $name failed: $(cat "$out_file" "$err_file")"
  fi
}
611
+
612
# Surface trust fixture matrix.
# Row layout: name|fixture file|verdict|expected status|expected text
surface_fixture_cases=(
  'accepted|accepted-claim-trust-report.json|pass|pass|accepted'
  'rejected|rejected-claim-trust-report.json|fail|fail|rejected'
  'stale|stale-claim-trust-snapshot.json|not_verified|not_verified|not currently verifiable'
  'missing-authority|missing-authority-trust-report.json|fail|fail|missing authority'
  'integrity-mismatch|integrity-mismatch-trust-report.json|fail|fail|integrity'
  'provider-absent|provider-absent.json|not_verified|not_verified|No trust provider is configured'
  'artifact-absent|artifact-absent.json|not_verified|not_verified|not readable'
)
for surface_case in "${surface_fixture_cases[@]}"; do
  # Split on "|" only, so expected text may contain spaces.
  IFS='|' read -r surface_case_name surface_case_fixture surface_case_verdict \
    surface_case_status surface_case_text <<<"$surface_case"
  check_surface_fixture "$surface_case_name" "$surface_case_fixture" \
    "$surface_case_verdict" "$surface_case_status" "$surface_case_text"
done
619
+
620
# record-evidence with only --surface-trust-json (no --check-json) must still
# produce evidence.json containing surface_trust_refs.
PURE_SURFACE_DIR="$TMPDIR_EVAL/repo/.flow-agents/surface-trust-only"
mkdir -p "$PURE_SURFACE_DIR"
pure_surface_args=(
  record-evidence "$PURE_SURFACE_DIR"
  --task-slug "surface-trust-only"
  --verdict pass
  --surface-trust-json "$SURFACE_FIXTURE_DIR/accepted-claim-trust-report.json"
  --timestamp "2026-05-09T00:02:30Z"
)
pure_surface_ok=false
if flow_agents_node "$WRITER" "${pure_surface_args[@]}" \
  >"$TMPDIR_EVAL/surface-only.out" 2>"$TMPDIR_EVAL/surface-only.err"; then
  if rg -q '"surface_trust_refs"' "$PURE_SURFACE_DIR/evidence.json"; then
    pure_surface_ok=true
  fi
fi
if [[ "$pure_surface_ok" == true ]]; then
  _pass "sidecar writer records Surface trust evidence without unrelated check-json"
else
  _fail "sidecar writer should accept Surface trust evidence without check-json: $(cat "$TMPDIR_EVAL/surface-only.out" "$TMPDIR_EVAL/surface-only.err")"
fi
632
+
633
# Move the workflow to in_progress/execution with a handoff aimed at verification.
advance_args=(
  advance-state "$ARTIFACT_DIR"
  --status in_progress
  --phase execution
  --summary "Execution started from the planned sidecars."
  --next-action "Run focused validation and record evidence."
  --target-phase verification
  --artifact-ref auto-sidecars--deliver.md
  --timestamp "2026-05-09T00:01:30Z"
)
if ! flow_agents_node "$WRITER" "${advance_args[@]}" \
  >"$TMPDIR_EVAL/advance.out" 2>"$TMPDIR_EVAL/advance.err"; then
  _fail "sidecar writer advance-state failed: $(cat "$TMPDIR_EVAL/advance.out" "$TMPDIR_EVAL/advance.err")"
else
  _pass "sidecar writer advances workflow state"
fi

# state.json should show the new phase and handoff.json the new next action.
advance_handoff_ok=false
if rg -q '"phase": "execution"' "$ARTIFACT_DIR/state.json"; then
  if rg -q 'Run focused validation' "$ARTIFACT_DIR/handoff.json"; then
    advance_handoff_ok=true
  fi
fi
if [[ "$advance_handoff_ok" == true ]]; then
  _pass "sidecar writer updates handoff during phase transitions"
else
  _fail "sidecar writer did not update state and handoff for phase transition"
fi
651
+
652
# An unrecognised --status must make the command fail and print
# 'status must be one of'.
invalid_status_argv=(
  advance-state "$ARTIFACT_DIR"
  --status dancing
  --phase execution
  --summary "Invalid status fixture."
  --next-action "Should fail."
)
if flow_agents_node "$WRITER" "${invalid_status_argv[@]}" \
    >"$TMPDIR_EVAL/advance-invalid.out" 2>&1; then
  _fail "sidecar writer should reject invalid workflow states"
elif rg -q 'status must be one of' "$TMPDIR_EVAL/advance-invalid.out"; then
  _pass "sidecar writer rejects invalid workflow states"
else
  _fail "invalid state failure was not actionable"
fi

# Same expectation for an unrecognised --target-phase.
invalid_target_argv=(
  advance-state "$ARTIFACT_DIR"
  --status in_progress
  --phase execution
  --summary "Invalid target phase fixture."
  --next-action "Should fail."
  --target-phase banana
)
if flow_agents_node "$WRITER" "${invalid_target_argv[@]}" \
    >"$TMPDIR_EVAL/advance-invalid-target.out" 2>&1; then
  _fail "sidecar writer should reject invalid target phases"
elif rg -q 'target phase must be one of' "$TMPDIR_EVAL/advance-invalid-target.out"; then
  _pass "sidecar writer rejects invalid target phases"
else
  _fail "invalid target phase failure was not actionable"
fi
676
+
677
# Snapshot state.json and handoff.json so the rejected transition can be
# shown to have left both files byte-for-byte unchanged.
for terminal_jump_sidecar in state handoff; do
  cp "$ARTIFACT_DIR/$terminal_jump_sidecar.json" "$TMPDIR_EVAL/terminal-jump-$terminal_jump_sidecar.before"
done
terminal_jump_argv=(
  advance-state "$ARTIFACT_DIR"
  --status archived
  --phase done
  --summary "Verifier terminal jump fixture."
  --next-status done
  --next-action "Should not become terminal before release and learning."
  --target-phase done
  --timestamp "2026-05-09T00:01:40Z"
)
if flow_agents_node "$WRITER" "${terminal_jump_argv[@]}" \
    >"$TMPDIR_EVAL/terminal-jump.out" 2>&1; then
  _fail "transition guard should reject verifier terminal jumps"
elif rg -q 'terminal_jump_rejected' "$TMPDIR_EVAL/terminal-jump.out" \
    && [[ -f "$ARTIFACT_DIR/transition-diagnostics.jsonl" ]] \
    && rg -q '"code": "terminal_jump_rejected"' "$ARTIFACT_DIR/transition-diagnostics.jsonl" \
    && cmp -s "$ARTIFACT_DIR/state.json" "$TMPDIR_EVAL/terminal-jump-state.before" \
    && cmp -s "$ARTIFACT_DIR/handoff.json" "$TMPDIR_EVAL/terminal-jump-handoff.before"; then
  _pass "transition guard rejects terminal jumps without mutating state or handoff"
else
  _fail "terminal jump rejection lacked diagnostics or mutated authoritative sidecars"
fi
697
+
698
# Builder transition fixture: copy the deliver plan under a new slug,
# initialise it, then advance it into verification with
# --flow-definition builder.build. Exit statuses of both writer calls are
# not inspected here; their output is kept in $TMPDIR_EVAL.
BUILDER_TRANSITION_DIR="$TMPDIR_EVAL/repo/.flow-agents/builder-transition-guard"
mkdir -p "$BUILDER_TRANSITION_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$BUILDER_TRANSITION_DIR/builder-transition-guard--deliver.md"

builder_init_argv=(
  init-plan "$BUILDER_TRANSITION_DIR/builder-transition-guard--deliver.md"
  --source-request "Builder transition guard fixture."
  --summary "Builder transition guard fixture."
  --next-action "Move into verification."
  --timestamp "2026-05-09T00:00:00Z"
)
flow_agents_node "$WRITER" "${builder_init_argv[@]}" \
  >"$TMPDIR_EVAL/builder-transition-init.out" 2>"$TMPDIR_EVAL/builder-transition-init.err"

builder_verify_argv=(
  advance-state "$BUILDER_TRANSITION_DIR"
  --status verifying
  --phase verification
  --summary "Builder verification fixture."
  --next-action "Verify according to Builder Kit build flow."
  --target-phase evidence
  --flow-definition builder.build
  --timestamp "2026-05-09T00:01:00Z"
)
flow_agents_node "$WRITER" "${builder_verify_argv[@]}" \
  >"$TMPDIR_EVAL/builder-transition-verify.out" 2>"$TMPDIR_EVAL/builder-transition-verify.err"
714
+
715
# Going back to execution under builder.build without --route-back-reason
# must fail, log a diagnostic, and leave state.json/handoff.json untouched.
for builder_sidecar in state handoff; do
  cp "$BUILDER_TRANSITION_DIR/$builder_sidecar.json" "$TMPDIR_EVAL/builder-missing-reason-$builder_sidecar.before"
done
missing_reason_argv=(
  advance-state "$BUILDER_TRANSITION_DIR"
  --status in_progress
  --phase execution
  --summary "Missing route-back reason fixture."
  --next-action "Route back to execution."
  --target-phase verification
  --flow-definition builder.build
)
if flow_agents_node "$WRITER" "${missing_reason_argv[@]}" \
    >"$TMPDIR_EVAL/builder-missing-reason.out" 2>&1; then
  _fail "transition guard should reject Builder Kit route-back without reason"
elif rg -q 'route_back_reason_required' "$TMPDIR_EVAL/builder-missing-reason.out" \
    && rg -q 'implementation_defect' "$BUILDER_TRANSITION_DIR/transition-diagnostics.jsonl" \
    && cmp -s "$BUILDER_TRANSITION_DIR/state.json" "$TMPDIR_EVAL/builder-missing-reason-state.before" \
    && cmp -s "$BUILDER_TRANSITION_DIR/handoff.json" "$TMPDIR_EVAL/builder-missing-reason-handoff.before"; then
  _pass "transition guard rejects missing Builder Kit route-back reasons without mutation"
else
  _fail "missing Builder Kit route-back reason was not fail-closed"
fi
733
+
734
# With --route-back-reason supplied the same transition must succeed, put
# state.json in the execution phase, and record "count": 1 in
# transition-attempts.json.
route_back_argv=(
  advance-state "$BUILDER_TRANSITION_DIR"
  --status in_progress
  --phase execution
  --summary "Allowed route-back fixture."
  --next-action "Fix implementation defect."
  --target-phase verification
  --flow-definition builder.build
  --route-back-reason implementation_defect
  --timestamp "2026-05-09T00:01:10Z"
)
if flow_agents_node "$WRITER" "${route_back_argv[@]}" \
    >"$TMPDIR_EVAL/builder-route-back.out" 2>"$TMPDIR_EVAL/builder-route-back.err" \
    && rg -q '"phase": "execution"' "$BUILDER_TRANSITION_DIR/state.json" \
    && rg -q '"count": 1' "$BUILDER_TRANSITION_DIR/transition-attempts.json"; then
  _pass "transition guard allows Builder Kit route-back with deterministic attempt key"
else
  _fail "allowed Builder Kit route-back failed: $(cat "$TMPDIR_EVAL/builder-route-back.out" "$TMPDIR_EVAL/builder-route-back.err")"
fi
749
+
750
# Attempts 2 and 3: each iteration advances to verification and then routes
# back to execution again. Exit statuses are not inspected; output goes to
# per-attempt files in $TMPDIR_EVAL.
for attempt in 2 3; do
  forward_argv=(
    advance-state "$BUILDER_TRANSITION_DIR"
    --status verifying
    --phase verification
    --summary "Return to verification attempt $attempt."
    --next-action "Verify again."
    --target-phase evidence
    --flow-definition builder.build
    --timestamp "2026-05-09T00:01:${attempt}0Z"
  )
  flow_agents_node "$WRITER" "${forward_argv[@]}" \
    >"$TMPDIR_EVAL/builder-forward-$attempt.out" 2>"$TMPDIR_EVAL/builder-forward-$attempt.err"

  backward_argv=(
    advance-state "$BUILDER_TRANSITION_DIR"
    --status in_progress
    --phase execution
    --summary "Route back attempt $attempt."
    --next-action "Fix implementation defect again."
    --target-phase verification
    --flow-definition builder.build
    --route-back-reason implementation_defect
    --timestamp "2026-05-09T00:01:${attempt}5Z"
  )
  flow_agents_node "$WRITER" "${backward_argv[@]}" \
    >"$TMPDIR_EVAL/builder-route-back-$attempt.out" 2>"$TMPDIR_EVAL/builder-route-back-$attempt.err"
done
769
+
770
# Advance to verification once more, then attempt a further route-back. That
# attempt must be refused with route_back_attempts_exceeded and
# transition-attempts.json must stay identical to the snapshot taken
# beforehand (still containing "count": 3).
forward_four_argv=(
  advance-state "$BUILDER_TRANSITION_DIR"
  --status verifying
  --phase verification
  --summary "Return to verification before exceeded route-back."
  --next-action "Verify again."
  --target-phase evidence
  --flow-definition builder.build
  --timestamp "2026-05-09T00:01:50Z"
)
flow_agents_node "$WRITER" "${forward_four_argv[@]}" \
  >"$TMPDIR_EVAL/builder-forward-4.out" 2>"$TMPDIR_EVAL/builder-forward-4.err"
cp "$BUILDER_TRANSITION_DIR/transition-attempts.json" "$TMPDIR_EVAL/builder-attempts.before"

exceeded_argv=(
  advance-state "$BUILDER_TRANSITION_DIR"
  --status in_progress
  --phase execution
  --summary "Exceeded route-back fixture."
  --next-action "Should block after max attempts."
  --target-phase verification
  --flow-definition builder.build
  --route-back-reason implementation_defect
)
if flow_agents_node "$WRITER" "${exceeded_argv[@]}" \
    >"$TMPDIR_EVAL/builder-route-back-exceeded.out" 2>&1; then
  _fail "transition guard should block exceeded Builder Kit route-back attempts"
elif rg -q 'route_back_attempts_exceeded' "$TMPDIR_EVAL/builder-route-back-exceeded.out" \
    && rg -q '"count": 3' "$BUILDER_TRANSITION_DIR/transition-attempts.json" \
    && cmp -s "$BUILDER_TRANSITION_DIR/transition-attempts.json" "$TMPDIR_EVAL/builder-attempts.before"; then
  _pass "transition guard blocks route-back loops without double incrementing rejected attempts"
else
  _fail "Builder Kit max-attempt route-back behavior was not deterministic"
fi
795
+
796
# Legacy fixture: the same verification -> execution round trip, but without
# --flow-definition. The route-back must succeed and must not create
# transition-attempts.json.
LEGACY_TRANSITION_DIR="$TMPDIR_EVAL/repo/.flow-agents/legacy-transition-guard"
mkdir -p "$LEGACY_TRANSITION_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$LEGACY_TRANSITION_DIR/legacy-transition-guard--deliver.md"

legacy_init_argv=(
  init-plan "$LEGACY_TRANSITION_DIR/legacy-transition-guard--deliver.md"
  --source-request "Legacy transition guard fixture."
  --summary "Legacy transition guard fixture."
  --next-action "Move into verification."
  --timestamp "2026-05-09T00:00:00Z"
)
flow_agents_node "$WRITER" "${legacy_init_argv[@]}" \
  >"$TMPDIR_EVAL/legacy-transition-init.out" 2>"$TMPDIR_EVAL/legacy-transition-init.err"

legacy_verify_argv=(
  advance-state "$LEGACY_TRANSITION_DIR"
  --status verifying
  --phase verification
  --summary "Legacy verification fixture."
  --next-action "Verify direct primitive workflow."
  --target-phase evidence
  --timestamp "2026-05-09T00:01:00Z"
)
flow_agents_node "$WRITER" "${legacy_verify_argv[@]}" \
  >"$TMPDIR_EVAL/legacy-transition-verify.out" 2>"$TMPDIR_EVAL/legacy-transition-verify.err"

legacy_back_argv=(
  advance-state "$LEGACY_TRANSITION_DIR"
  --status in_progress
  --phase execution
  --summary "Legacy direct primitive route-back."
  --next-action "Direct primitive can route back without Builder Kit metadata."
  --target-phase verification
  --timestamp "2026-05-09T00:01:10Z"
)
if flow_agents_node "$WRITER" "${legacy_back_argv[@]}" \
    >"$TMPDIR_EVAL/legacy-route-back.out" 2>"$TMPDIR_EVAL/legacy-route-back.err" \
    && rg -q '"phase": "execution"' "$LEGACY_TRANSITION_DIR/state.json" \
    && [[ ! -f "$LEGACY_TRANSITION_DIR/transition-attempts.json" ]]; then
  _pass "transition guard preserves backward-compatible legacy direct primitives"
else
  _fail "legacy-compatible direct primitive route-back failed: $(cat "$TMPDIR_EVAL/legacy-route-back.out" "$TMPDIR_EVAL/legacy-route-back.err")"
fi
824
+
825
# Fixture directory for the not-verified scenario: the deliver plan written
# by the heredoc that follows is placed here, and the later init-plan and
# record-evidence calls operate on it.
+ NV_DIR="$TMPDIR_EVAL/repo/.flow-agents/not-verified-sidecars"
826
+ mkdir -p "$NV_DIR"
827
+ cat > "$NV_DIR/not-verified-sidecars--deliver.md" <<'MARKDOWN'
828
+ # Route not verified evidence
829
+
830
+ status: needs-decision
831
+ type: deliver
832
+
833
+ ## Plan
834
+
835
+ Record uncertain evidence without pretending it passed.
836
+
837
+ ## Definition Of Done
838
+
839
+ - **User outcome:** Workflow agents can persist uncertain evidence for routing.
840
+ - **Scope:** Not-verified sidecar writer behavior.
841
+ - **Acceptance criteria:**
842
+ - [x] Not verified evidence is recorded - Evidence: evidence sidecar.
843
+ - **Usefulness checks:**
844
+ - [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
845
+ - **Stop-short risks:** Not verified evidence could be hidden as pass.
846
+ - **Durable docs target:** not needed
847
+ - **Sandbox mode:** local-edit
848
+
849
+ ## Verification Report
850
+
851
+ Build: [NOT_VERIFIED] external service unavailable
852
+
853
+ ### Acceptance Criteria
854
+ - [NOT_VERIFIED] Not verified evidence is recorded - Evidence collection unavailable.
855
+
856
+ ### Verdict: NOT_VERIFIED
857
+
858
+ ## Goal Fit Gate
859
+
860
+ - [x] Original user goal restated
861
+ - [ ] Every acceptance criterion has evidence
862
+
863
+ ## Final Acceptance
864
+
865
+ - [ ] CI/relevant checks passed
866
+ MARKDOWN
867
+
868
# Initialise sidecars for the not-verified deliver plan.
nv_init_argv=(
  init-plan "$NV_DIR/not-verified-sidecars--deliver.md"
  --source-request "Route not verified evidence."
  --summary "Not verified fixture initialized."
  --next-action "Record not verified evidence."
  --timestamp "2026-05-09T00:00:00Z"
)
if flow_agents_node "$WRITER" "${nv_init_argv[@]}" \
    >"$TMPDIR_EVAL/nv-init.out" 2>"$TMPDIR_EVAL/nv-init.err"; then
  _pass "sidecar writer initializes not-verified fixture"
else
  _fail "sidecar writer not-verified init failed: $(cat "$TMPDIR_EVAL/nv-init.out" "$TMPDIR_EVAL/nv-init.err")"
fi

# Record a not_verified verdict together with one check and one gap.
nv_evidence_argv=(
  record-evidence "$NV_DIR"
  --verdict not_verified
  --check-json '{"id":"external-check","kind":"external","status":"not_verified","summary":"External service was unavailable."}'
  --gap "External service was unavailable before user decision."
  --timestamp "2026-05-09T00:01:00Z"
)
if flow_agents_node "$WRITER" "${nv_evidence_argv[@]}" \
    >"$TMPDIR_EVAL/nv-evidence.out" 2>"$TMPDIR_EVAL/nv-evidence.err"; then
  _pass "sidecar writer records not-verified evidence for routing"
else
  _fail "sidecar writer not-verified evidence failed: $(cat "$TMPDIR_EVAL/nv-evidence.out" "$TMPDIR_EVAL/nv-evidence.err")"
fi

# state.json must carry the not_verified status and evidence.json the gaps key.
if ! rg -q '"status": "not_verified"' "$NV_DIR/state.json" \
    || ! rg -q '"not_verified_gaps"' "$NV_DIR/evidence.json"; then
  _fail "sidecar writer did not preserve not-verified state"
else
  _pass "sidecar writer preserves not-verified state and gaps"
fi
893
+
894
# An invalid record-evidence command aimed at a directory that was never
# created must fail and must not leave a lock behind.
#
# The lock-permission scenario elsewhere in this script refers to the lock as
# ".workflow-sidecar.lockdir", so checking only ".workflow-sidecar.lock"
# could never notice a leaked lock directory. Both names are checked here.
NEW_INVALID_DIR="$TMPDIR_EVAL/repo/.flow-agents/new-invalid-artifact"
if flow_agents_node "$WRITER" record-evidence "$NEW_INVALID_DIR" \
    --verdict banana \
    --check-json '{"id":"invalid-new","kind":"test","status":"pass","summary":"Should fail."}' >"$TMPDIR_EVAL/new-invalid.out" 2>&1; then
  _fail "sidecar writer should reject invalid new artifact command"
elif [[ ! -e "$NEW_INVALID_DIR/.workflow-sidecar.lock" \
    && ! -e "$NEW_INVALID_DIR/.workflow-sidecar.lockdir" ]]; then
  _pass "sidecar writer does not leave lock files for invalid new artifact commands"
else
  _fail "sidecar writer left lock file for invalid new artifact command"
fi
904
+
905
# Lock acquisition inside a read-only (0500) artifact directory must fail
# quickly with an actionable message and leave neither a lock directory nor
# critique.json behind.
#
# Changes from the previous form:
#   * The coverage is skipped when running as root, because root is not
#     subject to the directory mode and the writer would succeed, producing
#     a false failure.
#   * The lock path is matched as a fixed string (rg -F); as a regex, '.' and
#     any other metacharacters in $TMPDIR_EVAL would not be literal.
#   * The writer's exit status is captured so the directory is made writable
#     again in exactly one place before any assertion runs.
LOCK_DENIED_DIR="$TMPDIR_EVAL/repo/.flow-agents/lock-denied"
mkdir -p "$LOCK_DENIED_DIR"
if [[ "$(id -u)" -eq 0 ]]; then
  _pass "sidecar writer lock permission coverage skipped because root bypasses directory permissions"
elif chmod 500 "$LOCK_DENIED_DIR" 2>"$TMPDIR_EVAL/lock-denied-chmod.err"; then
  lock_denied_status=0
  run_bounded 5 flow_agents_node "$WRITER" record-critique "$LOCK_DENIED_DIR" \
    --id lock-denied-review \
    --reviewer tool-code-reviewer \
    --verdict pass \
    --summary "This lock acquisition should fail quickly." >"$TMPDIR_EVAL/lock-denied.out" 2>&1 \
    || lock_denied_status=$?
  # Best-effort restore so later cleanup can remove the directory.
  chmod 700 "$LOCK_DENIED_DIR" 2>/dev/null || true
  if [[ "$lock_denied_status" -eq 0 ]]; then
    _fail "sidecar writer should reject lock acquisition permission failures"
  elif rg -q 'failed to acquire workflow sidecar lock' "$TMPDIR_EVAL/lock-denied.out" \
      && rg -q 'record-critique' "$TMPDIR_EVAL/lock-denied.out" \
      && rg -qF -- "$LOCK_DENIED_DIR/.workflow-sidecar.lockdir" "$TMPDIR_EVAL/lock-denied.out" \
      && rg -q 'EPERM|EACCES|permission denied|operation not permitted' "$TMPDIR_EVAL/lock-denied.out" \
      && rg -q 'permissions, ownership, or sandbox restrictions' "$TMPDIR_EVAL/lock-denied.out" \
      && rg -q 'fix permissions or ownership' "$TMPDIR_EVAL/lock-denied.out" \
      && rg -q 'approved writable workspace' "$TMPDIR_EVAL/lock-denied.out" \
      && rg -q 'manually write schema-valid sidecars' "$TMPDIR_EVAL/lock-denied.out" \
      && rg -q 'workflow artifact validation rather than bypassing locks' "$TMPDIR_EVAL/lock-denied.out" \
      && [[ ! -e "$LOCK_DENIED_DIR/.workflow-sidecar.lockdir" ]] \
      && [[ ! -e "$LOCK_DENIED_DIR/critique.json" ]]; then
    _pass "sidecar writer fails fast with actionable lock acquisition permission guidance"
  else
    _fail "sidecar writer lock acquisition failure was not actionable: $(cat "$TMPDIR_EVAL/lock-denied.out")"
  fi
else
  _pass "sidecar writer lock permission coverage skipped because chmod is unavailable: $(cat "$TMPDIR_EVAL/lock-denied-chmod.err")"
fi
936
+
937
# Record a passing critique against the main artifact directory.
critique_argv=(
  record-critique "$ARTIFACT_DIR"
  --id writer-review
  --reviewer tool-code-reviewer
  --verdict pass
  --summary "No blocking findings."
  --artifact-ref auto-sidecars--deliver.md
  --timestamp "2026-05-09T00:02:00Z"
)
if flow_agents_node "$WRITER" "${critique_argv[@]}" \
    >"$TMPDIR_EVAL/critique.out" 2>"$TMPDIR_EVAL/critique.err"; then
  _pass "sidecar writer records passing critique"
else
  _fail "sidecar writer critique failed: $(cat "$TMPDIR_EVAL/critique.out" "$TMPDIR_EVAL/critique.err")"
fi
948
+
949
# Fixture for the concurrent-write scenario: a fresh artifact directory with
# an initialised plan and one passing evidence check. Exit statuses of the
# two writer calls are not inspected; output is kept in $TMPDIR_EVAL.
CONCURRENT_DIR="$TMPDIR_EVAL/repo/.flow-agents/concurrent-critiques"
mkdir -p "$CONCURRENT_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$CONCURRENT_DIR/concurrent-critiques--deliver.md"

concurrent_init_argv=(
  init-plan "$CONCURRENT_DIR/concurrent-critiques--deliver.md"
  --source-request "Concurrent critique fixture."
  --summary "Concurrent critique fixture."
  --next-action "Record concurrent critique."
  --timestamp "2026-05-09T00:00:00Z"
)
flow_agents_node "$WRITER" "${concurrent_init_argv[@]}" \
  >"$TMPDIR_EVAL/concurrent-init.out" 2>"$TMPDIR_EVAL/concurrent-init.err"

concurrent_evidence_argv=(
  record-evidence "$CONCURRENT_DIR"
  --verdict pass
  --check-json '{"id":"concurrent-fixture","kind":"test","status":"pass","summary":"Concurrent fixture setup passed."}'
  --timestamp "2026-05-09T00:01:00Z"
)
flow_agents_node "$WRITER" "${concurrent_evidence_argv[@]}" \
  >"$TMPDIR_EVAL/concurrent-evidence.out" 2>"$TMPDIR_EVAL/concurrent-evidence.err"
961
+
962
# Start two record-critique writers in the background against the same
# artifact directory, each with FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY=0.2
# in its environment, then require that both exit 0 and that critique.json
# contains both critique ids.
FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY=0.2 flow_agents_node "$WRITER" record-critique "$CONCURRENT_DIR" \
  --id concurrent-review-a \
  --reviewer tool-code-reviewer \
  --verdict pass \
  --summary "Concurrent review A passed." \
  --timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/concurrent-a.out" 2>"$TMPDIR_EVAL/concurrent-a.err" &
pid_a=$!
FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY=0.2 flow_agents_node "$WRITER" record-critique "$CONCURRENT_DIR" \
  --id concurrent-review-b \
  --reviewer tool-code-reviewer \
  --verdict pass \
  --summary "Concurrent review B passed." \
  --timestamp "2026-05-09T00:02:01Z" >"$TMPDIR_EVAL/concurrent-b.out" 2>"$TMPDIR_EVAL/concurrent-b.err" &
pid_b=$!

# Capture each exit status with `|| status=$?` rather than a bare `wait`
# followed by `$?`: if errexit is active, a failing writer would otherwise
# terminate the script before the diagnostic _fail below could run.
status_a=0
wait "$pid_a" || status_a=$?
status_b=0
wait "$pid_b" || status_b=$?

if [[ "$status_a" -eq 0 && "$status_b" -eq 0 ]] \
    && rg -q '"id": "concurrent-review-a"' "$CONCURRENT_DIR/critique.json" \
    && rg -q '"id": "concurrent-review-b"' "$CONCURRENT_DIR/critique.json"; then
  _pass "sidecar writer serializes concurrent sidecar writes"
else
  _fail "sidecar writer lost concurrent critique writes: $(cat "$TMPDIR_EVAL/concurrent-a.out" "$TMPDIR_EVAL/concurrent-a.err" "$TMPDIR_EVAL/concurrent-b.out" "$TMPDIR_EVAL/concurrent-b.err")"
fi
988
+
989
# Record a merge release decision with two gates plus rollback,
# observability, post-deploy and docs metadata.
release_argv=(
  record-release "$ARTIFACT_DIR"
  --decision merge
  --scope "Workflow sidecar writer fixture."
  --evidence-ref evidence.json
  --gate-json '{"name":"merge","status":"pass","summary":"Evidence and critique passed.","evidence_refs":["writer-fixture"]}'
  --gate-json '{"name":"docs","status":"pass","summary":"Workflow usage docs are the durable target."}'
  --rollback-json '{"status":"not_required","summary":"No deployed runtime change.","owner":"codex"}'
  --observability-json '{"status":"not_required","summary":"No production telemetry needed for this fixture."}'
  --post-deploy-json '{"id":"post-merge-static","status":"planned","summary":"Run static checks after merge."}'
  --docs-json '{"status":"updated","summary":"Workflow usage documentation covers sidecar use.","refs":["docs/workflow-usage-guide.md"]}'
  --summary "Release readiness recorded for merge."
  --timestamp "2026-05-09T00:03:00Z"
)
if flow_agents_node "$WRITER" "${release_argv[@]}" \
    >"$TMPDIR_EVAL/release.out" 2>"$TMPDIR_EVAL/release.err"; then
  _pass "sidecar writer records release readiness"
else
  _fail "sidecar writer release failed: $(cat "$TMPDIR_EVAL/release.out" "$TMPDIR_EVAL/release.err")"
fi

# release.json must hold the decision and state.json the release phase.
if ! rg -q '"decision": "merge"' "$ARTIFACT_DIR/release.json" \
    || ! rg -q '"phase": "release"' "$ARTIFACT_DIR/state.json"; then
  _fail "sidecar writer did not update release state"
else
  _pass "sidecar writer advances state from release readiness"
fi
1011
+
1012
# record-release without --summary: state.json must still reach the release
# phase with next_action.summary equal to "Release readiness recorded for
# merge.", and the directory must pass the validator.
NO_SUMMARY_RELEASE_DIR="$TMPDIR_EVAL/repo/.flow-agents/no-summary-release"
mkdir -p "$NO_SUMMARY_RELEASE_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$NO_SUMMARY_RELEASE_DIR/no-summary-release--deliver.md"

no_summary_init_argv=(
  init-plan "$NO_SUMMARY_RELEASE_DIR/no-summary-release--deliver.md"
  --source-request "No-summary release fixture."
  --summary "No-summary release fixture."
  --next-action "Record release without an explicit summary."
  --timestamp "2026-05-09T00:00:00Z"
)
flow_agents_node "$WRITER" "${no_summary_init_argv[@]}" \
  >"$TMPDIR_EVAL/no-summary-release-init.out" 2>"$TMPDIR_EVAL/no-summary-release-init.err"

no_summary_evidence_argv=(
  record-evidence "$NO_SUMMARY_RELEASE_DIR"
  --verdict pass
  --check-json '{"id":"no-summary-release-fixture","kind":"test","status":"pass","summary":"No-summary release setup passed."}'
  --timestamp "2026-05-09T00:01:00Z"
)
flow_agents_node "$WRITER" "${no_summary_evidence_argv[@]}" \
  >"$TMPDIR_EVAL/no-summary-release-evidence.out" 2>"$TMPDIR_EVAL/no-summary-release-evidence.err"

no_summary_release_argv=(
  record-release "$NO_SUMMARY_RELEASE_DIR"
  --decision merge
  --scope "No-summary release fixture."
  --evidence-ref evidence.json
  --gate-json '{"name":"merge","status":"pass","summary":"Evidence passed.","evidence_refs":["no-summary-release-fixture"]}'
  --rollback-json '{"status":"not_required","summary":"No deployed runtime change.","owner":"codex"}'
  --observability-json '{"status":"not_required","summary":"No production telemetry needed for this fixture."}'
  --docs-json '{"status":"not_needed","summary":"No docs change needed."}'
  --timestamp "2026-05-09T00:03:30Z"
)
if flow_agents_node "$WRITER" "${no_summary_release_argv[@]}" \
    >"$TMPDIR_EVAL/no-summary-release.out" 2>"$TMPDIR_EVAL/no-summary-release.err" \
    && node -e 'const fs = require("fs"); const dir = process.argv[1]; const state = JSON.parse(fs.readFileSync(`${dir}/state.json`, "utf8")); if (state.phase !== "release") throw new Error(`expected release phase, got ${state.phase}`); if (state.next_action?.summary !== "Release readiness recorded for merge.") throw new Error(`unexpected summary: ${state.next_action?.summary}`);' "$NO_SUMMARY_RELEASE_DIR" \
    && rg -q '"decision": "merge"' "$NO_SUMMARY_RELEASE_DIR/release.json" \
    && flow_agents_node "$VALIDATOR" --skip-markdown-validation "$NO_SUMMARY_RELEASE_DIR" \
      >"$TMPDIR_EVAL/no-summary-release-valid.out" 2>"$TMPDIR_EVAL/no-summary-release-valid.err"; then
  _pass "sidecar writer records valid release state without explicit summary"
else
  _fail "no-summary release state fallback failed: $(cat "$TMPDIR_EVAL/no-summary-release.out" "$TMPDIR_EVAL/no-summary-release.err" "$TMPDIR_EVAL/no-summary-release-valid.out" "$TMPDIR_EVAL/no-summary-release-valid.err" 2>/dev/null)"
fi
1041
+
1042
# Record a "learned" closeout whose record marks no correction as needed.
learning_argv=(
  record-learning "$ARTIFACT_DIR"
  --status learned
  --record-json '{"id":"writer-loop","source_refs":["release.json","evidence.json"],"outcome":"success","facts":["Release sidecar validated."],"interpretation":"Writer commands can carry release and learning feedback without hand-authored JSON.","routing":[{"target":"none","action":"No follow-up required after intended-vs-observed closeout.","status":"completed"}],"correction":{"needed":false,"evidence":"Release, evidence, and learning closeout matched intended behavior."}}'
  --summary "Learning recorded and no follow-up remains."
  --timestamp "2026-05-09T00:04:00Z"
)
if flow_agents_node "$WRITER" "${learning_argv[@]}" \
    >"$TMPDIR_EVAL/learning.out" 2>"$TMPDIR_EVAL/learning.err"; then
  _pass "sidecar writer records learning feedback"
else
  _fail "sidecar writer learning failed: $(cat "$TMPDIR_EVAL/learning.out" "$TMPDIR_EVAL/learning.err")"
fi

# learning.json must hold the status and state.json the learning phase.
if ! rg -q '"status": "learned"' "$ARTIFACT_DIR/learning.json" \
    || ! rg -q '"phase": "learning"' "$ARTIFACT_DIR/state.json"; then
  _fail "sidecar writer did not update learning state"
else
  _pass "sidecar writer advances state from learning feedback"
fi

# The written learning.json must validate and keep the no-correction fields.
if flow_agents_node "$VALIDATOR" --skip-markdown-validation "$ARTIFACT_DIR/learning.json" \
    >"$TMPDIR_EVAL/learning-valid.out" 2>"$TMPDIR_EVAL/learning-valid.err" \
    && rg -q '"needed": false' "$ARTIFACT_DIR/learning.json" \
    && rg -q '"target": "none"' "$ARTIFACT_DIR/learning.json"; then
  _pass "sidecar writer records valid no-correction learning closeout"
else
  _fail "no-correction learning closeout failed validation: $(cat "$TMPDIR_EVAL/learning-valid.out" "$TMPDIR_EVAL/learning-valid.err")"
fi
1065
+
1066
# A followup_required learning record with correction.needed=true is written
# into a fresh directory (addressed by --task-slug) and must validate.
CORRECTION_DIR="$TMPDIR_EVAL/repo/.flow-agents/correction-needed-learning"
mkdir -p "$CORRECTION_DIR"
correction_argv=(
  record-learning "$CORRECTION_DIR"
  --task-slug correction-needed-learning
  --status followup_required
  --record-json '{"id":"stale-learning-route","source_refs":["release.json","issue-93"],"outcome":"mixed","facts":["A stale learning route remained local after durable tracking existed."],"interpretation":"Terminal learning review must force a correction or no-correction decision.","routing":[{"target":"skill","action":"Update learning-review closeout contract.","status":"open","ref":"https://github.com/kontourai/flow-agents/issues/93"}],"correction":{"needed":true,"type":"workflow","recurrence_key":"learning-review.stale-route-closeout","intended_behavior":"Terminal learning review routes or closes every actionable gap.","observed_behavior":"A stale learning route remained local after durable tracking existed.","gap":"Learning review did not force a correction/no-correction decision.","prevention":{"target":"skill","action":"Update learning-review closeout contract.","status":"open","ref":"https://github.com/kontourai/flow-agents/issues/93"}}}'
  --summary "Correction-needed learning recorded."
  --timestamp "2026-05-09T00:04:30Z"
)
if flow_agents_node "$WRITER" "${correction_argv[@]}" \
    >"$TMPDIR_EVAL/correction-needed-learning.out" 2>"$TMPDIR_EVAL/correction-needed-learning.err" \
    && flow_agents_node "$VALIDATOR" --skip-markdown-validation "$CORRECTION_DIR/learning.json" \
      >"$TMPDIR_EVAL/correction-needed-valid.out" 2>"$TMPDIR_EVAL/correction-needed-valid.err"; then
  _pass "sidecar writer records valid correction-needed learning closeout"
else
  _fail "correction-needed learning closeout failed: $(cat "$TMPDIR_EVAL/correction-needed-learning.out" "$TMPDIR_EVAL/correction-needed-learning.err" "$TMPDIR_EVAL/correction-needed-valid.out" "$TMPDIR_EVAL/correction-needed-valid.err")"
fi
1079
+
1080
# Dogfood fixture with an initialised plan and no evidence.json; a clean
# dogfood-pass against it must be refused with an actionable message.
DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-pass"
mkdir -p "$DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$DOGFOOD_DIR/dogfood-pass--deliver.md"
dogfood_init_argv=(
  init-plan "$DOGFOOD_DIR/dogfood-pass--deliver.md"
  --source-request "Dogfood pass fixture."
  --summary "Dogfood pass fixture."
  --next-action "Run dogfood pass."
  --timestamp "2026-05-09T00:00:00Z"
)
flow_agents_node "$WRITER" "${dogfood_init_argv[@]}" \
  >"$TMPDIR_EVAL/dogfood-init.out" 2>"$TMPDIR_EVAL/dogfood-init.err"

dogfood_no_evidence_argv=(
  dogfood-pass
  --artifact-root "$SESSION_ROOT"
  --artifact-dir "$DOGFOOD_DIR"
  --verdict pass
  --summary "Should fail without evidence."
)
if flow_agents_node "$WRITER" "${dogfood_no_evidence_argv[@]}" \
    >"$TMPDIR_EVAL/dogfood-no-evidence.out" 2>&1; then
  _fail "dogfood-pass should reject clean pass without evidence"
elif rg -q 'cannot mark clean without passing evidence' "$TMPDIR_EVAL/dogfood-no-evidence.out"; then
  _pass "dogfood-pass refuses clean completion without evidence"
else
  _fail "dogfood-pass missing actionable no-evidence error"
fi
1100
+
1101
# Fixture directory for the "existing dirty evidence" dogfood scenario; the
# evidence.json it needs is written by the heredoc that follows.
DIRTY_EVIDENCE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-dirty-evidence"
mkdir -p "$DIRTY_EVIDENCE_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$DIRTY_EVIDENCE_DOGFOOD_DIR/dogfood-dirty-evidence--deliver.md"
dirty_evidence_init_argv=(
  init-plan "$DIRTY_EVIDENCE_DOGFOOD_DIR/dogfood-dirty-evidence--deliver.md"
  --source-request "Dogfood dirty evidence fixture."
  --summary "Dogfood dirty evidence fixture."
  --next-action "Run dogfood pass against existing dirty evidence."
  --timestamp "2026-05-09T00:00:00Z"
)
flow_agents_node "$WRITER" "${dirty_evidence_init_argv[@]}" \
  >"$TMPDIR_EVAL/dogfood-dirty-evidence-init.out" 2>"$TMPDIR_EVAL/dogfood-dirty-evidence-init.err"
1109
+ cat > "$DIRTY_EVIDENCE_DOGFOOD_DIR/evidence.json" <<'JSON'
1110
+ {
1111
+ "schema_version": "1.0",
1112
+ "task_slug": "dogfood-dirty-evidence",
1113
+ "verdict": "pass",
1114
+ "checks": [
1115
+ {
1116
+ "id": "existing-pass",
1117
+ "kind": "test",
1118
+ "status": "pass",
1119
+ "summary": "Existing pass check."
1120
+ },
1121
+ {
1122
+ "id": "existing-fail",
1123
+ "kind": "test",
1124
+ "status": "fail",
1125
+ "summary": "Existing fail check."
1126
+ }
1127
+ ],
1128
+ "not_verified_gaps": []
1129
+ }
1130
+ JSON
1131
# dogfood-pass must refuse the pre-existing evidence (verdict "pass" but
# containing a failed check) and leave state.json/handoff.json identical to
# the snapshots taken here.
for dirty_sidecar in state handoff; do
  cp "$DIRTY_EVIDENCE_DOGFOOD_DIR/$dirty_sidecar.json" "$TMPDIR_EVAL/dogfood-dirty-evidence-$dirty_sidecar.before"
done
dirty_evidence_argv=(
  dogfood-pass
  --artifact-root "$SESSION_ROOT"
  --artifact-dir "$DIRTY_EVIDENCE_DOGFOOD_DIR"
  --verdict pass
  --summary "Should fail before state writes."
)
if flow_agents_node "$WRITER" "${dirty_evidence_argv[@]}" \
    >"$TMPDIR_EVAL/dogfood-dirty-evidence.out" 2>&1; then
  _fail "dogfood-pass should reject existing dirty pass evidence before state writes"
elif rg -q 'cannot mark clean without passing evidence' "$TMPDIR_EVAL/dogfood-dirty-evidence.out" \
    && cmp -s "$DIRTY_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-dirty-evidence-state.before" \
    && cmp -s "$DIRTY_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-dirty-evidence-handoff.before"; then
  _pass "dogfood-pass rejects existing dirty evidence before state and handoff writes"
else
  _fail "dogfood-pass existing dirty evidence was not fail-closed"
fi
1146
+
1147
# Fixture directory for the "existing invalid evidence" dogfood scenario; the
# evidence.json it needs is written by the heredoc that follows.
INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-existing-invalid-evidence"
mkdir -p "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/dogfood-existing-invalid-evidence--deliver.md"
invalid_existing_init_argv=(
  init-plan "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/dogfood-existing-invalid-evidence--deliver.md"
  --source-request "Dogfood existing invalid evidence fixture."
  --summary "Dogfood existing invalid evidence fixture."
  --next-action "Run dogfood pass against existing invalid evidence."
  --timestamp "2026-05-09T00:00:00Z"
)
flow_agents_node "$WRITER" "${invalid_existing_init_argv[@]}" \
  >"$TMPDIR_EVAL/dogfood-existing-invalid-evidence-init.out" 2>"$TMPDIR_EVAL/dogfood-existing-invalid-evidence-init.err"
1155
+ cat > "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/evidence.json" <<'JSON'
1156
+ {
1157
+ "schema_version": "1.0",
1158
+ "task_slug": "dogfood-existing-invalid-evidence",
1159
+ "verdict": "pass",
1160
+ "checks": [
1161
+ {
1162
+ "id": "existing-invalid-pass",
1163
+ "kind": "test",
1164
+ "status": "pass",
1165
+ "summary": "Existing pass check with invalid metadata.",
1166
+ "standard_refs": [
1167
+ {
1168
+ "standard": "unknown",
1169
+ "ref": "bad-ref"
1170
+ }
1171
+ ]
1172
+ }
1173
+ ],
1174
+ "not_verified_gaps": []
1175
+ }
1176
+ JSON
1177
# dogfood-pass must refuse the pre-existing evidence.json written for this
# fixture and leave state.json/handoff.json identical to the snapshots taken
# here.
for invalid_existing_sidecar in state handoff; do
  cp "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/$invalid_existing_sidecar.json" "$TMPDIR_EVAL/dogfood-existing-invalid-evidence-$invalid_existing_sidecar.before"
done
invalid_existing_argv=(
  dogfood-pass
  --artifact-root "$SESSION_ROOT"
  --artifact-dir "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR"
  --verdict pass
  --summary "Should fail before state writes."
)
if flow_agents_node "$WRITER" "${invalid_existing_argv[@]}" \
    >"$TMPDIR_EVAL/dogfood-existing-invalid-evidence.out" 2>&1; then
  _fail "dogfood-pass should reject existing invalid pass evidence before state writes"
elif rg -q 'cannot mark clean without passing evidence' "$TMPDIR_EVAL/dogfood-existing-invalid-evidence.out" \
    && cmp -s "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-evidence-state.before" \
    && cmp -s "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-evidence-handoff.before"; then
  _pass "dogfood-pass rejects existing invalid evidence before state and handoff writes"
else
  _fail "dogfood-pass existing invalid evidence was not fail-closed"
fi
1192
+
# Case: --verdict pass combined with a check whose status is "fail".
# Three outcomes: the command succeeding is a failure; a non-zero exit whose combined
# output contains 'clean evidence requires all non-skipped checks to pass' AND no
# evidence.json in $DOGFOOD_DIR is a pass; any other non-zero exit is a failure.
1193
+ if flow_agents_node "$WRITER" dogfood-pass \
1194
+ --artifact-root "$SESSION_ROOT" \
1195
+ --artifact-dir "$DOGFOOD_DIR" \
1196
+ --verdict pass \
1197
+ --check-json '{"id":"dogfood-failed-check","kind":"test","status":"fail","summary":"Should not write."}' \
1198
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-pass-failed-check.out" 2>&1; then
1199
+ _fail "dogfood-pass should reject failed checks on clean pass before evidence writes"
1200
+ elif rg -q 'clean evidence requires all non-skipped checks to pass' "$TMPDIR_EVAL/dogfood-pass-failed-check.out" \
1201
+ && [[ ! -f "$DOGFOOD_DIR/evidence.json" ]]; then
1202
+ _pass "dogfood-pass rejects failed clean-pass checks before partial evidence writes"
1203
+ else
1204
+ _fail "dogfood-pass failed clean-pass check was not fail-closed"
1205
+ fi
1206
+
# Case: --verdict pass combined with a check whose status is "not_verified".
# Expected: non-zero exit, combined output containing
# 'clean evidence requires all non-skipped checks to pass', and still no
# evidence.json in $DOGFOOD_DIR.
1207
+ if flow_agents_node "$WRITER" dogfood-pass \
1208
+ --artifact-root "$SESSION_ROOT" \
1209
+ --artifact-dir "$DOGFOOD_DIR" \
1210
+ --verdict pass \
1211
+ --check-json '{"id":"dogfood-not-verified-check","kind":"test","status":"not_verified","summary":"Should not write."}' \
1212
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-pass-not-verified-check.out" 2>&1; then
1213
+ _fail "dogfood-pass should reject not_verified checks on clean pass before evidence writes"
1214
+ elif rg -q 'clean evidence requires all non-skipped checks to pass' "$TMPDIR_EVAL/dogfood-pass-not-verified-check.out" \
1215
+ && [[ ! -f "$DOGFOOD_DIR/evidence.json" ]]; then
1216
+ _pass "dogfood-pass rejects not_verified clean-pass checks before partial evidence writes"
1217
+ else
1218
+ _fail "dogfood-pass not_verified clean-pass check was not fail-closed"
1219
+ fi
1220
+
# Case: invalid check metadata supplied on the command line.
# A fresh fixture dir is initialised with init-plan (this call passes --artifact-root),
# state.json and handoff.json are snapshotted, and dogfood-pass is given a passing
# check whose standard_refs entry names standard "unknown".
# Expected: non-zero exit, output mentioning 'standard', no evidence.json created,
# and both snapshots unchanged under cmp -s.
# NOTE(review): the output match is only the substring 'standard'; the exact error
# text is not visible here, so this may be looser than intended -- confirm.
1221
+ INVALID_EVIDENCE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-invalid-evidence"
1222
+ mkdir -p "$INVALID_EVIDENCE_DOGFOOD_DIR"
1223
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_EVIDENCE_DOGFOOD_DIR/dogfood-invalid-evidence--deliver.md"
1224
+ flow_agents_node "$WRITER" init-plan "$INVALID_EVIDENCE_DOGFOOD_DIR/dogfood-invalid-evidence--deliver.md" \
1225
+ --artifact-root "$SESSION_ROOT" \
1226
+ --source-request "Dogfood invalid evidence fixture." \
1227
+ --summary "Dogfood invalid evidence fixture." \
1228
+ --next-action "Run dogfood pass with invalid evidence metadata." \
1229
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-invalid-evidence-init.out" 2>"$TMPDIR_EVAL/dogfood-invalid-evidence-init.err"
1230
+ cp "$INVALID_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-invalid-evidence-state.before"
1231
+ cp "$INVALID_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-invalid-evidence-handoff.before"
1232
+ if flow_agents_node "$WRITER" dogfood-pass \
1233
+ --artifact-root "$SESSION_ROOT" \
1234
+ --artifact-dir "$INVALID_EVIDENCE_DOGFOOD_DIR" \
1235
+ --verdict pass \
1236
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write invalid metadata.","standard_refs":[{"standard":"unknown","ref":"bad-ref"}]}' \
1237
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-invalid-evidence.out" 2>&1; then
1238
+ _fail "dogfood-pass should reject invalid evidence metadata before sidecar writes"
1239
+ elif rg -q 'standard' "$TMPDIR_EVAL/dogfood-invalid-evidence.out" \
1240
+ && [[ ! -f "$INVALID_EVIDENCE_DOGFOOD_DIR/evidence.json" ]] \
1241
+ && cmp -s "$INVALID_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-invalid-evidence-state.before" \
1242
+ && cmp -s "$INVALID_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-invalid-evidence-handoff.before"; then
1243
+ _pass "dogfood-pass rejects invalid evidence metadata before partial sidecar writes"
1244
+ else
1245
+ _fail "dogfood-pass invalid evidence metadata was not fail-closed"
1246
+ fi
1247
+
# Case: --learning-status learned with a learning record that still has open routing.
# The record passed via --learning-record-json contains a routing entry with
# status "open". Expected: non-zero exit, output containing
# 'learned status cannot have open learning routing', and neither evidence.json nor
# learning.json present in the fixture dir afterwards.
1248
+ INVALID_LEARNING_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-invalid-learning"
1249
+ mkdir -p "$INVALID_LEARNING_DOGFOOD_DIR"
1250
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_LEARNING_DOGFOOD_DIR/dogfood-invalid-learning--deliver.md"
1251
+ flow_agents_node "$WRITER" init-plan "$INVALID_LEARNING_DOGFOOD_DIR/dogfood-invalid-learning--deliver.md" \
1252
+ --source-request "Dogfood invalid learning fixture." \
1253
+ --summary "Dogfood invalid learning fixture." \
1254
+ --next-action "Run dogfood pass with invalid learning." \
1255
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-invalid-learning-init.out" 2>"$TMPDIR_EVAL/dogfood-invalid-learning-init.err"
1256
+ if flow_agents_node "$WRITER" dogfood-pass \
1257
+ --artifact-root "$SESSION_ROOT" \
1258
+ --artifact-dir "$INVALID_LEARNING_DOGFOOD_DIR" \
1259
+ --verdict pass \
1260
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before invalid learning."}' \
1261
+ --learning-status learned \
1262
+ --learning-record-json '{"id":"dogfood-invalid-learning","source_refs":["evidence.json"],"outcome":"mixed","facts":["Learning has open routing."],"interpretation":"Open routing cannot be learned.","routing":[{"target":"doc","action":"Close this follow-up later.","status":"open"}]}' \
1263
+ --learning-summary "Invalid learning should fail before writes." \
1264
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-invalid-learning.out" 2>&1; then
1265
+ _fail "dogfood-pass should reject invalid learning before evidence writes"
1266
+ elif rg -q 'learned status cannot have open learning routing' "$TMPDIR_EVAL/dogfood-invalid-learning.out" \
1267
+ && [[ ! -f "$INVALID_LEARNING_DOGFOOD_DIR/evidence.json" ]] \
1268
+ && [[ ! -f "$INVALID_LEARNING_DOGFOOD_DIR/learning.json" ]]; then
1269
+ _pass "dogfood-pass rejects invalid learning before partial sidecar writes"
1270
+ else
1271
+ _fail "dogfood-pass invalid learning was not fail-closed"
1272
+ fi
1273
+
# Case: learning record with the wrong JSON shape.
# In the record passed via --learning-record-json, source_refs and facts are plain
# strings instead of arrays; its routing is already "completed", so only the shape is
# wrong. Expected: non-zero exit, output mentioning 'source_refs', and neither
# evidence.json nor learning.json written to the fixture dir.
1274
+ INVALID_LEARNING_SHAPE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-invalid-learning-shape"
1275
+ mkdir -p "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR"
1276
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR/dogfood-invalid-learning-shape--deliver.md"
1277
+ flow_agents_node "$WRITER" init-plan "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR/dogfood-invalid-learning-shape--deliver.md" \
1278
+ --source-request "Dogfood invalid learning shape fixture." \
1279
+ --summary "Dogfood invalid learning shape fixture." \
1280
+ --next-action "Run dogfood pass with invalid learning shape." \
1281
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-invalid-learning-shape-init.out" 2>"$TMPDIR_EVAL/dogfood-invalid-learning-shape-init.err"
1282
+ if flow_agents_node "$WRITER" dogfood-pass \
1283
+ --artifact-root "$SESSION_ROOT" \
1284
+ --artifact-dir "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR" \
1285
+ --verdict pass \
1286
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before invalid learning shape."}' \
1287
+ --learning-status learned \
1288
+ --learning-record-json '{"id":"dogfood-invalid-learning-shape","source_refs":"evidence.json","outcome":"success","facts":"Learning facts must be an array.","interpretation":"Invalid shape cannot be learned.","routing":[{"target":"doc","action":"Already closed.","status":"completed"}]}' \
1289
+ --learning-summary "Invalid learning shape should fail before writes." \
1290
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-invalid-learning-shape.out" 2>&1; then
1291
+ _fail "dogfood-pass should reject invalid learning shape before evidence writes"
1292
+ elif rg -q 'source_refs' "$TMPDIR_EVAL/dogfood-invalid-learning-shape.out" \
1293
+ && [[ ! -f "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR/evidence.json" ]] \
1294
+ && [[ ! -f "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR/learning.json" ]]; then
1295
+ _pass "dogfood-pass rejects invalid learning shape before partial sidecar writes"
1296
+ else
1297
+ _fail "dogfood-pass invalid learning shape was not fail-closed"
1298
+ fi
1299
+
# Case: a learning.json that already exists in the artifact dir is malformed.
# After init-plan, a learning.json with status "learned" is written by hand; its
# single record has source_refs as a string instead of an array. state.json and
# handoff.json are snapshotted, then dogfood-pass is run with a passing check and no
# learning flags of its own.
# Expected: non-zero exit, output mentioning 'source_refs', no evidence.json created,
# and both snapshots unchanged under cmp -s.
1300
+ EXISTING_INVALID_LEARNING_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-existing-invalid-learning"
1301
+ mkdir -p "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR"
1302
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/dogfood-existing-invalid-learning--deliver.md"
1303
+ flow_agents_node "$WRITER" init-plan "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/dogfood-existing-invalid-learning--deliver.md" \
1304
+ --source-request "Dogfood existing invalid learning fixture." \
1305
+ --summary "Dogfood existing invalid learning fixture." \
1306
+ --next-action "Run dogfood pass against existing invalid learning." \
1307
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-existing-invalid-learning-init.out" 2>"$TMPDIR_EVAL/dogfood-existing-invalid-learning-init.err"
1308
+ cat > "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/learning.json" <<'JSON'
1309
+ {
1310
+ "schema_version": "1.0",
1311
+ "task_slug": "dogfood-existing-invalid-learning",
1312
+ "status": "learned",
1313
+ "updated_at": "2026-05-09T00:01:00Z",
1314
+ "records": [
1315
+ {
1316
+ "id": "existing-invalid-learning",
1317
+ "recorded_at": "2026-05-09T00:01:00Z",
1318
+ "source_refs": "evidence.json",
1319
+ "outcome": "success",
1320
+ "facts": [
1321
+ "Existing learning has invalid source_refs shape."
1322
+ ],
1323
+ "interpretation": "This should not be accepted by clean dogfood pass.",
1324
+ "routing": [
1325
+ {
1326
+ "target": "none",
1327
+ "action": "No follow-up.",
1328
+ "status": "completed"
1329
+ }
1330
+ ]
1331
+ }
1332
+ ]
1333
+ }
1334
+ JSON
1335
+ cp "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-learning-state.before"
1336
+ cp "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-learning-handoff.before"
1337
+ if flow_agents_node "$WRITER" dogfood-pass \
1338
+ --artifact-root "$SESSION_ROOT" \
1339
+ --artifact-dir "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR" \
1340
+ --verdict pass \
1341
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before existing invalid learning."}' \
1342
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-existing-invalid-learning.out" 2>&1; then
1343
+ _fail "dogfood-pass should reject existing invalid learning before evidence writes"
1344
+ elif rg -q 'source_refs' "$TMPDIR_EVAL/dogfood-existing-invalid-learning.out" \
1345
+ && [[ ! -f "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/evidence.json" ]] \
1346
+ && cmp -s "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-learning-state.before" \
1347
+ && cmp -s "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-learning-handoff.before"; then
1348
+ _pass "dogfood-pass rejects existing invalid learning before partial sidecar writes"
1349
+ else
1350
+ _fail "dogfood-pass existing invalid learning was not fail-closed"
1351
+ fi
1352
+
# Case: existing learning.json is well-shaped but its record has no "correction" key.
# The hand-written learning.json has status "learned", source_refs and facts as
# arrays, and completed routing; the only thing missing is a correction entry.
# state.json and handoff.json are snapshotted before dogfood-pass is run with a
# passing check.
# Expected: non-zero exit, output containing
# 'learning status learned requires every record to include correction.needed',
# no evidence.json created, and both snapshots unchanged under cmp -s.
1353
+ EXISTING_LEARNED_NO_CORRECTION_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-learned-no-correction"
1354
+ mkdir -p "$EXISTING_LEARNED_NO_CORRECTION_DIR"
1355
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$EXISTING_LEARNED_NO_CORRECTION_DIR/dogfood-learned-no-correction--deliver.md"
1356
+ flow_agents_node "$WRITER" init-plan "$EXISTING_LEARNED_NO_CORRECTION_DIR/dogfood-learned-no-correction--deliver.md" \
1357
+ --source-request "Dogfood learned missing correction fixture." \
1358
+ --summary "Dogfood learned missing correction fixture." \
1359
+ --next-action "Run dogfood pass against terminal learning missing correction." \
1360
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-learned-no-correction-init.out" 2>"$TMPDIR_EVAL/dogfood-learned-no-correction-init.err"
1361
+ cat > "$EXISTING_LEARNED_NO_CORRECTION_DIR/learning.json" <<'JSON'
1362
+ {
1363
+ "schema_version": "1.0",
1364
+ "task_slug": "dogfood-learned-no-correction",
1365
+ "status": "learned",
1366
+ "updated_at": "2026-05-09T00:01:00Z",
1367
+ "records": [
1368
+ {
1369
+ "id": "learned-without-correction",
1370
+ "recorded_at": "2026-05-09T00:01:00Z",
1371
+ "source_refs": [
1372
+ "evidence.json"
1373
+ ],
1374
+ "outcome": "success",
1375
+ "facts": [
1376
+ "Existing learning is otherwise well-shaped."
1377
+ ],
1378
+ "interpretation": "Terminal learned records must include a correction or no-correction decision.",
1379
+ "routing": [
1380
+ {
1381
+ "target": "none",
1382
+ "action": "No follow-up.",
1383
+ "status": "completed"
1384
+ }
1385
+ ]
1386
+ }
1387
+ ]
1388
+ }
1389
+ JSON
1390
+ cp "$EXISTING_LEARNED_NO_CORRECTION_DIR/state.json" "$TMPDIR_EVAL/dogfood-learned-no-correction-state.before"
1391
+ cp "$EXISTING_LEARNED_NO_CORRECTION_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-learned-no-correction-handoff.before"
1392
+ if flow_agents_node "$WRITER" dogfood-pass \
1393
+ --artifact-root "$SESSION_ROOT" \
1394
+ --artifact-dir "$EXISTING_LEARNED_NO_CORRECTION_DIR" \
1395
+ --verdict pass \
1396
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before existing learned learning is corrected."}' \
1397
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-learned-no-correction.out" 2>&1; then
1398
+ _fail "dogfood-pass should reject existing learned learning missing correction before evidence writes"
1399
+ elif rg -q 'learning status learned requires every record to include correction.needed' "$TMPDIR_EVAL/dogfood-learned-no-correction.out" \
1400
+ && [[ ! -f "$EXISTING_LEARNED_NO_CORRECTION_DIR/evidence.json" ]] \
1401
+ && cmp -s "$EXISTING_LEARNED_NO_CORRECTION_DIR/state.json" "$TMPDIR_EVAL/dogfood-learned-no-correction-state.before" \
1402
+ && cmp -s "$EXISTING_LEARNED_NO_CORRECTION_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-learned-no-correction-handoff.before"; then
1403
+ _pass "dogfood-pass rejects existing learned learning missing correction before partial sidecar writes"
1404
+ else
1405
+ _fail "dogfood-pass existing learned learning missing correction was not fail-closed"
1406
+ fi
1407
+
# Case: critique finding supplied on the command line has the wrong shape.
# The --finding-json payload sets file_refs to a string ("not-an-array") while the
# critique verdict itself is "pass". state.json and handoff.json are snapshotted
# before the call.
# Expected: non-zero exit, output mentioning 'file_refs', neither evidence.json nor
# critique.json created, and both snapshots unchanged under cmp -s.
1408
+ INVALID_CRITIQUE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-invalid-critique"
1409
+ mkdir -p "$INVALID_CRITIQUE_DOGFOOD_DIR"
1410
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_CRITIQUE_DOGFOOD_DIR/dogfood-invalid-critique--deliver.md"
1411
+ flow_agents_node "$WRITER" init-plan "$INVALID_CRITIQUE_DOGFOOD_DIR/dogfood-invalid-critique--deliver.md" \
1412
+ --source-request "Dogfood invalid critique fixture." \
1413
+ --summary "Dogfood invalid critique fixture." \
1414
+ --next-action "Run dogfood pass with invalid critique metadata." \
1415
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-invalid-critique-init.out" 2>"$TMPDIR_EVAL/dogfood-invalid-critique-init.err"
1416
+ cp "$INVALID_CRITIQUE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-invalid-critique-state.before"
1417
+ cp "$INVALID_CRITIQUE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-invalid-critique-handoff.before"
1418
+ if flow_agents_node "$WRITER" dogfood-pass \
1419
+ --artifact-root "$SESSION_ROOT" \
1420
+ --artifact-dir "$INVALID_CRITIQUE_DOGFOOD_DIR" \
1421
+ --verdict pass \
1422
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before invalid critique."}' \
1423
+ --require-critique \
1424
+ --critique-id dogfood-invalid-critique \
1425
+ --critique-verdict pass \
1426
+ --critique-summary "Invalid critique finding metadata should fail before writes." \
1427
+ --finding-json '{"id":"invalid-file-refs","severity":"low","status":"fixed","description":"file_refs must be an array.","file_refs":"not-an-array"}' \
1428
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-invalid-critique.out" 2>&1; then
1429
+ _fail "dogfood-pass should reject invalid critique metadata before evidence writes"
1430
+ elif rg -q 'file_refs' "$TMPDIR_EVAL/dogfood-invalid-critique.out" \
1431
+ && [[ ! -f "$INVALID_CRITIQUE_DOGFOOD_DIR/evidence.json" ]] \
1432
+ && [[ ! -f "$INVALID_CRITIQUE_DOGFOOD_DIR/critique.json" ]] \
1433
+ && cmp -s "$INVALID_CRITIQUE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-invalid-critique-state.before" \
1434
+ && cmp -s "$INVALID_CRITIQUE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-invalid-critique-handoff.before"; then
1435
+ _pass "dogfood-pass rejects invalid critique metadata before partial sidecar writes"
1436
+ else
1437
+ _fail "dogfood-pass invalid critique metadata was not fail-closed"
1438
+ fi
1439
+
# Case: a critique.json that already exists in the artifact dir is malformed.
# The hand-written critique.json reports status "pass" and required true, but its
# single finding has file_refs as a string. dogfood-pass is then run with
# --require-critique and no new critique flags, so only the existing file is available.
# Expected: non-zero exit, output containing 'requires passing critique', no
# evidence.json created, and state.json / handoff.json unchanged under cmp -s.
1440
+ EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-existing-invalid-critique"
1441
+ mkdir -p "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR"
1442
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/dogfood-existing-invalid-critique--deliver.md"
1443
+ flow_agents_node "$WRITER" init-plan "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/dogfood-existing-invalid-critique--deliver.md" \
1444
+ --source-request "Dogfood existing invalid critique fixture." \
1445
+ --summary "Dogfood existing invalid critique fixture." \
1446
+ --next-action "Run dogfood pass against existing invalid critique." \
1447
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-existing-invalid-critique-init.out" 2>"$TMPDIR_EVAL/dogfood-existing-invalid-critique-init.err"
1448
+ cat > "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/critique.json" <<'JSON'
1449
+ {
1450
+ "schema_version": "1.0",
1451
+ "task_slug": "dogfood-existing-invalid-critique",
1452
+ "status": "pass",
1453
+ "required": true,
1454
+ "updated_at": "2026-05-09T00:01:00Z",
1455
+ "critiques": [
1456
+ {
1457
+ "id": "existing-invalid-review",
1458
+ "reviewer": "tool-code-reviewer",
1459
+ "reviewed_at": "2026-05-09T00:01:00Z",
1460
+ "verdict": "pass",
1461
+ "summary": "Looks clean but has invalid finding shape.",
1462
+ "findings": [
1463
+ {
1464
+ "id": "invalid-existing-file-refs",
1465
+ "severity": "low",
1466
+ "status": "fixed",
1467
+ "description": "file_refs must be an array.",
1468
+ "file_refs": "not-an-array"
1469
+ }
1470
+ ]
1471
+ }
1472
+ ]
1473
+ }
1474
+ JSON
1475
+ cp "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-critique-state.before"
1476
+ cp "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-critique-handoff.before"
1477
+ if flow_agents_node "$WRITER" dogfood-pass \
1478
+ --artifact-root "$SESSION_ROOT" \
1479
+ --artifact-dir "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR" \
1480
+ --verdict pass \
1481
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before existing invalid critique."}' \
1482
+ --require-critique \
1483
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-existing-invalid-critique.out" 2>&1; then
1484
+ _fail "dogfood-pass should reject existing invalid critique before evidence writes"
1485
+ elif rg -q 'requires passing critique' "$TMPDIR_EVAL/dogfood-existing-invalid-critique.out" \
1486
+ && [[ ! -f "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/evidence.json" ]] \
1487
+ && cmp -s "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-critique-state.before" \
1488
+ && cmp -s "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-critique-handoff.before"; then
1489
+ _pass "dogfood-pass rejects existing invalid critique before partial sidecar writes"
1490
+ else
1491
+ _fail "dogfood-pass existing invalid critique was not fail-closed"
1492
+ fi
1493
+
# Case: --require-critique is set but no critique flags are supplied.
# Expected: non-zero exit, output containing 'requires passing critique', and no
# evidence.json in $DOGFOOD_DIR.
# NOTE(review): this presumably relies on $DOGFOOD_DIR having no critique.json yet;
# that directory is set up outside this section -- confirm.
1494
+ if flow_agents_node "$WRITER" dogfood-pass \
1495
+ --artifact-root "$SESSION_ROOT" \
1496
+ --artifact-dir "$DOGFOOD_DIR" \
1497
+ --verdict pass \
1498
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Dogfood check passed."}' \
1499
+ --require-critique \
1500
+ --summary "Should fail without critique." >"$TMPDIR_EVAL/dogfood-no-critique.out" 2>&1; then
1501
+ _fail "dogfood-pass should reject required critique gaps before writing evidence"
1502
+ elif rg -q 'requires passing critique' "$TMPDIR_EVAL/dogfood-no-critique.out" \
1503
+ && [[ ! -f "$DOGFOOD_DIR/evidence.json" ]]; then
1504
+ _pass "dogfood-pass requires critique when configured without partial evidence writes"
1505
+ else
1506
+ _fail "dogfood-pass critique requirement was not fail-closed"
1507
+ fi
1508
+
# Case: --artifact-dir points at "$SESSION_ROOT/dogfood-pas", a path this section
# never creates (the summary text calls it a typo artifact dir).
# Expected: non-zero exit, output containing 'artifact directory does not exist',
# and the directory still absent afterwards ([[ ! -d ... ]]), i.e. the command did
# not create it.
1509
+ if flow_agents_node "$WRITER" dogfood-pass \
1510
+ --artifact-root "$SESSION_ROOT" \
1511
+ --artifact-dir "$SESSION_ROOT/dogfood-pas" \
1512
+ --verdict pass \
1513
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
1514
+ --summary "Should fail for typo artifact dir." >"$TMPDIR_EVAL/dogfood-bad-dir.out" 2>&1; then
1515
+ _fail "dogfood-pass should reject bad explicit artifact dirs"
1516
+ elif rg -q 'artifact directory does not exist' "$TMPDIR_EVAL/dogfood-bad-dir.out" \
1517
+ && [[ ! -d "$SESSION_ROOT/dogfood-pas" ]]; then
1518
+ _pass "dogfood-pass rejects bad explicit artifact dirs without creating sidecars"
1519
+ else
1520
+ _fail "dogfood-pass bad artifact dir failure was not fail-closed"
1521
+ fi
1522
+
# Case: --artifact-dir exists but is not under --artifact-root.
# A directory is created directly under $TMPDIR_EVAL and given a hand-written
# deliver markdown file; dogfood-pass is then pointed at it while --artifact-root
# stays $SESSION_ROOT.
# Expected: non-zero exit, output containing
# 'artifact directory must be under artifact root', and no evidence.json written
# into the outside directory.
# NOTE(review): assumes "$TMPDIR_EVAL/outside-dogfood" is not inside $SESSION_ROOT;
# SESSION_ROOT is defined outside this section -- confirm.
1523
+ OUTSIDE_DOGFOOD_DIR="$TMPDIR_EVAL/outside-dogfood"
1524
+ mkdir -p "$OUTSIDE_DOGFOOD_DIR"
1525
+ cat > "$OUTSIDE_DOGFOOD_DIR/outside--deliver.md" <<'MARKDOWN'
1526
+ # Outside artifact
1527
+
1528
+ status: planning
1529
+ type: deliver
1530
+
1531
+ ## Plan
1532
+
1533
+ This should not be writable from a different artifact root.
1534
+ MARKDOWN
1535
+ if flow_agents_node "$WRITER" dogfood-pass \
1536
+ --artifact-root "$SESSION_ROOT" \
1537
+ --artifact-dir "$OUTSIDE_DOGFOOD_DIR" \
1538
+ --verdict pass \
1539
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
1540
+ --summary "Should fail outside root." >"$TMPDIR_EVAL/dogfood-outside-dir.out" 2>&1; then
1541
+ _fail "dogfood-pass should reject artifact dirs outside artifact root"
1542
+ elif rg -q 'artifact directory must be under artifact root' "$TMPDIR_EVAL/dogfood-outside-dir.out" \
1543
+ && [[ ! -f "$OUTSIDE_DOGFOOD_DIR/evidence.json" ]]; then
1544
+ _pass "dogfood-pass rejects outside-root artifact dirs without writes"
1545
+ else
1546
+ _fail "dogfood-pass outside-root failure was not fail-closed"
1547
+ fi
1548
+
# Case: --artifact-dir is a symlink inside the artifact root whose target lives
# outside it. The link "$SESSION_ROOT/dogfood-symlink" points at a directory created
# under $TMPDIR_EVAL.
# Expected: non-zero exit, output containing 'artifact directory must not be a symlink',
# and no evidence.json written through the link into the target directory.
# The symlink is not removed afterwards in this section.
1549
+ DOGFOOD_SYMLINK_TARGET="$TMPDIR_EVAL/dogfood-symlink-target"
1550
+ DOGFOOD_SYMLINK_DIR="$SESSION_ROOT/dogfood-symlink"
1551
+ mkdir -p "$DOGFOOD_SYMLINK_TARGET"
1552
+ if ln -s "$DOGFOOD_SYMLINK_TARGET" "$DOGFOOD_SYMLINK_DIR" 2>"$TMPDIR_EVAL/dogfood-symlink-create.err"; then
1553
+ if flow_agents_node "$WRITER" dogfood-pass \
1554
+ --artifact-root "$SESSION_ROOT" \
1555
+ --artifact-dir "$DOGFOOD_SYMLINK_DIR" \
1556
+ --verdict pass \
1557
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
1558
+ --summary "Should reject symlink artifact dir." >"$TMPDIR_EVAL/dogfood-symlink-dir.out" 2>&1; then
1559
+ _fail "dogfood-pass should reject symlink artifact dirs"
1560
+ elif rg -q 'artifact directory must not be a symlink' "$TMPDIR_EVAL/dogfood-symlink-dir.out" \
1561
+ && [[ ! -f "$DOGFOOD_SYMLINK_TARGET/evidence.json" ]]; then
1562
+ _pass "dogfood-pass rejects symlink artifact dirs without writes"
1563
+ else
1564
+ _fail "dogfood-pass symlink artifact-dir failure was not fail-closed"
1565
+ fi
# When ln -s itself fails, the case is reported through _pass as skipped, with ln's
# captured stderr appended to the message, rather than being counted as a failure.
1566
+ else
1567
+ _pass "dogfood-pass symlink artifact-dir coverage skipped because symlink creation is unavailable: $(cat "$TMPDIR_EVAL/dogfood-symlink-create.err")"
1568
+ fi
1569
+
# Case: --finding-json is not parseable JSON ('{bad json').
# Expected: non-zero exit, output containing '--finding-json must be valid JSON',
# and no evidence.json in $DOGFOOD_DIR.
# The bare `--` after `rg -q` ends rg's option parsing so the pattern, which itself
# starts with dashes, is treated as a pattern rather than a flag.
1570
+ if flow_agents_node "$WRITER" dogfood-pass \
1571
+ --artifact-root "$SESSION_ROOT" \
1572
+ --artifact-dir "$DOGFOOD_DIR" \
1573
+ --verdict pass \
1574
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
1575
+ --require-critique \
1576
+ --critique-id dogfood-bad-json \
1577
+ --critique-summary "Invalid finding should fail before evidence." \
1578
+ --finding-json '{bad json' \
1579
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-bad-finding.out" 2>&1; then
1580
+ _fail "dogfood-pass should reject invalid critique JSON before evidence writes"
1581
+ elif rg -q -- '--finding-json must be valid JSON' "$TMPDIR_EVAL/dogfood-bad-finding.out" \
1582
+ && [[ ! -f "$DOGFOOD_DIR/evidence.json" ]]; then
1583
+ _pass "dogfood-pass rejects invalid critique JSON before partial evidence writes"
1584
+ else
1585
+ _fail "dogfood-pass invalid critique JSON was not fail-closed"
1586
+ fi
1587
+
# Case: --verdict pass together with a required critique whose verdict is "fail".
# Expected: non-zero exit, output containing
# 'requires clean critique before recording pass evidence', and no evidence.json
# in $DOGFOOD_DIR.
1588
+ if flow_agents_node "$WRITER" dogfood-pass \
1589
+ --artifact-root "$SESSION_ROOT" \
1590
+ --artifact-dir "$DOGFOOD_DIR" \
1591
+ --verdict pass \
1592
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
1593
+ --require-critique \
1594
+ --critique-id dogfood-failing-review \
1595
+ --critique-verdict fail \
1596
+ --critique-summary "Failing critique should fail before evidence." \
1597
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-failing-critique.out" 2>&1; then
1598
+ _fail "dogfood-pass should reject failing required critique before evidence writes"
1599
+ elif rg -q 'requires clean critique before recording pass evidence' "$TMPDIR_EVAL/dogfood-failing-critique.out" \
1600
+ && [[ ! -f "$DOGFOOD_DIR/evidence.json" ]]; then
1601
+ _pass "dogfood-pass rejects failing required critique before partial evidence writes"
1602
+ else
1603
+ _fail "dogfood-pass failing critique was not fail-closed"
1604
+ fi
1605
+
# Case: a new passing critique is supplied, but the artifact dir already holds a
# critique.json with status "fail" and an open, high-severity finding.
# Expected: non-zero exit, output containing
# 'requires clean critique before recording pass evidence', and no evidence.json
# created in the fixture dir. Unlike some other fixtures in this script, state.json
# and handoff.json are not snapshotted or compared in this case.
1606
+ DIRTY_CRITIQUE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-dirty-critique"
1607
+ mkdir -p "$DIRTY_CRITIQUE_DOGFOOD_DIR"
1608
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$DIRTY_CRITIQUE_DOGFOOD_DIR/dogfood-dirty-critique--deliver.md"
1609
+ flow_agents_node "$WRITER" init-plan "$DIRTY_CRITIQUE_DOGFOOD_DIR/dogfood-dirty-critique--deliver.md" \
1610
+ --source-request "Dogfood dirty critique fixture." \
1611
+ --summary "Dogfood dirty critique fixture." \
1612
+ --next-action "Run dogfood pass against existing open critique." \
1613
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-dirty-init.out" 2>"$TMPDIR_EVAL/dogfood-dirty-init.err"
1614
+ cat > "$DIRTY_CRITIQUE_DOGFOOD_DIR/critique.json" <<'JSON'
1615
+ {
1616
+ "schema_version": "1.0",
1617
+ "task_slug": "dogfood-dirty-critique",
1618
+ "status": "fail",
1619
+ "required": true,
1620
+ "updated_at": "2026-05-09T00:01:00Z",
1621
+ "critiques": [
1622
+ {
1623
+ "id": "existing-open-review",
1624
+ "reviewer": "tool-code-reviewer",
1625
+ "reviewed_at": "2026-05-09T00:01:00Z",
1626
+ "verdict": "fail",
1627
+ "summary": "Existing open finding blocks clean completion.",
1628
+ "findings": [
1629
+ {
1630
+ "severity": "high",
1631
+ "status": "open",
1632
+ "summary": "Existing finding remains open."
1633
+ }
1634
+ ]
1635
+ }
1636
+ ]
1637
+ }
1638
+ JSON
1639
+ if flow_agents_node "$WRITER" dogfood-pass \
1640
+ --artifact-root "$SESSION_ROOT" \
1641
+ --artifact-dir "$DIRTY_CRITIQUE_DOGFOOD_DIR" \
1642
+ --verdict pass \
1643
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
1644
+ --require-critique \
1645
+ --critique-id dogfood-clean-review \
1646
+ --critique-verdict pass \
1647
+ --critique-summary "New critique is clean but prior critique is still open." \
1648
+ --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-existing-dirty-critique.out" 2>&1; then
1649
+ _fail "dogfood-pass should reject existing dirty critique before evidence writes"
1650
+ elif rg -q 'requires clean critique before recording pass evidence' "$TMPDIR_EVAL/dogfood-existing-dirty-critique.out" \
1651
+ && [[ ! -f "$DIRTY_CRITIQUE_DOGFOOD_DIR/evidence.json" ]]; then
1652
+ _pass "dogfood-pass rejects existing dirty critique before partial evidence writes"
1653
+ else
1654
+ _fail "dogfood-pass existing dirty critique was not fail-closed"
1655
+ fi
1656
+
# Case: an honest failure must be recordable.
# With --verdict fail, a failing check, a failing critique and an open finding, the
# command is expected to exit zero. stdout and stderr are captured to separate
# files here, and both are included in the failure message if the command is
# rejected. The fixture dir is reused by the content and validator checks that follow.
1657
+ FAILED_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-failed-pass"
1658
+ mkdir -p "$FAILED_DOGFOOD_DIR"
1659
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$FAILED_DOGFOOD_DIR/dogfood-failed-pass--deliver.md"
1660
+ flow_agents_node "$WRITER" init-plan "$FAILED_DOGFOOD_DIR/dogfood-failed-pass--deliver.md" \
1661
+ --source-request "Dogfood failed pass fixture." \
1662
+ --summary "Dogfood failed pass fixture." \
1663
+ --next-action "Record failed dogfood pass." \
1664
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-failed-init.out" 2>"$TMPDIR_EVAL/dogfood-failed-init.err"
1665
+ if flow_agents_node "$WRITER" dogfood-pass \
1666
+ --artifact-root "$SESSION_ROOT" \
1667
+ --artifact-dir "$FAILED_DOGFOOD_DIR" \
1668
+ --verdict fail \
1669
+ --check-json '{"id":"dogfood-failed-check","kind":"test","status":"fail","summary":"Dogfood check failed."}' \
1670
+ --require-critique \
1671
+ --critique-id dogfood-failed-review \
1672
+ --critique-verdict fail \
1673
+ --critique-summary "Failed critique should be recorded for routing." \
1674
+ --finding-json '{"id":"failed-dogfood-finding","severity":"high","status":"open","description":"Failed dogfood finding remains open."}' \
1675
+ --summary "Dogfood pass failed and should route back to execution." \
1676
+ --timestamp "2026-05-09T00:04:30Z" >"$TMPDIR_EVAL/dogfood-failed-pass.out" 2>"$TMPDIR_EVAL/dogfood-failed-pass.err"; then
1677
+ _pass "dogfood-pass records failed evidence and failing critique for routing"
1678
+ else
1679
+ _fail "dogfood-pass should allow honest failed records: $(cat "$TMPDIR_EVAL/dogfood-failed-pass.out" "$TMPDIR_EVAL/dogfood-failed-pass.err")"
1680
+ fi
1681
+
# Inspects the sidecars in $FAILED_DOGFOOD_DIR with literal text matches:
# evidence.json must contain "verdict": "fail", critique.json "status": "fail",
# state.json "status": "failed", and handoff.json the blocker text
# 'Required dogfood critique is not passing'.
# NOTE(review): the patterns include the single space after the colon, so they
# depend on how the writer formats its JSON output -- confirm that is stable.
1682
+ if rg -q '"verdict": "fail"' "$FAILED_DOGFOOD_DIR/evidence.json" \
1683
+ && rg -q '"status": "fail"' "$FAILED_DOGFOOD_DIR/critique.json" \
1684
+ && rg -q '"status": "failed"' "$FAILED_DOGFOOD_DIR/state.json" \
1685
+ && rg -q 'Required dogfood critique is not passing' "$FAILED_DOGFOOD_DIR/handoff.json"; then
1686
+ _pass "dogfood-pass failed records preserve failed state and blockers"
1687
+ else
1688
+ _fail "dogfood-pass failed record did not preserve routing state"
1689
+ fi
1690
+
# Runs the validator with --require-sidecars --require-critique against the failed
# record. The validator is expected to exit non-zero, and 'required critique must pass'
# must appear in either its captured stdout or its captured stderr (rg searches
# both files).
1691
+ if flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$FAILED_DOGFOOD_DIR" >"$TMPDIR_EVAL/dogfood-failed-valid.out" 2>"$TMPDIR_EVAL/dogfood-failed-valid.err"; then
1692
+ _fail "strict validator should still reject failed required critique"
1693
+ elif rg -q 'required critique must pass' "$TMPDIR_EVAL/dogfood-failed-valid.out" "$TMPDIR_EVAL/dogfood-failed-valid.err"; then
1694
+ _pass "dogfood-pass failed records remain visibly blocked under strict validation"
1695
+ else
1696
+ _fail "dogfood-pass failed record strict validation did not expose critique blocker"
1697
+ fi
1698
+
# Case: the clean, fully specified dogfood pass against $DOGFOOD_DIR.
# Supplies a passing check with an artifact_refs entry, a passing critique from
# reviewer tool-code-reviewer, and a learning record whose correction.needed is false.
# The command must exit zero; on failure both captured stdout and stderr are included
# in the message.
# Ordering matters: earlier cases in this script assert that
# "$DOGFOOD_DIR/evidence.json" does not exist, so this call presumably has to stay
# after them -- confirm before reordering.
1699
+ if flow_agents_node "$WRITER" dogfood-pass \
1700
+ --artifact-root "$SESSION_ROOT" \
1701
+ --artifact-dir "$DOGFOOD_DIR" \
1702
+ --verdict pass \
1703
+ --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Dogfood check passed.","artifact_refs":[{"kind":"artifact","file":"dogfood-pass--deliver.md","summary":"Dogfood pass deliver artifact."}]}' \
1704
+ --require-critique \
1705
+ --critique-id dogfood-review \
1706
+ --reviewer tool-code-reviewer \
1707
+ --critique-verdict pass \
1708
+ --critique-summary "Dogfood critique passed." \
1709
+ --learning-record-json '{"id":"dogfood-learning","source_refs":["evidence.json","critique.json"],"outcome":"success","facts":["Dogfood pass command recorded evidence and critique."],"interpretation":"Dogfood pass can close a clean local loop.","routing":[{"target":"none","action":"No follow-up required.","status":"completed"}],"correction":{"needed":false,"evidence":"Evidence, critique, and learning matched intended dogfood behavior."}}' \
1710
+ --learning-summary "Dogfood command learning recorded." \
1711
+ --summary "Dogfood pass completed." \
1712
+ --timestamp "2026-05-09T00:05:00Z" >"$TMPDIR_EVAL/dogfood-pass.out" 2>"$TMPDIR_EVAL/dogfood-pass.err"; then
1713
+ _pass "sidecar writer records dogfood pass"
1714
+ else
1715
+ _fail "dogfood-pass failed: $(cat "$TMPDIR_EVAL/dogfood-pass.out" "$TMPDIR_EVAL/dogfood-pass.err")"
1716
+ fi
1717
+
# After the clean run, the captured stdout must report a verified state and the
# critique, learning, and state sidecars in $DOGFOOD_DIR must carry the
# pass / learned / verified markers. Any missing marker fails the check.
if ! rg -q '"state_status": "verified"' "$TMPDIR_EVAL/dogfood-pass.out" \
  || ! rg -q '"status": "pass"' "$DOGFOOD_DIR/critique.json" \
  || ! rg -q '"status": "learned"' "$DOGFOOD_DIR/learning.json" \
  || ! rg -q '"status": "verified"' "$DOGFOOD_DIR/state.json"; then
  _fail "dogfood-pass did not produce expected clean sidecars"
else
  _pass "dogfood-pass writes clean evidence, critique, learning, and state"
fi
# The clean dogfood-pass output must satisfy the validator in its strictest
# mode (sidecars and critique both required). Validator output is replayed in
# the failure message.
if ! flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$DOGFOOD_DIR" \
  >"$TMPDIR_EVAL/dogfood-valid.out" 2>"$TMPDIR_EVAL/dogfood-valid.err"; then
  _fail "dogfood-pass output failed validation: $(cat "$TMPDIR_EVAL/dogfood-valid.out" "$TMPDIR_EVAL/dogfood-valid.err")"
else
  _pass "dogfood-pass output passes strict sidecar validation"
fi
# Fail-closed release check: request a merge release decision while supplying a
# failing critique, deliberately without --require-critique. The writer must
#   1. exit non-zero,
#   2. say that a clean critique is required,
#   3. leave no release.json in $DOGFOOD_DIR, and
#   4. keep state.json at "verified".
# If it rejects the request but any of 2-4 does not hold, the captured output is
# included in the failure so the deviation can be diagnosed.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-release-fail-check","kind":"test","status":"pass","summary":"Dogfood release failure fixture check passed."}' \
  --critique-id dogfood-release-failing-review \
  --reviewer tool-code-reviewer \
  --critique-verdict fail \
  --critique-summary "Dogfood release critique failed." \
  --finding-json '{"id":"dogfood-release-finding","severity":"high","status":"open","description":"Release readiness must not ignore failing critique."}' \
  --release-decision merge \
  --release-scope "Dogfood pass release readiness should fail." \
  --summary "Dogfood pass release readiness should be blocked." \
  --timestamp "2026-05-09T00:05:20Z" >"$TMPDIR_EVAL/dogfood-release-fail.out" 2>"$TMPDIR_EVAL/dogfood-release-fail.err"; then
  _fail "dogfood-pass release readiness should reject failing critique even when critique is not explicitly required"
elif rg -q 'requires clean critique' "$TMPDIR_EVAL/dogfood-release-fail.out" "$TMPDIR_EVAL/dogfood-release-fail.err" \
  && [[ ! -f "$DOGFOOD_DIR/release.json" ]] \
  && rg -q '"status": "verified"' "$DOGFOOD_DIR/state.json"; then
  _pass "dogfood-pass release readiness requires clean critique"
else
  _fail "dogfood-pass release readiness failing critique was not fail-closed: $(cat "$TMPDIR_EVAL/dogfood-release-fail.out" "$TMPDIR_EVAL/dogfood-release-fail.err")"
fi
# Run dogfood-pass again with passing evidence and a passing required critique,
# this time also requesting a merge release decision with scope, summary, and a
# docs reference. Arguments are held in an array, one flag/value pair per line.
dogfood_release_args=(
  --artifact-root "$SESSION_ROOT"
  --artifact-dir "$DOGFOOD_DIR"
  --verdict pass
  --check-json '{"id":"dogfood-release-check","kind":"test","status":"pass","summary":"Dogfood release check passed.","artifact_refs":[{"kind":"artifact","file":"dogfood-pass--deliver.md","summary":"Dogfood pass deliver artifact."}]}'
  --require-critique
  --critique-id dogfood-release-review
  --reviewer tool-code-reviewer
  --critique-verdict pass
  --critique-summary "Dogfood release critique passed."
  --release-decision merge
  --release-scope "Dogfood pass release readiness."
  --release-summary "Dogfood pass can record release readiness after clean evidence and critique."
  --release-doc-ref docs/workflow-usage-guide.md
  --summary "Dogfood pass release readiness completed."
  --timestamp "2026-05-09T00:05:30Z"
)

# Both output streams are captured and replayed if the writer exits non-zero.
if ! flow_agents_node "$WRITER" dogfood-pass "${dogfood_release_args[@]}" \
  >"$TMPDIR_EVAL/dogfood-release.out" 2>"$TMPDIR_EVAL/dogfood-release.err"; then
  _fail "dogfood-pass release readiness failed: $(cat "$TMPDIR_EVAL/dogfood-release.out" "$TMPDIR_EVAL/dogfood-release.err")"
else
  _pass "dogfood-pass records release readiness after clean pass"
fi
# The release run must report the merge decision on stdout, write it to
# release.json, and show the release phase in state.json.
if ! rg -q '"release_decision": "merge"' "$TMPDIR_EVAL/dogfood-release.out" \
  || ! rg -q '"decision": "merge"' "$DOGFOOD_DIR/release.json" \
  || ! rg -q '"phase": "release"' "$DOGFOOD_DIR/state.json"; then
  _fail "dogfood-pass release readiness did not update expected sidecars"
else
  _pass "dogfood-pass release readiness updates release sidecar and state"
fi
# Fixture for the NOT_VERIFIED scenario: create the artifact directory, write a
# deliver plan whose verification report is explicitly NOT_VERIFIED, then
# initialise the plan through the sidecar writer.
# NOTE(review): the heredoc body is reproduced exactly as it appears in this
# view, where leading whitespace is not visible; confirm nested checklist
# indentation against the repository copy.
DOGFOOD_NV_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-not-verified"
mkdir -p "$DOGFOOD_NV_DIR"
cat <<'MARKDOWN' >"$DOGFOOD_NV_DIR/dogfood-not-verified--deliver.md"
# Dogfood not verified fixture

status: needs-decision
type: deliver

## Plan

Record a dogfood pass with explicit not verified evidence.

## Definition Of Done

- **User outcome:** Dogfood pass preserves not verified evidence.
- **Scope:** Dogfood not verified fixture.
- **Acceptance criteria:**
- [x] Not verified evidence is preserved - Evidence: evidence.json
- **Usefulness checks:**
- [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
- **Stop-short risks:** Not verified evidence could be hidden as pass.
- **Durable docs target:** not needed
- **Sandbox mode:** local-edit

## Verification Report

Build: [NOT_VERIFIED] external live runtime unavailable

### Acceptance Criteria
- [NOT_VERIFIED] Not verified evidence is preserved - Evidence: external live runtime unavailable.

### Verdict: NOT_VERIFIED

## Goal Fit Gate

- [x] Original user goal restated
- [ ] Every acceptance criterion has evidence

## Final Acceptance

- [ ] CI/relevant checks passed
MARKDOWN

# Setup step only: output is captured for later inspection, exit status is not
# asserted here.
flow_agents_node "$WRITER" init-plan "$DOGFOOD_NV_DIR/dogfood-not-verified--deliver.md" \
  --source-request "Dogfood not verified fixture." \
  --summary "Dogfood not verified fixture." \
  --next-action "Record not verified dogfood pass." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-nv-init.out" 2>"$TMPDIR_EVAL/dogfood-nv-init.err"
# Record a dogfood pass whose verdict is not_verified and confirm, in a single
# condition, that the writer succeeds, that evidence.json and the writer's
# stdout both carry the not_verified marker, and that the declared gap text is
# stored verbatim in evidence.json.
# The gap text ends in a period, which rg would otherwise treat as a regex
# wildcard, so that lookup uses -F (fixed-string) matching.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$DOGFOOD_NV_DIR" \
  --verdict not_verified \
  --check-json '{"id":"dogfood-external","kind":"external","status":"not_verified","summary":"External live runtime was unavailable."}' \
  --gap "External live runtime unavailable." \
  --summary "Dogfood pass preserved not verified evidence." \
  --timestamp "2026-05-09T00:06:00Z" >"$TMPDIR_EVAL/dogfood-nv.out" 2>"$TMPDIR_EVAL/dogfood-nv.err" \
  && rg -q '"verdict": "not_verified"' "$DOGFOOD_NV_DIR/evidence.json" \
  && rg -q '"state_status": "not_verified"' "$TMPDIR_EVAL/dogfood-nv.out" \
  && rg -qF '"External live runtime unavailable."' "$DOGFOOD_NV_DIR/evidence.json"; then
  _pass "dogfood-pass preserves NOT_VERIFIED evidence and routing"
else
  _fail "dogfood-pass did not preserve not verified evidence: $(cat "$TMPDIR_EVAL/dogfood-nv.out" "$TMPDIR_EVAL/dogfood-nv.err")"
fi
# record-release is given the decision "launch" and is expected to reject it
# with a message listing the allowed decisions. stdout and stderr share one
# capture file. When the command fails for a different reason, the captured
# output is appended to the failure so the actual error is visible.
if flow_agents_node "$WRITER" record-release "$ARTIFACT_DIR" \
  --decision launch \
  --scope "Invalid release fixture." \
  --gate-json '{"name":"merge","status":"pass","summary":"Should fail."}' \
  --rollback-json '{"status":"not_required","summary":"Should fail.","owner":"codex"}' \
  --observability-json '{"status":"not_required","summary":"Should fail."}' \
  --docs-json '{"status":"not_needed","summary":"Should fail."}' \
  --summary "Should fail." >"$TMPDIR_EVAL/release-invalid.out" 2>&1; then
  _fail "sidecar writer should reject invalid release decisions"
elif rg -q 'decision must be one of' "$TMPDIR_EVAL/release-invalid.out"; then
  _pass "sidecar writer rejects invalid release decisions"
else
  _fail "invalid release decision failure was not actionable: $(cat "$TMPDIR_EVAL/release-invalid.out")"
fi
# Fixture for the semantic release rejection: copy the auto-sidecars deliver
# plan under a new name, initialise it, and record passing evidence.
# These are setup steps; their output is captured but their exit status is not
# asserted here.
SEMANTIC_RELEASE_DIR="$TMPDIR_EVAL/repo/.flow-agents/semantic-release"
mkdir -p "$SEMANTIC_RELEASE_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$SEMANTIC_RELEASE_DIR/semantic-release--deliver.md"

flow_agents_node "$WRITER" init-plan "$SEMANTIC_RELEASE_DIR/semantic-release--deliver.md" \
  --source-request "Semantic release failure fixture." \
  --summary "Semantic release failure fixture." \
  --next-action "Record evidence." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/semantic-release-init.out" 2>"$TMPDIR_EVAL/semantic-release-init.err"

flow_agents_node "$WRITER" record-evidence "$SEMANTIC_RELEASE_DIR" \
  --verdict pass \
  --check-json '{"id":"semantic-release-fixture","kind":"test","status":"pass","summary":"Semantic release setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" \
  >"$TMPDIR_EVAL/semantic-release-evidence.out" 2>"$TMPDIR_EVAL/semantic-release-evidence.err"
# A merge decision is submitted with only a "docs" gate, so the merge gate is
# missing. The writer must reject it with the merge-gate message AND leave
# state.json in the verification phase. If the rejection happens but either
# expectation is unmet, the captured output is appended to the failure.
if flow_agents_node "$WRITER" record-release "$SEMANTIC_RELEASE_DIR" \
  --decision merge \
  --scope "Semantic release fixture." \
  --gate-json '{"name":"docs","status":"pass","summary":"Docs passed but merge gate is missing."}' \
  --rollback-json '{"status":"not_required","summary":"No deployed runtime change.","owner":"codex"}' \
  --observability-json '{"status":"not_required","summary":"No production telemetry needed."}' \
  --docs-json '{"status":"updated","summary":"Docs are updated."}' \
  --summary "Should fail before state advances." >"$TMPDIR_EVAL/semantic-release-invalid.out" 2>&1; then
  _fail "sidecar writer should reject semantically invalid release decisions"
elif rg -q 'positive release decision requires merge gate to pass' "$TMPDIR_EVAL/semantic-release-invalid.out" \
  && rg -q '"phase": "verification"' "$SEMANTIC_RELEASE_DIR/state.json"; then
  _pass "sidecar writer does not advance state after invalid release semantics"
else
  _fail "semantic release failure advanced state or lacked actionable output: $(cat "$TMPDIR_EVAL/semantic-release-invalid.out")"
fi
# record-learning is given the outcome "celebration" and is expected to reject
# it with a message listing the allowed outcomes. When the command fails with a
# different message, the captured output is appended to the failure.
if flow_agents_node "$WRITER" record-learning "$ARTIFACT_DIR" \
  --status learned \
  --record-json '{"id":"bad-learning","source_refs":["release.json"],"outcome":"celebration","facts":["Should fail."],"interpretation":"Should fail.","routing":[{"target":"doc","action":"Should fail.","status":"completed"}]}' \
  --summary "Should fail." >"$TMPDIR_EVAL/learning-invalid.out" 2>&1; then
  _fail "sidecar writer should reject invalid learning outcomes"
elif rg -q 'learning outcome must be one of' "$TMPDIR_EVAL/learning-invalid.out"; then
  _pass "sidecar writer rejects invalid learning outcomes"
else
  _fail "invalid learning outcome failure was not actionable: $(cat "$TMPDIR_EVAL/learning-invalid.out")"
fi
# A learning record that marks a correction as needed but carries no
# recurrence_key must be rejected with a message naming the missing field.
# The expected message contains a literal ".", so it is matched with -F; as a
# regex the dot would accept any character in that position.
if flow_agents_node "$WRITER" record-learning "$ARTIFACT_DIR" \
  --status followup_required \
  --record-json '{"id":"bad-correction-recurrence","source_refs":["release.json"],"outcome":"mixed","facts":["Should fail."],"interpretation":"Should fail.","routing":[{"target":"skill","action":"Should fail.","status":"open"}],"correction":{"needed":true,"type":"workflow","intended_behavior":"A recurrence key is recorded.","observed_behavior":"The recurrence key is missing.","gap":"Grouping would be impossible.","prevention":{"target":"skill","action":"Should fail.","status":"open"}}}' \
  --summary "Should fail." >"$TMPDIR_EVAL/correction-missing-recurrence.out" 2>&1; then
  _fail "sidecar writer should reject correction-needed records without recurrence key"
elif rg -qF 'correction.recurrence_key is required' "$TMPDIR_EVAL/correction-missing-recurrence.out"; then
  _pass "sidecar writer rejects correction-needed records without recurrence key"
else
  detail="$(cat "$TMPDIR_EVAL/correction-missing-recurrence.out")"
  _fail "missing correction recurrence key failure was not actionable: $detail"
fi
# A correction-needed learning record that has a recurrence key but neither a
# prevention route nor a no-change rationale must be rejected with a message
# that names the missing decision. stdout and stderr share one capture file.
missing_prevention_args=(
  --status followup_required
  --record-json '{"id":"bad-correction-prevention","source_refs":["release.json"],"outcome":"mixed","facts":["Should fail."],"interpretation":"Should fail.","routing":[{"target":"none","action":"Should fail.","status":"completed"}],"correction":{"needed":true,"type":"workflow","recurrence_key":"learning-review.missing-prevention","intended_behavior":"A prevention route or no-change rationale is recorded.","observed_behavior":"Neither decision is present.","gap":"The mismatch has no closeout decision."}}'
  --summary "Should fail."
)

if flow_agents_node "$WRITER" record-learning "$ARTIFACT_DIR" "${missing_prevention_args[@]}" \
  >"$TMPDIR_EVAL/correction-missing-prevention.out" 2>&1; then
  _fail "sidecar writer should reject correction-needed records without prevention or no-change rationale"
elif rg -q 'correction requires prevention route or no_change_rationale' "$TMPDIR_EVAL/correction-missing-prevention.out"; then
  _pass "sidecar writer rejects correction-needed records without prevention or no-change rationale"
else
  _fail "missing correction prevention failure was not actionable: $(cat "$TMPDIR_EVAL/correction-missing-prevention.out")"
fi
# A correction whose prevention object only names an action (no target) must be
# rejected with a message naming the missing target field.
# The expected message contains literal dots, so it is matched with -F rather
# than as a regex where each "." would match any character.
if flow_agents_node "$WRITER" record-learning "$ARTIFACT_DIR" \
  --status followup_required \
  --record-json '{"id":"bad-correction-prevention-shape","source_refs":["release.json"],"outcome":"mixed","facts":["Should fail."],"interpretation":"Should fail.","routing":[{"target":"none","action":"Should fail.","status":"completed"}],"correction":{"needed":true,"type":"workflow","recurrence_key":"learning-review.incomplete-prevention","intended_behavior":"A complete prevention route is recorded.","observed_behavior":"Prevention only named an action.","gap":"The prevention route was not actionable.","prevention":{"action":"Should fail."}}}' \
  --summary "Should fail." >"$TMPDIR_EVAL/correction-incomplete-prevention.out" 2>&1; then
  _fail "sidecar writer should reject incomplete correction prevention routes"
elif rg -qF 'correction.prevention.target is required' "$TMPDIR_EVAL/correction-incomplete-prevention.out"; then
  _pass "sidecar writer rejects incomplete correction prevention routes"
else
  _fail "incomplete correction prevention failure was not actionable: $(cat "$TMPDIR_EVAL/correction-incomplete-prevention.out")"
fi
# Fixture for the semantic learning rejection: copy the auto-sidecars deliver
# plan under a new name, then run init-plan, record-evidence, and
# record-release with a passing merge gate.
# Setup only: each command's output is captured, exit status is not asserted.
SEMANTIC_LEARNING_DIR="$TMPDIR_EVAL/repo/.flow-agents/semantic-learning"
mkdir -p "$SEMANTIC_LEARNING_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$SEMANTIC_LEARNING_DIR/semantic-learning--deliver.md"

flow_agents_node "$WRITER" init-plan "$SEMANTIC_LEARNING_DIR/semantic-learning--deliver.md" \
  --source-request "Semantic learning failure fixture." \
  --summary "Semantic learning failure fixture." \
  --next-action "Record evidence." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/semantic-learning-init.out" 2>"$TMPDIR_EVAL/semantic-learning-init.err"

flow_agents_node "$WRITER" record-evidence "$SEMANTIC_LEARNING_DIR" \
  --verdict pass \
  --check-json '{"id":"semantic-learning-fixture","kind":"test","status":"pass","summary":"Semantic learning setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" \
  >"$TMPDIR_EVAL/semantic-learning-evidence.out" 2>"$TMPDIR_EVAL/semantic-learning-evidence.err"

flow_agents_node "$WRITER" record-release "$SEMANTIC_LEARNING_DIR" \
  --decision merge \
  --scope "Semantic learning fixture." \
  --gate-json '{"name":"merge","status":"pass","summary":"Merge gate passed."}' \
  --rollback-json '{"status":"not_required","summary":"No deployed runtime change.","owner":"codex"}' \
  --observability-json '{"status":"not_required","summary":"No production telemetry needed."}' \
  --docs-json '{"status":"updated","summary":"Docs are updated."}' \
  --summary "Release state exists before learning failure." \
  --timestamp "2026-05-09T00:02:00Z" \
  >"$TMPDIR_EVAL/semantic-learning-release.out" 2>"$TMPDIR_EVAL/semantic-learning-release.err"
# A learning record with status "learned" but a routing entry that is still
# "open" must be rejected with the open-routing message, and state.json must
# remain in the release phase. If the rejection happens but either expectation
# is unmet, the captured output is appended to the failure.
if flow_agents_node "$WRITER" record-learning "$SEMANTIC_LEARNING_DIR" \
  --status learned \
  --record-json '{"id":"open-routing","source_refs":["release.json"],"outcome":"success","facts":["Should fail."],"interpretation":"Should fail before archiving.","routing":[{"target":"backlog","action":"Route open follow-up.","status":"open"}]}' \
  --summary "Should fail before state advances." >"$TMPDIR_EVAL/semantic-learning-invalid.out" 2>&1; then
  _fail "sidecar writer should reject semantically invalid learning records"
elif rg -q 'learning status learned cannot have open routing' "$TMPDIR_EVAL/semantic-learning-invalid.out" \
  && rg -q '"phase": "release"' "$SEMANTIC_LEARNING_DIR/state.json"; then
  _pass "sidecar writer does not archive state after invalid learning semantics"
else
  _fail "semantic learning failure advanced state or lacked actionable output: $(cat "$TMPDIR_EVAL/semantic-learning-invalid.out")"
fi
# Fixture for critique import: copy the auto-sidecars deliver plan, initialise
# it, record passing evidence, then write a review artifact with front matter
# (role, parent, created, verdict) and a single LOW finding.
# Setup only: writer output is captured, exit status is not asserted.
REVIEW_DIR="$TMPDIR_EVAL/repo/.flow-agents/imported-critique"
mkdir -p "$REVIEW_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$REVIEW_DIR/imported-critique--deliver.md"

flow_agents_node "$WRITER" init-plan "$REVIEW_DIR/imported-critique--deliver.md" \
  --source-request "Imported critique fixture." \
  --summary "Imported critique fixture." \
  --next-action "Import critique." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/import-init.out" 2>"$TMPDIR_EVAL/import-init.err"

flow_agents_node "$WRITER" record-evidence "$REVIEW_DIR" \
  --verdict pass \
  --check-json '{"id":"import-fixture","kind":"test","status":"pass","summary":"Import fixture setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" \
  >"$TMPDIR_EVAL/import-evidence.out" 2>"$TMPDIR_EVAL/import-evidence.err"

# Quoted delimiter: the review text is written literally, with no expansion.
cat <<'MARKDOWN' >"$REVIEW_DIR/imported-critique--review.md"
---
role: code-review
parent: imported-critique--deliver
created: 2026-05-09T00:02:00Z
verdict: PASS
---

## Code Review

Findings: 1 LOW

### Findings

#### [LOW] src/cli/workflow-sidecar.ts - Minor style note
This finding was already addressed before import.

### Verdict: PASS
MARKDOWN
# Import the passing review artifact, marking its findings as fixed. The
# writer's stdout and stderr are replayed if the import exits non-zero.
if ! flow_agents_node "$WRITER" import-critique "$REVIEW_DIR" "$REVIEW_DIR/imported-critique--review.md" \
  --finding-status fixed \
  >"$TMPDIR_EVAL/import-critique.out" 2>"$TMPDIR_EVAL/import-critique.err"; then
  _fail "sidecar writer import critique failed: $(cat "$TMPDIR_EVAL/import-critique.out" "$TMPDIR_EVAL/import-critique.err")"
else
  _pass "sidecar writer imports passing critique artifact"
fi
# critique.json must contain a finding with the id "minor-style-note" and a
# "fixed" status after the import.
if ! rg -q '"id": "minor-style-note"' "$REVIEW_DIR/critique.json" \
  || ! rg -q '"status": "fixed"' "$REVIEW_DIR/critique.json"; then
  _fail "sidecar writer did not extract review findings"
else
  _pass "sidecar writer extracts review findings"
fi
# Write a plain Markdown note with no review front matter and try to import it
# as a critique. The writer must reject it and explain that a review artifact
# has to declare its role. When it fails for another reason, the captured
# output is appended to the failure so the actual error is visible.
cat > "$REVIEW_DIR/unrelated-note.md" <<'MARKDOWN'
# Unrelated Note

This is ordinary Markdown and must not satisfy required critique.
MARKDOWN

if flow_agents_node "$WRITER" import-critique "$REVIEW_DIR" "$REVIEW_DIR/unrelated-note.md" >"$TMPDIR_EVAL/import-unrelated.out" 2>&1; then
  _fail "sidecar writer should reject non-review Markdown imports"
elif rg -q 'review artifact must declare role' "$TMPDIR_EVAL/import-unrelated.out"; then
  _pass "sidecar writer rejects non-review Markdown imports"
else
  _fail "non-review import failure was not actionable: $(cat "$TMPDIR_EVAL/import-unrelated.out")"
fi
# The sidecars in $ARTIFACT_DIR must satisfy the validator with both sidecars
# and critique required. Validator output is replayed on failure.
if ! flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$ARTIFACT_DIR" \
  >"$TMPDIR_EVAL/valid.out" 2>"$TMPDIR_EVAL/valid.err"; then
  _fail "writer output failed validation: $(cat "$TMPDIR_EVAL/valid.out" "$TMPDIR_EVAL/valid.err")"
else
  _pass "writer output passes strict sidecar validation"
fi
# Fixture for the open-finding critique rejection: copy the auto-sidecars
# deliver plan under a new name, initialise it, and record passing evidence.
# Setup only: writer output is captured, exit status is not asserted.
BAD_DIR="$TMPDIR_EVAL/repo/.flow-agents/bad-critique"
mkdir -p "$BAD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$BAD_DIR/bad-critique--deliver.md"

flow_agents_node "$WRITER" init-plan "$BAD_DIR/bad-critique--deliver.md" \
  --source-request "Bad critique fixture." \
  --summary "Bad critique fixture." \
  --next-action "Record evidence." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/bad-init.out" 2>"$TMPDIR_EVAL/bad-init.err"

flow_agents_node "$WRITER" record-evidence "$BAD_DIR" \
  --verdict pass \
  --check-json '{"id":"bad-fixture","kind":"test","status":"pass","summary":"Bad fixture setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" \
  >"$TMPDIR_EVAL/bad-evidence.out" 2>"$TMPDIR_EVAL/bad-evidence.err"
# A critique recorded with verdict "pass" but carrying an open medium-severity
# finding must be rejected with the "required critique must pass" message.
# When the writer fails with a different message, the captured output is
# appended to the failure so the actual error is visible.
if flow_agents_node "$WRITER" record-critique "$BAD_DIR" \
  --id bad-review \
  --reviewer tool-code-reviewer \
  --verdict pass \
  --summary "Open finding should fail." \
  --finding-json '{"id":"open-medium","severity":"medium","status":"open","description":"Open finding."}' \
  --timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/bad-critique.out" 2>&1; then
  _fail "sidecar writer should reject open critique findings"
elif rg -q 'required critique must pass' "$TMPDIR_EVAL/bad-critique.out"; then
  _pass "sidecar writer rejects open critique findings"
else
  _fail "open critique failure did not mention open findings: $(cat "$TMPDIR_EVAL/bad-critique.out")"
fi
# Fixture for the failing critique import: copy the auto-sidecars deliver plan,
# initialise it, record passing evidence, then write a review artifact whose
# verdict is CHANGES_REQUESTED with a single HIGH finding.
# Setup only: writer output is captured, exit status is not asserted.
IMPORT_BAD="$TMPDIR_EVAL/repo/.flow-agents/imported-bad-critique"
mkdir -p "$IMPORT_BAD"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$IMPORT_BAD/imported-bad-critique--deliver.md"

flow_agents_node "$WRITER" init-plan "$IMPORT_BAD/imported-bad-critique--deliver.md" \
  --source-request "Bad imported critique fixture." \
  --summary "Bad imported critique fixture." \
  --next-action "Import failing critique." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/import-bad-init.out" 2>"$TMPDIR_EVAL/import-bad-init.err"

flow_agents_node "$WRITER" record-evidence "$IMPORT_BAD" \
  --verdict pass \
  --check-json '{"id":"import-bad-fixture","kind":"test","status":"pass","summary":"Bad import fixture setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" \
  >"$TMPDIR_EVAL/import-bad-evidence.out" 2>"$TMPDIR_EVAL/import-bad-evidence.err"

# Quoted delimiter: the review text is written literally, with no expansion.
cat <<'MARKDOWN' >"$IMPORT_BAD/imported-bad-critique--review.md"
---
role: code-review
parent: imported-bad-critique--deliver
created: 2026-05-09T00:02:00Z
verdict: CHANGES_REQUESTED
---

## Code Review

Findings: 1 HIGH

### Findings

#### [HIGH] src/cli/workflow-sidecar.ts - Imported blocker
This finding should keep required critique from passing.

### Verdict: CHANGES_REQUESTED
MARKDOWN
# Importing the CHANGES_REQUESTED review must exit non-zero with the
# "required critique must pass" message, while still persisting the finding
# (id "imported-blocker") to critique.json. If the import is rejected but
# either expectation is unmet, the captured output is appended to the failure.
if flow_agents_node "$WRITER" import-critique "$IMPORT_BAD" "$IMPORT_BAD/imported-bad-critique--review.md" >"$TMPDIR_EVAL/import-bad-critique.out" 2>&1; then
  _fail "sidecar writer should reject imported failing critique"
elif rg -q 'required critique must pass' "$TMPDIR_EVAL/import-bad-critique.out" \
  && rg -q '"id": "imported-blocker"' "$IMPORT_BAD/critique.json"; then
  _pass "sidecar writer persists and rejects imported failing critique"
else
  _fail "imported failing critique did not persist actionable finding: $(cat "$TMPDIR_EVAL/import-bad-critique.out")"
fi
# Final verdict for the whole script: a non-zero $errors count prints the
# failure summary and exits 1; otherwise the success line is printed and the
# script exits 0.
if ! [[ "$errors" -eq 0 ]]; then
  printf '%s\n' "Workflow sidecar writer integration failed: $errors issue(s)."
  exit 1
fi

printf '%s\n' "Workflow sidecar writer integration passed."
exit 0