@kontourai/flow-agents 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +103 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/console-learning-projection.d.ts +1 -0
  14. package/build/src/cli/effective-backlog-settings.d.ts +1 -0
  15. package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
  16. package/build/src/cli/init.d.ts +17 -0
  17. package/build/src/cli/init.js +242 -20
  18. package/build/src/cli/kit.d.ts +1 -0
  19. package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
  20. package/build/src/cli/publish-change-helper.d.ts +1 -0
  21. package/build/src/cli/pull-work-provider.d.ts +1 -0
  22. package/build/src/cli/runtime-adapter.d.ts +1 -0
  23. package/build/src/cli/telemetry-doctor.d.ts +1 -0
  24. package/build/src/cli/usage-feedback.d.ts +1 -0
  25. package/build/src/cli/utterance-check.d.ts +1 -0
  26. package/build/src/cli/validate-hook-influence.d.ts +1 -0
  27. package/build/src/cli/validate-source-tree.d.ts +1 -0
  28. package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
  29. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  30. package/build/src/cli/verify.d.ts +1 -0
  31. package/build/src/cli/verify.js +90 -0
  32. package/build/src/cli/veritas-governance.d.ts +1 -0
  33. package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
  34. package/build/src/cli/workflow-sidecar.d.ts +324 -0
  35. package/build/src/cli/workflow-sidecar.js +1973 -90
  36. package/build/src/cli.d.ts +2 -0
  37. package/build/src/cli.js +2 -3
  38. package/build/src/flow-kit/validate.d.ts +81 -0
  39. package/build/src/index.d.ts +5 -0
  40. package/build/src/index.js +36 -0
  41. package/build/src/lib/args.d.ts +8 -0
  42. package/build/src/lib/flow-resolver.d.ts +82 -0
  43. package/build/src/lib/flow-resolver.js +237 -0
  44. package/build/src/lib/fs.d.ts +7 -0
  45. package/build/src/lib/workflow-learning-projection.d.ts +132 -0
  46. package/build/src/runtime-adapters.d.ts +18 -0
  47. package/build/src/tools/build-universal-bundles.d.ts +2 -0
  48. package/build/src/tools/build-universal-bundles.js +34 -22
  49. package/build/src/tools/common.d.ts +9 -0
  50. package/build/src/tools/generate-context-map.d.ts +2 -0
  51. package/build/src/tools/generate-context-map.js +3 -16
  52. package/build/src/tools/validate-package.d.ts +2 -0
  53. package/build/src/tools/validate-source-tree.d.ts +2 -0
  54. package/build/src/tools/validate-source-tree.js +42 -162
  55. package/context/contracts/artifact-contract.md +10 -0
  56. package/context/contracts/delivery-contract.md +1 -0
  57. package/context/contracts/review-contract.md +1 -0
  58. package/context/contracts/verification-contract.md +2 -0
  59. package/context/gate-awareness.md +39 -0
  60. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  61. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  62. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  63. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  64. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  65. package/docs/adr/0007-skill-audit.md +1 -1
  66. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  67. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  68. package/docs/adr/0011-mcp-posture.md +100 -0
  69. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  70. package/docs/adr/0013-context-lifecycle.md +151 -0
  71. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  72. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  73. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  74. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  75. package/docs/agent-system-guidebook.md +5 -12
  76. package/docs/context-map.md +4 -10
  77. package/docs/developer-architecture.md +14 -0
  78. package/docs/index.md +3 -2
  79. package/docs/integrations/framework-adapter.md +19 -6
  80. package/docs/integrations/index.md +2 -2
  81. package/docs/north-star.md +4 -4
  82. package/docs/operating-layers.md +3 -3
  83. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  84. package/docs/repository-structure.md +2 -2
  85. package/docs/skills-map.md +1 -0
  86. package/docs/spec/runtime-hook-surface.md +78 -10
  87. package/docs/standards-register.md +3 -3
  88. package/docs/survey-utterance-check.md +1 -1
  89. package/docs/trust-anchor-adoption.md +197 -0
  90. package/docs/verifiable-trust.md +95 -0
  91. package/docs/veritas-integration.md +2 -2
  92. package/docs/workflow-usage-guide.md +69 -0
  93. package/evals/acceptance/DEMO-false-completion.md +144 -0
  94. package/evals/acceptance/demo-cast.sh +92 -0
  95. package/evals/acceptance/demo-false-completion.sh +72 -0
  96. package/evals/acceptance/demo-real-evidence.sh +104 -0
  97. package/evals/acceptance/demo.tape +29 -0
  98. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  99. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  100. package/evals/acceptance/prove-teeth.sh +105 -0
  101. package/evals/ci/antigaming-suite.sh +54 -0
  102. package/evals/ci/run-baseline.sh +2 -0
  103. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  104. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  105. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  106. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  107. package/evals/integration/test_builder_step_producers.sh +379 -0
  108. package/evals/integration/test_bundle_install.sh +35 -71
  109. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  110. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  111. package/evals/integration/test_checkpoint_signing.sh +489 -0
  112. package/evals/integration/test_claim_lookup.sh +352 -0
  113. package/evals/integration/test_command_log_integrity.sh +275 -0
  114. package/evals/integration/test_context_map.sh +0 -2
  115. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  116. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  117. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  118. package/evals/integration/test_flow_kit_repository.sh +2 -0
  119. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  120. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  121. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  122. package/evals/integration/test_gate_lockdown.sh +1137 -0
  123. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  124. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  125. package/evals/integration/test_goal_fit_hook.sh +69 -4
  126. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  127. package/evals/integration/test_hook_category_behaviors.sh +14 -0
  128. package/evals/integration/test_install_merge.sh +1176 -0
  129. package/evals/integration/test_mint_attestation.sh +373 -0
  130. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  131. package/evals/integration/test_publish_delivery.sh +269 -0
  132. package/evals/integration/test_reconcile_soundness.sh +528 -0
  133. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  134. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  135. package/evals/integration/test_trust_checkpoint.sh +325 -0
  136. package/evals/integration/test_trust_reconcile.sh +293 -0
  137. package/evals/integration/test_verify_cli.sh +208 -0
  138. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  139. package/evals/lib/node.sh +0 -6
  140. package/evals/run.sh +47 -0
  141. package/evals/static/test_library_exports.sh +85 -0
  142. package/evals/static/test_universal_bundles.sh +15 -0
  143. package/evals/static/test_workflow_skills.sh +6 -13
  144. package/install.sh +0 -7
  145. package/integrations/strands-ts/README.md +25 -15
  146. package/integrations/veritas/flow-agents.adapter.json +1 -2
  147. package/kits/builder/flows/build.flow.json +59 -12
  148. package/kits/builder/kit.json +85 -15
  149. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  150. package/kits/builder/skills/deliver/SKILL.md +36 -6
  151. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  152. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  153. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  154. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  155. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  156. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  157. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  158. package/kits/knowledge/adapters/default-store/index.js +38 -0
  159. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  160. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  161. package/kits/knowledge/docs/store-contract.md +314 -0
  162. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  163. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  164. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  165. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  166. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  167. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  168. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  169. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  170. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  171. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  172. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  173. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  174. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  175. package/kits/knowledge/kit.json +51 -1
  176. package/package.json +13 -4
  177. package/packaging/conformance/README.md +10 -2
  178. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  179. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  180. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  181. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  182. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  183. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  184. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  185. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  186. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  187. package/packaging/conformance/run-conformance.js +1 -1
  188. package/scripts/README.md +2 -1
  189. package/scripts/build-universal-bundles.js +0 -1
  190. package/scripts/ci/mint-attestation.js +221 -0
  191. package/scripts/ci/trust-reconcile.js +545 -0
  192. package/scripts/hooks/config-protection.js +423 -1
  193. package/scripts/hooks/evidence-capture.js +348 -0
  194. package/scripts/hooks/lib/liveness-read.js +113 -0
  195. package/scripts/hooks/run-hook.js +6 -1
  196. package/scripts/hooks/stop-goal-fit.js +1471 -79
  197. package/scripts/hooks/workflow-steering.js +135 -5
  198. package/scripts/install-codex-home.sh +39 -0
  199. package/scripts/install-merge.js +330 -0
  200. package/src/cli/init.ts +218 -20
  201. package/src/cli/validate-workflow-artifacts.ts +18 -2
  202. package/src/cli/verify.ts +100 -0
  203. package/src/cli/workflow-sidecar.ts +2093 -84
  204. package/src/cli.ts +2 -3
  205. package/src/index.ts +53 -0
  206. package/src/lib/flow-resolver.ts +284 -0
  207. package/src/tools/build-universal-bundles.ts +34 -21
  208. package/src/tools/generate-context-map.ts +3 -17
  209. package/src/tools/validate-source-tree.ts +44 -104
  210. package/tsconfig.json +1 -0
  211. package/build/src/tools/filter-installed-packs.js +0 -135
  212. package/packaging/packs.json +0 -49
  213. package/scripts/filter-installed-packs.js +0 -2
  214. package/src/tools/filter-installed-packs.ts +0 -132
@@ -480,10 +480,12 @@ else
480
480
  _fail "sidecar writer evidence failed: $(cat "$TMPDIR_EVAL/evidence.out" "$TMPDIR_EVAL/evidence.err")"
481
481
  fi
482
482
 
483
- if rg -q '"status": "verified"' "$ARTIFACT_DIR/state.json" && rg -q '"status": "pass"' "$ARTIFACT_DIR/acceptance.json"; then
484
- _pass "sidecar writer updates state and acceptance from evidence"
483
+ # Phase 4c: acceptance.json criteria status no longer updated at verification time (bundle-only).
484
+ # State is verified; bundle claims carry the criteria status.
485
+ if rg -q '"status": "verified"' "$ARTIFACT_DIR/state.json" && [[ -f "$ARTIFACT_DIR/trust.bundle" ]] && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const ac=b.claims.filter(c=>c.claimType==="workflow.acceptance.criterion"); if(ac.length===0) throw new Error("no acceptance criterion claims in bundle"); if(ac.some(c=>c.value!=="pass")) throw new Error("some acceptance criterion not pass in bundle: "+JSON.stringify(ac.map(c=>c.value)));' "$ARTIFACT_DIR/trust.bundle" 2>/dev/null; then
486
+ _pass "sidecar writer updates state and records acceptance in bundle from evidence"
485
487
  else
486
- _fail "sidecar writer did not update state and acceptance"
488
+ _fail "sidecar writer did not update state or bundle from evidence"
487
489
  fi
488
490
 
489
491
  INVALID_REF_DIR="$TMPDIR_EVAL/repo/.flow-agents/invalid-evidence-ref"
@@ -545,14 +547,15 @@ else
545
547
  fi
546
548
 
547
549
  SURFACE_CHECK='{"id":"surface-trust-fixture","kind":"policy","status":"pass","summary":"Hachure trust.bundle evidence passed.","surface_trust_refs":[{"artifact_kind":"trust.bundle","artifact_ref":"trust/report.json","gate_id":"builder.trust.bundle","claim_type":"builder.trust.bundle","claim_status":"accepted","subject":"builder-kit","freshness":{"status":"fresh","summary":"Issued during this workflow."},"authority":{"producer":"surface-local","summary":"Local Surface trust producer."},"integrity":{"status":"matched","summary":"Artifact digest matched expected subject and gate.","digest":"sha256:abc123"},"status":"pass","summary":"Accepted trust.bundle claim."}]}'
550
+ # Phase 4c: evidence.json no longer written; verify in trust.bundle (sole verification artifact).
548
551
  if flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
549
552
  --verdict pass \
550
553
  --check-json "$SURFACE_CHECK" \
551
554
  --timestamp "2026-05-09T00:01:05Z" >"$TMPDIR_EVAL/surface-evidence.out" 2>"$TMPDIR_EVAL/surface-evidence.err" \
552
- && rg -q '"surface_trust_refs"' "$ARTIFACT_DIR/evidence.json" \
553
- && rg -q '"artifact_kind": "trust.bundle"' "$ARTIFACT_DIR/evidence.json" \
554
- && ! rg -q 'veritas' "$ARTIFACT_DIR/evidence.json"; then
555
- _pass "sidecar writer records Hachure-aligned trust.bundle refs"
555
+ && [[ -f "$ARTIFACT_DIR/trust.bundle" ]] \
556
+ && ! rg -q 'veritas' "$ARTIFACT_DIR/trust.bundle" \
557
+ && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const c=b.claims.find(c=>c.claimType==="workflow.check.policy"); if(!c) throw new Error("no policy claim in bundle"); if(c.value!=="pass") throw new Error("expected pass, got "+c.value);' "$ARTIFACT_DIR/trust.bundle" 2>/dev/null; then
558
+ _pass "sidecar writer records Hachure-aligned trust.bundle refs (verified in bundle)"
556
559
  else
557
560
  _fail "sidecar writer did not record Hachure-aligned trust.bundle refs: $(cat "$TMPDIR_EVAL/surface-evidence.out" "$TMPDIR_EVAL/surface-evidence.err")"
558
561
  fi
@@ -595,13 +598,16 @@ check_surface_fixture() {
595
598
  local expected_text="$5"
596
599
  local dir="$TMPDIR_EVAL/repo/.flow-agents/surface-$name"
597
600
  mkdir -p "$dir"
601
+ # Phase 4c: evidence.json no longer written; verify surface trust check status in trust.bundle.
598
602
  if flow_agents_node "$WRITER" record-evidence "$dir" \
599
603
  --task-slug "surface-$name" \
600
604
  --verdict "$verdict" \
601
605
  --check-json '{"id":"ordinary-builder-evidence","kind":"test","status":"pass","summary":"Ordinary Builder Kit evidence still records."}' \
602
606
  --surface-trust-json "$SURFACE_FIXTURE_DIR/$fixture" \
603
607
  --timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/surface-$name.out" 2>"$TMPDIR_EVAL/surface-$name.err" \
604
- && node -e 'const fs=require("fs"); const [file, expectedStatus, expectedText]=process.argv.slice(1); const data=JSON.parse(fs.readFileSync(file,"utf8")); const trustChecks=data.checks.filter((check)=>check.id.startsWith("surface-trust-")); if (trustChecks.length!==1) throw new Error(`expected one surface trust check, found ${trustChecks.length}`); const check=trustChecks[0]; if (check.status!==expectedStatus) throw new Error(`expected ${expectedStatus}, got ${check.status}`); const ref=check.surface_trust_refs[0]; const blob=JSON.stringify(check); if (!blob.includes(expectedText)) throw new Error(`missing expected text ${expectedText}: ${blob}`); if (blob.toLowerCase().includes("veritas")) throw new Error("surface trust output leaked a Veritas-specific field"); if (ref.gate_id==="unknown" || ref.claim_type==="unknown") throw new Error("surface trust ref did not map gate and claim metadata");' "$dir/evidence.json" "$expected_status" "$expected_text"
608
+ && [[ -f "$dir/trust.bundle" ]] \
609
+ && ! grep -qi 'veritas' "$dir/trust.bundle" \
610
+ && node -e 'const fs=require("fs"); const [bundleFile, expectedStatus, expectedText]=process.argv.slice(1); const b=JSON.parse(fs.readFileSync(bundleFile,"utf8")); const policyClaims=b.claims.filter((c)=>c.claimType==="workflow.check.policy"); if(policyClaims.length!==1) throw new Error("expected one policy claim, found "+policyClaims.length); const c=policyClaims[0]; if(c.value!==expectedStatus) throw new Error("expected "+expectedStatus+", got "+c.value); const blob=JSON.stringify(b); if(!blob.includes(expectedText)) throw new Error("missing expected text "+expectedText+" in bundle");' "$dir/trust.bundle" "$expected_status" "$expected_text" 2>/dev/null
605
611
  then
606
612
  _pass "surface trust fixture maps $name to $expected_status evidence"
607
613
  else
@@ -619,13 +625,15 @@ check_surface_fixture "artifact-absent" "artifact-absent.json" "not_verified" "n
619
625
 
620
626
  PURE_SURFACE_DIR="$TMPDIR_EVAL/repo/.flow-agents/surface-trust-only"
621
627
  mkdir -p "$PURE_SURFACE_DIR"
628
+ # Phase 4c: evidence.json no longer written; verify in trust.bundle.
622
629
  if flow_agents_node "$WRITER" record-evidence "$PURE_SURFACE_DIR" \
623
630
  --task-slug "surface-trust-only" \
624
631
  --verdict pass \
625
632
  --surface-trust-json "$SURFACE_FIXTURE_DIR/accepted-claim-trust-report.json" \
626
633
  --timestamp "2026-05-09T00:02:30Z" >"$TMPDIR_EVAL/surface-only.out" 2>"$TMPDIR_EVAL/surface-only.err" \
627
- && rg -q '"surface_trust_refs"' "$PURE_SURFACE_DIR/evidence.json"; then
628
- _pass "sidecar writer records Surface trust evidence without unrelated check-json"
634
+ && [[ -f "$PURE_SURFACE_DIR/trust.bundle" ]] \
635
+ && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); if(!Array.isArray(b.claims)||b.claims.length===0) throw new Error("no claims in bundle"); ' "$PURE_SURFACE_DIR/trust.bundle" 2>/dev/null; then
636
+ _pass "sidecar writer records Surface trust evidence without unrelated check-json (verified in bundle)"
629
637
  else
630
638
  _fail "sidecar writer should accept Surface trust evidence without check-json: $(cat "$TMPDIR_EVAL/surface-only.out" "$TMPDIR_EVAL/surface-only.err")"
631
639
  fi
@@ -885,8 +893,12 @@ else
885
893
  _fail "sidecar writer not-verified evidence failed: $(cat "$TMPDIR_EVAL/nv-evidence.out" "$TMPDIR_EVAL/nv-evidence.err")"
886
894
  fi
887
895
 
888
- if rg -q '"status": "not_verified"' "$NV_DIR/state.json" && rg -q '"not_verified_gaps"' "$NV_DIR/evidence.json"; then
889
- _pass "sidecar writer preserves not-verified state and gaps"
896
+ # Phase 4c: evidence.json no longer written; not-verified state is in state.json + trust.bundle.
897
+ # not_verified_gaps are accepted as input but not persisted to a sidecar (bundle-only sessions).
898
+ if rg -q '"status": "not_verified"' "$NV_DIR/state.json" \
899
+ && [[ -f "$NV_DIR/trust.bundle" ]] \
900
+ && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const c=b.claims.find(c=>c.claimType==="workflow.check.external"); if(!c) throw new Error("no external check claim"); if(c.value!=="not_verified") throw new Error("expected not_verified, got "+c.value);' "$NV_DIR/trust.bundle" 2>/dev/null; then
901
+ _pass "sidecar writer preserves not-verified state in state.json and bundle"
890
902
  else
891
903
  _fail "sidecar writer did not preserve not-verified state"
892
904
  fi
@@ -978,10 +990,11 @@ status_a=$?
978
990
  wait "$pid_b"
979
991
  status_b=$?
980
992
 
993
+ # Phase 4c: critique.json no longer written; verify both reviews are in trust.bundle claims.
981
994
  if [[ "$status_a" -eq 0 && "$status_b" -eq 0 ]] \
982
- && rg -q '"id": "concurrent-review-a"' "$CONCURRENT_DIR/critique.json" \
983
- && rg -q '"id": "concurrent-review-b"' "$CONCURRENT_DIR/critique.json"; then
984
- _pass "sidecar writer serializes concurrent sidecar writes"
995
+ && [[ -f "$CONCURRENT_DIR/trust.bundle" ]] \
996
+ && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const cc=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(cc.length<2) throw new Error("expected 2 critique claims, found "+cc.length+": "+JSON.stringify(cc.map(c=>c.subjectId)));' "$CONCURRENT_DIR/trust.bundle" 2>/dev/null; then
997
+ _pass "sidecar writer serializes concurrent sidecar writes (both reviews in bundle)"
985
998
  else
986
999
  _fail "sidecar writer lost concurrent critique writes: $(cat "$TMPDIR_EVAL/concurrent-a.out" "$TMPDIR_EVAL/concurrent-a.err" "$TMPDIR_EVAL/concurrent-b.out" "$TMPDIR_EVAL/concurrent-b.err")"
987
1000
  fi
@@ -1679,21 +1692,24 @@ else
1679
1692
  _fail "dogfood-pass should allow honest failed records: $(cat "$TMPDIR_EVAL/dogfood-failed-pass.out" "$TMPDIR_EVAL/dogfood-failed-pass.err")"
1680
1693
  fi
1681
1694
 
1682
- if rg -q '"verdict": "fail"' "$FAILED_DOGFOOD_DIR/evidence.json" \
1683
- && rg -q '"status": "fail"' "$FAILED_DOGFOOD_DIR/critique.json" \
1684
- && rg -q '"status": "failed"' "$FAILED_DOGFOOD_DIR/state.json" \
1685
- && rg -q 'Required dogfood critique is not passing' "$FAILED_DOGFOOD_DIR/handoff.json"; then
1686
- _pass "dogfood-pass failed records preserve failed state and blockers"
1695
+ # Phase 4c: evidence.json/critique.json no longer written; verify in trust.bundle.
1696
+ if rg -q '"status": "failed"' "$FAILED_DOGFOOD_DIR/state.json" \
1697
+ && rg -q 'Required dogfood critique is not passing' "$FAILED_DOGFOOD_DIR/handoff.json" \
1698
+ && [[ -f "$FAILED_DOGFOOD_DIR/trust.bundle" ]] \
1699
+ && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const cc=b.claims.filter(c=>c.claimType==="workflow.check.test"); if(!cc.length) throw new Error("no test check claim"); if(cc[0].value!=="fail") throw new Error("expected fail, got "+cc[0].value); const crit=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(!crit.length) throw new Error("no critique claim"); if(crit[0].value!=="fail") throw new Error("expected fail critique, got "+crit[0].value);' "$FAILED_DOGFOOD_DIR/trust.bundle" 2>/dev/null; then
1700
+ _pass "dogfood-pass failed records preserve failed state and blockers (verified in bundle)"
1687
1701
  else
1688
1702
  _fail "dogfood-pass failed record did not preserve routing state"
1689
1703
  fi
1690
1704
 
1705
+ # Phase 4c: critique.json no longer written; validator reports sidecar missing (still blocks gate).
1706
+ # The trust.bundle carries the disputed critique claim which is the authoritative gate signal.
1691
1707
  if flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$FAILED_DOGFOOD_DIR" >"$TMPDIR_EVAL/dogfood-failed-valid.out" 2>"$TMPDIR_EVAL/dogfood-failed-valid.err"; then
1692
- _fail "strict validator should still reject failed required critique"
1693
- elif rg -q 'required critique must pass' "$TMPDIR_EVAL/dogfood-failed-valid.out" "$TMPDIR_EVAL/dogfood-failed-valid.err"; then
1694
- _pass "dogfood-pass failed records remain visibly blocked under strict validation"
1708
+ _fail "strict validator should still reject when critique is missing (4c bundle-only)"
1709
+ elif rg -q 'required critique must pass|required sidecar is missing' "$TMPDIR_EVAL/dogfood-failed-valid.out" "$TMPDIR_EVAL/dogfood-failed-valid.err"; then
1710
+ _pass "dogfood-pass failed records remain visibly blocked under strict validation (sidecar missing or critique fail)"
1695
1711
  else
1696
- _fail "dogfood-pass failed record strict validation did not expose critique blocker"
1712
+ _fail "dogfood-pass failed record strict validation did not expose critique blocker: $(cat "$TMPDIR_EVAL/dogfood-failed-valid.out" "$TMPDIR_EVAL/dogfood-failed-valid.err")"
1697
1713
  fi
1698
1714
 
1699
1715
  if flow_agents_node "$WRITER" dogfood-pass \
@@ -1715,11 +1731,13 @@ else
1715
1731
  _fail "dogfood-pass failed: $(cat "$TMPDIR_EVAL/dogfood-pass.out" "$TMPDIR_EVAL/dogfood-pass.err")"
1716
1732
  fi
1717
1733
 
1734
+ # Phase 4c: critique.json no longer written; verify in trust.bundle.
1718
1735
  if rg -q '"state_status": "verified"' "$TMPDIR_EVAL/dogfood-pass.out" \
1719
- && rg -q '"status": "pass"' "$DOGFOOD_DIR/critique.json" \
1720
1736
  && rg -q '"status": "learned"' "$DOGFOOD_DIR/learning.json" \
1721
- && rg -q '"status": "verified"' "$DOGFOOD_DIR/state.json"; then
1722
- _pass "dogfood-pass writes clean evidence, critique, learning, and state"
1737
+ && rg -q '"status": "verified"' "$DOGFOOD_DIR/state.json" \
1738
+ && [[ -f "$DOGFOOD_DIR/trust.bundle" ]] \
1739
+ && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const crit=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(!crit.length) throw new Error("no critique claim in bundle"); if(crit[0].value!=="pass") throw new Error("expected pass critique, got "+crit[0].value);' "$DOGFOOD_DIR/trust.bundle" 2>/dev/null; then
1740
+ _pass "dogfood-pass writes clean bundle, learning, and state (4c bundle-only)"
1723
1741
  else
1724
1742
  _fail "dogfood-pass did not produce expected clean sidecars"
1725
1743
  fi
@@ -1830,6 +1848,7 @@ flow_agents_node "$WRITER" init-plan "$DOGFOOD_NV_DIR/dogfood-not-verified--deli
1830
1848
  --next-action "Record not verified dogfood pass." \
1831
1849
  --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/dogfood-nv-init.out" 2>"$TMPDIR_EVAL/dogfood-nv-init.err"
1832
1850
 
1851
+ # Phase 4c: evidence.json no longer written; verify not-verified claim in trust.bundle.
1833
1852
  if flow_agents_node "$WRITER" dogfood-pass \
1834
1853
  --artifact-root "$SESSION_ROOT" \
1835
1854
  --artifact-dir "$DOGFOOD_NV_DIR" \
@@ -1838,10 +1857,10 @@ if flow_agents_node "$WRITER" dogfood-pass \
1838
1857
  --gap "External live runtime unavailable." \
1839
1858
  --summary "Dogfood pass preserved not verified evidence." \
1840
1859
  --timestamp "2026-05-09T00:06:00Z" >"$TMPDIR_EVAL/dogfood-nv.out" 2>"$TMPDIR_EVAL/dogfood-nv.err" \
1841
- && rg -q '"verdict": "not_verified"' "$DOGFOOD_NV_DIR/evidence.json" \
1842
1860
  && rg -q '"state_status": "not_verified"' "$TMPDIR_EVAL/dogfood-nv.out" \
1843
- && rg -q '"External live runtime unavailable."' "$DOGFOOD_NV_DIR/evidence.json"; then
1844
- _pass "dogfood-pass preserves NOT_VERIFIED evidence and routing"
1861
+ && [[ -f "$DOGFOOD_NV_DIR/trust.bundle" ]] \
1862
+ && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const ec=b.claims.filter(c=>c.claimType==="workflow.check.external"); if(!ec.length) throw new Error("no external check claim"); if(ec[0].value!=="not_verified") throw new Error("expected not_verified, got "+ec[0].value);' "$DOGFOOD_NV_DIR/trust.bundle" 2>/dev/null; then
1863
+ _pass "dogfood-pass preserves NOT_VERIFIED evidence and routing (verified in bundle)"
1845
1864
  else
1846
1865
  _fail "dogfood-pass did not preserve not verified evidence: $(cat "$TMPDIR_EVAL/dogfood-nv.out" "$TMPDIR_EVAL/dogfood-nv.err")"
1847
1866
  fi
@@ -2009,8 +2028,10 @@ else
2009
2028
  _fail "sidecar writer import critique failed: $(cat "$TMPDIR_EVAL/import-critique.out" "$TMPDIR_EVAL/import-critique.err")"
2010
2029
  fi
2011
2030
 
2012
- if rg -q '"id": "minor-style-note"' "$REVIEW_DIR/critique.json" && rg -q '"status": "fixed"' "$REVIEW_DIR/critique.json"; then
2013
- _pass "sidecar writer extracts review findings"
2031
+ # Phase 4c: critique.json no longer written; verify critique claim in trust.bundle.
2032
+ if [[ -f "$REVIEW_DIR/trust.bundle" ]] \
2033
+ && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const crit=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(!crit.length) throw new Error("no critique claim"); if(crit[0].value!=="pass") throw new Error("expected pass, got "+crit[0].value);' "$REVIEW_DIR/trust.bundle" 2>/dev/null; then
2034
+ _pass "sidecar writer extracts review findings (verified in bundle)"
2014
2035
  else
2015
2036
  _fail "sidecar writer did not extract review findings"
2016
2037
  fi
@@ -2097,12 +2118,506 @@ MARKDOWN
2097
2118
 
2098
2119
  if flow_agents_node "$WRITER" import-critique "$IMPORT_BAD" "$IMPORT_BAD/imported-bad-critique--review.md" >"$TMPDIR_EVAL/import-bad-critique.out" 2>&1; then
2099
2120
  _fail "sidecar writer should reject imported failing critique"
2100
- elif rg -q 'required critique must pass' "$TMPDIR_EVAL/import-bad-critique.out" && rg -q '"id": "imported-blocker"' "$IMPORT_BAD/critique.json"; then
2101
- _pass "sidecar writer persists and rejects imported failing critique"
2121
+ elif rg -q 'required critique must pass' "$TMPDIR_EVAL/import-bad-critique.out" \
2122
+ && [[ -f "$IMPORT_BAD/trust.bundle" ]] \
2123
+ && node -e 'const fs=require("fs"); const b=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); const crit=b.claims.filter(c=>c.claimType==="workflow.critique.review"); if(!crit.length) throw new Error("no critique claim"); if(crit[0].value!=="fail") throw new Error("expected fail, got "+crit[0].value);' "$IMPORT_BAD/trust.bundle" 2>/dev/null; then
2124
+ _pass "sidecar writer persists and rejects imported failing critique (critique in bundle, not sidecar)"
2102
2125
  else
2103
2126
  _fail "imported failing critique did not persist actionable finding"
2104
2127
  fi
2105
2128
 
2129
+
2130
+ # ─── AC1: trust.bundle dual-write file existence and schema validity ──────────
2131
+ TB_SCHEMA_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-schema"
2132
+ mkdir -p "$TB_SCHEMA_DIR"
2133
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_SCHEMA_DIR/trust-bundle-schema--deliver.md"
2134
+ flow_agents_node "$WRITER" init-plan "$TB_SCHEMA_DIR/trust-bundle-schema--deliver.md" \
2135
+ --source-request "Trust bundle schema fixture." \
2136
+ --summary "Trust bundle schema fixture." \
2137
+ --next-action "Record evidence and verify trust.bundle." \
2138
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/tb-schema-init.out" 2>"$TMPDIR_EVAL/tb-schema-init.err"
2139
+
2140
+ if flow_agents_node "$WRITER" record-evidence "$TB_SCHEMA_DIR" \
2141
+ --verdict pass \
2142
+ --check-json '{"id":"tb-schema-check","kind":"test","status":"pass","summary":"Trust bundle schema fixture check passed."}' \
2143
+ --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/tb-schema-evidence.out" 2>"$TMPDIR_EVAL/tb-schema-evidence.err" \
2144
+ && [[ -f "$TB_SCHEMA_DIR/trust.bundle" ]]; then
2145
+ _pass "trust.bundle dual-write creates trust.bundle after record-evidence"
2146
+ else
2147
+ _fail "trust.bundle dual-write did not create trust.bundle after record-evidence: $(cat "$TMPDIR_EVAL/tb-schema-evidence.out" "$TMPDIR_EVAL/tb-schema-evidence.err")"
2148
+ fi
2149
+
2150
+ TB_BUNDLE_PATH="$TB_SCHEMA_DIR/trust.bundle"
2151
+ if [[ -f "$TB_BUNDLE_PATH" ]]; then
2152
+ if node --input-type=module <<NODEOF 2>"$TMPDIR_EVAL/tb-validate.err"
2153
+ import { readFileSync } from 'node:fs';
2154
+ import { validateTrustBundle } from '${ROOT}/build/src/cli/workflow-sidecar.js';
2155
+ const bundle = JSON.parse(readFileSync('${TB_BUNDLE_PATH}', 'utf8'));
2156
+ const result = await validateTrustBundle(bundle);
2157
+ if (!result.available) { process.stderr.write('surface unavailable: validateTrustBundle.available was false\n'); process.exit(2); }
2158
+ if (!result.valid) { process.stderr.write('schema invalid: ' + result.errors.join('; ') + '\n'); process.exit(1); }
2159
+ NODEOF
2160
+ then
2161
+ _pass "trust.bundle dual-write produces schema-valid bundle (available:true, valid:true)"
2162
+ else
2163
+ _fail "trust.bundle schema validation failed: $(cat "$TMPDIR_EVAL/tb-validate.err")"
2164
+ fi
2165
+ fi
2166
+
2167
+ # ─── AC2: claim status fidelity — pass→verified, fail→disputed ───────────────
2168
+ TB_FIDELITY_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-fidelity"
2169
+ mkdir -p "$TB_FIDELITY_DIR"
2170
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_FIDELITY_DIR/trust-bundle-fidelity--deliver.md"
2171
+ flow_agents_node "$WRITER" init-plan "$TB_FIDELITY_DIR/trust-bundle-fidelity--deliver.md" \
2172
+ --source-request "Trust bundle claim fidelity fixture." \
2173
+ --summary "Trust bundle claim fidelity fixture." \
2174
+ --next-action "Seed pass and fail checks to verify claim status mapping." \
2175
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/tb-fidelity-init.out" 2>"$TMPDIR_EVAL/tb-fidelity-init.err"
2176
+
2177
+ if flow_agents_node "$WRITER" record-evidence "$TB_FIDELITY_DIR" \
2178
+ --verdict fail \
2179
+ --check-json '{"id":"tb-pass-check","kind":"test","status":"pass","summary":"This check passed."}' \
2180
+ --check-json '{"id":"tb-fail-check","kind":"test","status":"fail","summary":"This check failed."}' \
2181
+ --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/tb-fidelity-evidence.out" 2>"$TMPDIR_EVAL/tb-fidelity-evidence.err" \
2182
+ && [[ -f "$TB_FIDELITY_DIR/trust.bundle" ]]; then
2183
+ if node --input-type=module <<NODEOF 2>"$TMPDIR_EVAL/tb-fidelity-check.err"
2184
+ import { readFileSync } from 'node:fs';
2185
+ const bundle = JSON.parse(readFileSync('${TB_FIDELITY_DIR}/trust.bundle', 'utf8'));
2186
+ const claims = bundle.claims;
2187
+ // Surface uses generateClaimId: search by subjectId (which encodes slug/checkId)
2188
+ const passClaim = claims.find((c) => c.subjectId && c.subjectId.endsWith('/tb-pass-check'));
2189
+ const failClaim = claims.find((c) => c.subjectId && c.subjectId.endsWith('/tb-fail-check'));
2190
+ if (!passClaim) { process.stderr.write('missing claim for subjectId ending with /tb-pass-check\n'); process.exit(1); }
2191
+ if (!failClaim) { process.stderr.write('missing claim for subjectId ending with /tb-fail-check\n'); process.exit(1); }
2192
+ if (passClaim.status !== 'verified') { process.stderr.write('pass check claim status was ' + passClaim.status + ', expected verified (Surface deriveClaimStatus)\n'); process.exit(1); }
2193
+ if (failClaim.status !== 'disputed') { process.stderr.write('fail check claim status was ' + failClaim.status + ', expected disputed (Surface deriveClaimStatus)\n'); process.exit(1); }
2194
+ // Assert at least one acceptance criterion claim exists (seeded by init-plan)
2195
+ const acClaims = claims.filter((c) => c.claimType === 'workflow.acceptance.criterion');
2196
+ if (acClaims.length === 0) { process.stderr.write('expected at least one workflow.acceptance.criterion claim but found none\n'); process.exit(1); }
2197
+ NODEOF
2198
+ then
2199
+ _pass "trust.bundle claim fidelity: pass check maps to verified, fail check maps to disputed, ac criterion claim present (Surface deriveClaimStatus)"
2200
+ else
2201
+ _fail "trust.bundle claim fidelity assertion failed: $(cat "$TMPDIR_EVAL/tb-fidelity-check.err")"
2202
+ fi
2203
+ else
2204
+ _fail "trust.bundle claim fidelity setup failed: $(cat "$TMPDIR_EVAL/tb-fidelity-evidence.out" "$TMPDIR_EVAL/tb-fidelity-evidence.err")"
2205
+ fi
2206
+
2207
+ # ─── AC2: claim status fidelity — critique fail→disputed, pass→verified ──────
2208
+ TB_CRITIQUE_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-critique"
2209
+ mkdir -p "$TB_CRITIQUE_DIR"
2210
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_CRITIQUE_DIR/trust-bundle-critique--deliver.md"
2211
+ flow_agents_node "$WRITER" init-plan "$TB_CRITIQUE_DIR/trust-bundle-critique--deliver.md" \
2212
+ --source-request "Trust bundle critique claim fidelity fixture." \
2213
+ --summary "Trust bundle critique claim fidelity fixture." \
2214
+ --next-action "Record pass and fail critiques to verify claim status mapping." \
2215
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/tb-critique-init.out" 2>"$TMPDIR_EVAL/tb-critique-init.err"
2216
+ flow_agents_node "$WRITER" record-evidence "$TB_CRITIQUE_DIR" \
2217
+ --verdict pass \
2218
+ --check-json '{"id":"tb-critique-setup","kind":"test","status":"pass","summary":"Critique fidelity setup passed."}' \
2219
+ --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/tb-critique-evidence.out" 2>"$TMPDIR_EVAL/tb-critique-evidence.err"
2220
+
2221
+ # Record a failing critique (verdict fail → claim status disputed)
2222
+ flow_agents_node "$WRITER" record-critique "$TB_CRITIQUE_DIR" \
2223
+ --id tb-fail-review \
2224
+ --reviewer tool-code-reviewer \
2225
+ --verdict fail \
2226
+ --summary "Critique failed — blocking finding." \
2227
+ --timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/tb-critique-fail.out" 2>"$TMPDIR_EVAL/tb-critique-fail.err" || true
2228
+
2229
+ # Record a passing critique (verdict pass, no open findings → claim status verified)
2230
+ if flow_agents_node "$WRITER" record-critique "$TB_CRITIQUE_DIR" \
2231
+ --id tb-pass-review \
2232
+ --reviewer tool-code-reviewer \
2233
+ --verdict pass \
2234
+ --summary "Critique passed — no blocking findings." \
2235
+ --timestamp "2026-05-09T00:02:30Z" >"$TMPDIR_EVAL/tb-critique-pass.out" 2>"$TMPDIR_EVAL/tb-critique-pass.err" \
2236
+ && [[ -f "$TB_CRITIQUE_DIR/trust.bundle" ]]; then
2237
+ if node --input-type=module <<NODEOF 2>"$TMPDIR_EVAL/tb-critique-assert.err"
2238
+ import { readFileSync } from 'node:fs';
2239
+ const bundle = JSON.parse(readFileSync('${TB_CRITIQUE_DIR}/trust.bundle', 'utf8'));
2240
+ const claims = bundle.claims;
2241
+ // Surface uses generateClaimId: search by subjectId (which encodes slug/reviewId)
2242
+ const failCritique = claims.find((c) => c.subjectId && c.subjectId.endsWith('/tb-fail-review'));
2243
+ const passCritique = claims.find((c) => c.subjectId && c.subjectId.endsWith('/tb-pass-review'));
2244
+ if (!failCritique) { process.stderr.write('missing claim for subjectId ending with /tb-fail-review\n'); process.exit(1); }
2245
+ if (!passCritique) { process.stderr.write('missing claim for subjectId ending with /tb-pass-review\n'); process.exit(1); }
2246
+ if (failCritique.status !== 'disputed') { process.stderr.write('fail critique claim status was ' + failCritique.status + ', expected disputed (Surface deriveClaimStatus)\n'); process.exit(1); }
2247
+ if (passCritique.status !== 'verified') { process.stderr.write('pass critique claim status was ' + passCritique.status + ', expected verified (Surface deriveClaimStatus)\n'); process.exit(1); }
2248
+ NODEOF
2249
+ then
2250
+ _pass "trust.bundle claim fidelity: critique fail→disputed, critique pass→verified"
2251
+ else
2252
+ _fail "trust.bundle critique claim fidelity assertion failed: $(cat "$TMPDIR_EVAL/tb-critique-assert.err")"
2253
+ fi
2254
+ else
2255
+ _fail "trust.bundle critique claim fidelity setup failed: $(cat "$TMPDIR_EVAL/tb-critique-pass.out" "$TMPDIR_EVAL/tb-critique-pass.err")"
2256
+ fi
2257
+
2258
+ # ─── AC3: capture authoritative over claimed status + policies present (ADR 0010 maximal) ──
2259
+ TB_CAPTURE_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-capture"
2260
+ mkdir -p "$TB_CAPTURE_DIR"
2261
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_CAPTURE_DIR/trust-bundle-capture--deliver.md"
2262
+ flow_agents_node "$WRITER" init-plan "$TB_CAPTURE_DIR/trust-bundle-capture--deliver.md" \
2263
+ --source-request "Capture-authoritative trust bundle fixture." \
2264
+ --summary "Capture-authoritative trust bundle fixture." \
2265
+ --next-action "Seed a claimed-pass check whose command actually failed in the capture log." \
2266
+ --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/tb-capture-init.out" 2>"$TMPDIR_EVAL/tb-capture-init.err"
2267
+ # Deterministic capture log: the command FAILED (exit 1), recorded before record-evidence.
2268
+ printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1}' > "$TB_CAPTURE_DIR/command-log.jsonl"
2269
+ if flow_agents_node "$WRITER" record-evidence "$TB_CAPTURE_DIR" \
2270
+ --verdict pass \
2271
+ --check-json '{"id":"tb-capture-check","kind":"test","status":"pass","summary":"Claimed pass.","command":"npm test"}' \
2272
+ --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/tb-capture-evidence.out" 2>"$TMPDIR_EVAL/tb-capture-evidence.err" \
2273
+ && [[ -f "$TB_CAPTURE_DIR/trust.bundle" ]]; then
2274
+ if node --input-type=module <<NODEOF 2>"$TMPDIR_EVAL/tb-capture-assert.err"
2275
+ import { readFileSync } from 'node:fs';
2276
+ const bundle = JSON.parse(readFileSync('${TB_CAPTURE_DIR}/trust.bundle', 'utf8'));
2277
+ const claim = bundle.claims.find((c) => c.subjectId && c.subjectId.endsWith('/tb-capture-check'));
2278
+ if (!claim) { process.stderr.write('missing claim for /tb-capture-check\n'); process.exit(1); }
2279
+ if (claim.status !== 'disputed') { process.stderr.write('claimed-pass check with captured FAIL had status ' + claim.status + ', expected disputed (capture authoritative)\n'); process.exit(1); }
2280
+ if (!Array.isArray(bundle.policies) || bundle.policies.length === 0) { process.stderr.write('bundle.policies empty — expected a verification policy per claimType\n'); process.exit(1); }
2281
+ const ev = bundle.evidence.find((e) => e.claimId === claim.id);
2282
+ if (!ev || !ev.execution || ev.execution.isError !== true) { process.stderr.write('capture evidence with execution.isError=true missing\n'); process.exit(1); }
2283
+ NODEOF
2284
+ then
2285
+ _pass "trust.bundle capture authoritative: claimed-pass + captured-fail → disputed; policies present; execution evidence folded in"
2286
+ else
2287
+ _fail "trust.bundle capture-authoritative assertion failed: $(cat "$TMPDIR_EVAL/tb-capture-assert.err")"
2288
+ fi
2289
+ else
2290
+ _fail "trust.bundle capture-authoritative setup failed: $(cat "$TMPDIR_EVAL/tb-capture-evidence.out" "$TMPDIR_EVAL/tb-capture-evidence.err")"
2291
+ fi
2292
+
2293
+ # ─── AC4: render-trust-panel projects the bundle to a standalone Surface Trust Panel (ADR 0010 Phase 3) ──
2294
+ if [[ -f "$TB_CAPTURE_DIR/trust.bundle" ]] && flow_agents_node "$WRITER" render-trust-panel "$TB_CAPTURE_DIR" --out "$TB_CAPTURE_DIR/trust-panel.html" >"$TMPDIR_EVAL/tb-panel.out" 2>"$TMPDIR_EVAL/tb-panel.err"; then
2295
+ PANEL="$TB_CAPTURE_DIR/trust-panel.html"
2296
+ REPORT="$TB_CAPTURE_DIR/trust-report.json"
2297
+ if [[ -f "$PANEL" ]] \
2298
+ && rg -q "<surface-trust-panel" "$PANEL" \
2299
+ && rg -q "customElements.define" "$PANEL" \
2300
+ && rg -q '"status":"disputed"' "$PANEL"; then
2301
+ _pass "render-trust-panel: standalone Trust Panel HTML with inlined Surface element + disputed claim from the derived report"
2302
+ else
2303
+ _fail "render-trust-panel output missing panel element / inlined JS / disputed claim"
2304
+ fi
2305
+ # report artifact: the derived TrustReport (universal input for Surface's Snapshot Viewer / bare element)
2306
+ if [[ -f "$REPORT" ]] && rg -q '"status": "disputed"' "$REPORT" && rg -q '"claims"' "$REPORT"; then
2307
+ _pass "render-trust-panel: also emits trust-report.json (derived report with the disputed claim)"
2308
+ else
2309
+ _fail "render-trust-panel did not emit a valid trust-report.json: $(head -c 200 "$REPORT" 2>/dev/null)"
2310
+ fi
2311
+ else
2312
+ _fail "render-trust-panel failed: $(cat "$TMPDIR_EVAL/tb-panel.out" "$TMPDIR_EVAL/tb-panel.err")"
2313
+ fi
2314
+
2315
+ # ─── AC5: trust-mcp wiring (flow-agents#137) — zero-write print + opt-in, reversible enable/disable ──
2316
+ TB_MCP_CFG="$TMPDIR_EVAL/mcp/.mcp.json"
2317
+ mkdir -p "$(dirname "$TB_MCP_CFG")"
2318
+ echo '{"mcpServers":{"other":{"command":"x","args":[]}}}' > "$TB_MCP_CFG"
2319
+ if flow_agents_node "$WRITER" trust-mcp >"$TMPDIR_EVAL/tb-mcp-print.out" 2>/dev/null \
2320
+ && rg -q "flow-agents-surface-trust" "$TMPDIR_EVAL/tb-mcp-print.out" \
2321
+ && flow_agents_node "$WRITER" trust-mcp --mode enable --config "$TB_MCP_CFG" >/dev/null 2>&1 \
2322
+ && flow_agents_node "$WRITER" trust-mcp --mode enable --config "$TB_MCP_CFG" >/dev/null 2>&1; then
2323
+ if node --input-type=module <<NODEOF 2>"$TMPDIR_EVAL/tb-mcp.err"
2324
+ import { readFileSync } from 'node:fs';
2325
+ const s = (JSON.parse(readFileSync('${TB_MCP_CFG}','utf8')).mcpServers) || {};
2326
+ if (!s['flow-agents-surface-trust']) { process.stderr.write('enable did not add our server\n'); process.exit(1); }
2327
+ if (!s['other']) { process.stderr.write('enable clobbered an existing server\n'); process.exit(1); }
2328
+ if (Object.keys(s).length !== 2) { process.stderr.write('enable not idempotent (count ' + Object.keys(s).length + ')\n'); process.exit(1); }
2329
+ NODEOF
2330
+ then
2331
+ flow_agents_node "$WRITER" trust-mcp --mode disable --config "$TB_MCP_CFG" >/dev/null 2>&1
2332
+ if node --input-type=module <<NODEOF 2>>"$TMPDIR_EVAL/tb-mcp.err"
2333
+ import { readFileSync } from 'node:fs';
2334
+ const s = (JSON.parse(readFileSync('${TB_MCP_CFG}','utf8')).mcpServers) || {};
2335
+ if (s['flow-agents-surface-trust']) { process.stderr.write('disable left our server\n'); process.exit(1); }
2336
+ if (!s['other']) { process.stderr.write('disable removed an existing server\n'); process.exit(1); }
2337
+ NODEOF
2338
+ then
2339
+ _pass "trust-mcp: zero-write print; enable idempotent + preserves existing; disable removes only ours"
2340
+ else
2341
+ _fail "trust-mcp disable assertion failed: $(cat "$TMPDIR_EVAL/tb-mcp.err")"
2342
+ fi
2343
+ else
2344
+ _fail "trust-mcp enable assertion failed: $(cat "$TMPDIR_EVAL/tb-mcp.err")"
2345
+ fi
2346
+ else
2347
+ _fail "trust-mcp print/enable invocation failed"
2348
+ fi
2349
+
2350
+ # ─── AC6: agent liveness (ADR 0012) — held / free-on-lapse / free-on-release ──
2351
+ TB_LIVENESS_ROOT="$TMPDIR_EVAL/liveness/.flow-agents"
2352
+ flow_agents_node "$WRITER" liveness claim held-subj --actor agent-A --at "2026-06-25T11:50:00Z" --ttl 1800 --artifact-root "$TB_LIVENESS_ROOT" >/dev/null 2>&1
2353
+ flow_agents_node "$WRITER" liveness heartbeat held-subj --actor agent-A --at "2026-06-25T11:58:00Z" --artifact-root "$TB_LIVENESS_ROOT" >/dev/null 2>&1
2354
+ flow_agents_node "$WRITER" liveness claim stale-subj --actor agent-B --at "2026-06-25T11:00:00Z" --ttl 1800 --artifact-root "$TB_LIVENESS_ROOT" >/dev/null 2>&1
2355
+ flow_agents_node "$WRITER" liveness claim rel-subj --actor agent-C --at "2026-06-25T11:50:00Z" --ttl 1800 --artifact-root "$TB_LIVENESS_ROOT" >/dev/null 2>&1
2356
+ flow_agents_node "$WRITER" liveness release rel-subj --actor agent-C --at "2026-06-25T11:55:00Z" --artifact-root "$TB_LIVENESS_ROOT" >/dev/null 2>&1
2357
+ LIVENESS_OUT=$(flow_agents_node "$WRITER" liveness status --now "2026-06-25T12:00:00Z" --artifact-root "$TB_LIVENESS_ROOT" 2>/dev/null | grep -viE "unknown format")
2358
+ if echo "$LIVENESS_OUT" | grep -qE "held-subj.*agent-A.*held" \
2359
+ && echo "$LIVENESS_OUT" | grep -qE "stale-subj.*agent-B.*free" \
2360
+ && echo "$LIVENESS_OUT" | grep -qE "rel-subj.*agent-C.*free"; then
2361
+ _pass "liveness: liveness claims recompute held / free(lapsed) / free(released) via Surface deriveTrustStatus (ADR 0012)"
2362
+ else
2363
+ _fail "liveness status mismatch (expected held/free/free): $LIVENESS_OUT"
2364
+ fi
2365
+
2366
+ # ─── AC7: lifecycle-driven liveness (ADR 0012) — init-plan claims, advance-state releases (opt-in) ──
2367
+ TB_LC_ROOT="$TMPDIR_EVAL/liveness-lifecycle/.flow-agents"
2368
+ TB_LC_DIR="$TB_LC_ROOT/lc-task"; mkdir -p "$TB_LC_DIR"
2369
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_LC_DIR/lc-task--deliver.md"
2370
+ FLOW_AGENTS_LIVENESS=on FLOW_AGENTS_ACTOR=agent-LC flow_agents_node "$WRITER" init-plan "$TB_LC_DIR/lc-task--deliver.md" --task-slug lc-task --source-request x --summary y --next-action z --timestamp "2026-06-25T11:50:00Z" >/dev/null 2>&1
2371
+ LC_HELD=$(flow_agents_node "$WRITER" liveness status --now "2026-06-25T12:00:00Z" --artifact-root "$TB_LC_ROOT" 2>/dev/null | grep -viE "unknown format")
2372
+ FLOW_AGENTS_LIVENESS=on FLOW_AGENTS_ACTOR=agent-LC flow_agents_node "$WRITER" advance-state "$TB_LC_DIR" --status delivered --phase done --task-slug lc-task --timestamp "2026-06-25T11:55:00Z" >/dev/null 2>&1
2373
+ LC_FREE=$(flow_agents_node "$WRITER" liveness status --now "2026-06-25T12:00:00Z" --artifact-root "$TB_LC_ROOT" 2>/dev/null | grep -viE "unknown format")
2374
+ TB_OFF_ROOT="$TMPDIR_EVAL/liveness-off/.flow-agents"; mkdir -p "$TB_OFF_ROOT/off-task"
2375
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_OFF_ROOT/off-task/off-task--deliver.md"
2376
+ flow_agents_node "$WRITER" init-plan "$TB_OFF_ROOT/off-task/off-task--deliver.md" --task-slug off-task --source-request x --summary y --next-action z >/dev/null 2>&1
2377
+ if echo "$LC_HELD" | grep -qE "lc-task.*agent-LC.*held" && echo "$LC_FREE" | grep -qE "lc-task.*agent-LC.*free" && [ ! -f "$TB_OFF_ROOT/liveness/events.jsonl" ]; then
2378
+ _pass "liveness lifecycle: init-plan claims (held), advance→delivered releases (free); opt-in respected (no events when disabled)"
2379
+ else
2380
+ _fail "liveness lifecycle mismatch: held=[$LC_HELD] free=[$LC_FREE] off=$([ -f "$TB_OFF_ROOT/liveness/events.jsonl" ] && echo wrote || echo none)"
2381
+ fi
2382
+
2383
+ # ─── AC8: bundle-writers fail LOUDLY when Surface unavailable — no silent data loss (#156) ──
2384
+ TB_FO_DIR="$TMPDIR_EVAL/repo/.flow-agents/failopen"
2385
+ mkdir -p "$TB_FO_DIR"
2386
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_FO_DIR/failopen--deliver.md"
2387
+ flow_agents_node "$WRITER" init-plan "$TB_FO_DIR/failopen--deliver.md" --task-slug failopen --source-request x --summary y --next-action z --timestamp "2026-05-09T00:00:00Z" >/dev/null 2>&1
2388
+ flow_agents_node "$WRITER" record-evidence "$TB_FO_DIR" --verdict pass --check-json '{"id":"c1","kind":"test","status":"pass","summary":"s"}' --timestamp "2026-05-09T00:01:00Z" >/dev/null 2>&1
2389
+ # With Surface forced-unavailable, record-critique MUST fail (non-zero), not silently drop the critique.
2390
+ if FLOW_AGENTS_SURFACE_UNAVAILABLE=1 flow_agents_node "$WRITER" record-critique "$TB_FO_DIR" --id rev-fo --reviewer r --verdict pass --summary fo --timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/failopen.out" 2>&1; then
2391
+ _fail "record-critique fail-opened (exit 0) when Surface unavailable — SILENT DATA LOSS: $(cat "$TMPDIR_EVAL/failopen.out")"
2392
+ elif grep -qiE "was NOT written|not persisted" "$TMPDIR_EVAL/failopen.out"; then
2393
+ _pass "bundle-writers fail loudly (no silent data loss) when Surface unavailable (#156)"
2394
+ else
2395
+ _fail "record-critique failed but without a clear not-persisted message: $(cat "$TMPDIR_EVAL/failopen.out")"
2396
+ fi
2397
+
2398
+
2399
+ # ─── AC3: statusFunctionVersion conformance ───────────────────────────────────
2400
+ # Assert the statusFunctionVersion embedded in the emitted trust.bundle source
2401
+ # field matches @kontourai/surface's exported statusFunctionVersion constant.
2402
+ # Also run hachure conformance vectors through Surface's deriveClaimStatus to
2403
+ # confirm our producer path produces canonical statuses.
2404
+ TB_CONF_DIR="$TMPDIR_EVAL/repo/.flow-agents/trust-bundle-conformance"
2405
+ mkdir -p "$TB_CONF_DIR"
2406
+ cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$TB_CONF_DIR/trust-bundle-conformance--deliver.md"
2407
+ flow_agents_node "$WRITER" init-plan "$TB_CONF_DIR/trust-bundle-conformance--deliver.md" --source-request "Conformance fixture." --summary "Conformance fixture." --next-action "Record evidence and check statusFunctionVersion." --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/tb-conf-init.out" 2>"$TMPDIR_EVAL/tb-conf-init.err"
2408
+ flow_agents_node "$WRITER" record-evidence "$TB_CONF_DIR" --verdict pass --check-json '{"id":"conf-check","kind":"test","status":"pass","summary":"Conformance check passed."}' --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/tb-conf-evidence.out" 2>"$TMPDIR_EVAL/tb-conf-evidence.err"
2409
+
2410
+ if [[ -f "$TB_CONF_DIR/trust.bundle" ]]; then
2411
+ if node --input-type=module <<NODEOF 2>"$TMPDIR_EVAL/tb-sfv-check.err"
2412
+ import { readFileSync } from 'node:fs';
2413
+ import { statusFunctionVersion } from '@kontourai/surface';
2414
+ const bundle = JSON.parse(readFileSync('${TB_CONF_DIR}/trust.bundle', 'utf8'));
2415
+ // statusFunctionVersion is encoded in the source field as "...;statusFunctionVersion=<version>"
2416
+ const sourceMatch = (bundle.source || '').match(/statusFunctionVersion=(.+)$/);
2417
+ if (!sourceMatch) { process.stderr.write('bundle source does not contain statusFunctionVersion: ' + bundle.source + '\n'); process.exit(1); }
2418
+ const bundleSfv = sourceMatch[1];
2419
+ const surfaceSfv = String(statusFunctionVersion);
2420
+ if (bundleSfv !== surfaceSfv) {
2421
+ process.stderr.write('bundle statusFunctionVersion ' + bundleSfv + ' does not match Surface statusFunctionVersion ' + surfaceSfv + '\n');
2422
+ process.exit(1);
2423
+ }
2424
+ NODEOF
2425
+ then
2426
+ _pass "trust.bundle source encodes statusFunctionVersion matching Surface's canonical export"
2427
+ else
2428
+ _fail "trust.bundle statusFunctionVersion mismatch: $(cat "$TMPDIR_EVAL/tb-sfv-check.err")"
2429
+ fi
2430
+ fi
2431
+
2432
+ # Conformance vectors: assert Surface's deriveClaimStatus produces canonical statuses
2433
+ # for hachure's reference sf-*.json vectors (sf-verified-commit → verified, sf-disputed-blocking → disputed).
2434
+ HACHURE_CONF="$ROOT/node_modules/hachure/conformance"
2435
+ if [[ -d "$HACHURE_CONF" ]]; then
2436
+ if node --input-type=module <<NODEOF 2>"$TMPDIR_EVAL/tb-conf-vectors.err"
2437
+ import { readFileSync, readdirSync } from 'node:fs';
2438
+ import { deriveClaimStatus, statusFunctionVersion } from '@kontourai/surface';
2439
+ const confDir = '${HACHURE_CONF}';
2440
+ const vectors = readdirSync(confDir).filter(f => f.startsWith('sf-') && f.endsWith('.json'));
2441
+ let passed = 0; let failed = 0;
2442
+ for (const vec of vectors) {
2443
+ const data = JSON.parse(readFileSync(confDir + '/' + vec, 'utf8'));
2444
+ const { input, expect, now: nowStr } = data;
2445
+ const now = nowStr ? new Date(nowStr) : new Date();
2446
+ for (const [claimId, expectedStatus] of Object.entries(expect.statusByClaimId ?? {})) {
2447
+ const claim = input.claims.find((c) => c.id === claimId);
2448
+ if (!claim) { process.stderr.write('vector ' + vec + ': claim ' + claimId + ' not found\n'); failed++; continue; }
2449
+ const evidence = (input.evidence || []).filter((e) => e.claimId === claimId);
2450
+ const events = (input.events || []).filter((e) => e.claimId === claimId);
2451
+ const policies = (input.policies || []);
2452
+ const authorityTrace = (input.authorityTrace || []);
2453
+ const result = deriveClaimStatus({ claim, evidence, events, policies, now, authorityTrace });
2454
+ if (result.status !== expectedStatus) {
2455
+ process.stderr.write('vector ' + vec + ' claim ' + claimId + ': got ' + result.status + ', expected ' + expectedStatus + '\n');
2456
+ failed++;
2457
+ } else {
2458
+ passed++;
2459
+ }
2460
+ }
2461
+ }
2462
+ process.stderr.write('conformance vectors: ' + passed + ' passed, ' + failed + ' failed (statusFunctionVersion=' + statusFunctionVersion + ')\n');
2463
+ if (failed > 0) process.exit(1);
2464
+ NODEOF
2465
+ then
2466
+ _pass "hachure conformance vectors pass Surface deriveClaimStatus"
2467
+ else
2468
+ _fail "hachure conformance vectors failed: $(cat "$TMPDIR_EVAL/tb-conf-vectors.err")"
2469
+ fi
2470
+ fi
2471
+
2472
+ # ─── Deterministic session slug from work-item ref (#161) ───────────────────
2473
+
2474
+ WORK_ITEM_ROOT="$TMPDIR_EVAL/work-item-repo/.flow-agents"
2475
+
2476
+ # (a) --work-item derives deterministic slug kontourai-flow-agents-161
2477
+ if flow_agents_node "$WRITER" ensure-session \
2478
+ --artifact-root "$WORK_ITEM_ROOT" \
2479
+ --work-item "kontourai/flow-agents#161" \
2480
+ --title "Work Item 161" \
2481
+ --summary "Deterministic slug from work-item ref." \
2482
+ --timestamp "2026-06-25T00:00:00Z" >"$TMPDIR_EVAL/wi-ensure.out" 2>"$TMPDIR_EVAL/wi-ensure.err"; then
2483
+ _pass "ensure-session --work-item derives slug kontourai-flow-agents-161"
2484
+ else
2485
+ _fail "ensure-session --work-item failed: $(cat "$TMPDIR_EVAL/wi-ensure.out" "$TMPDIR_EVAL/wi-ensure.err")"
2486
+ fi
2487
+
2488
+ if [[ -f "$WORK_ITEM_ROOT/kontourai-flow-agents-161/state.json" ]]; then
2489
+ _pass "ensure-session --work-item creates expected session directory"
2490
+ else
2491
+ _fail "ensure-session --work-item did not create $WORK_ITEM_ROOT/kontourai-flow-agents-161/"
2492
+ fi
2493
+
2494
+ # (b) idempotency: second call same ref → same directory, no failure
2495
+ if flow_agents_node "$WRITER" ensure-session \
2496
+ --artifact-root "$WORK_ITEM_ROOT" \
2497
+ --work-item "kontourai/flow-agents#161" \
2498
+ --title "Work Item 161 Second" \
2499
+ --summary "Idempotent call." \
2500
+ --timestamp "2026-06-25T00:00:01Z" >"$TMPDIR_EVAL/wi-ensure2.out" 2>"$TMPDIR_EVAL/wi-ensure2.err" \
2501
+ && [[ -f "$WORK_ITEM_ROOT/kontourai-flow-agents-161/state.json" ]]; then
2502
+ _pass "ensure-session --work-item is idempotent (same slug/dir on second call)"
2503
+ else
2504
+ _fail "ensure-session --work-item idempotency failed: $(cat "$TMPDIR_EVAL/wi-ensure2.out" "$TMPDIR_EVAL/wi-ensure2.err")"
2505
+ fi
2506
+
2507
+ # (c) --task-slug wins over --work-item (back-compat: explicit overrides derived)
2508
+ TASK_SLUG_ROOT="$TMPDIR_EVAL/task-slug-repo/.flow-agents"
2509
+ if flow_agents_node "$WRITER" ensure-session \
2510
+ --artifact-root "$TASK_SLUG_ROOT" \
2511
+ --task-slug "manual-slug" \
2512
+ --work-item "kontourai/flow-agents#161" \
2513
+ --title "Manual Slug" \
2514
+ --summary "Explicit task-slug must win over work-item." \
2515
+ --timestamp "2026-06-25T00:00:02Z" >"$TMPDIR_EVAL/wi-taskslug.out" 2>"$TMPDIR_EVAL/wi-taskslug.err" \
2516
+ && [[ -d "$TASK_SLUG_ROOT/manual-slug" ]] \
2517
+ && [[ ! -d "$TASK_SLUG_ROOT/kontourai-flow-agents-161" ]]; then
2518
+ _pass "ensure-session --task-slug wins over --work-item (back-compat)"
2519
+ else
2520
+ _fail "ensure-session --task-slug did not win over --work-item: $(cat "$TMPDIR_EVAL/wi-taskslug.out" "$TMPDIR_EVAL/wi-taskslug.err")"
2521
+ fi
2522
+
2523
+ # (c2) --task-slug only (no --work-item) still works
2524
+ TASK_SLUG_ONLY_ROOT="$TMPDIR_EVAL/task-slug-only-repo/.flow-agents"
2525
+ if flow_agents_node "$WRITER" ensure-session \
2526
+ --artifact-root "$TASK_SLUG_ONLY_ROOT" \
2527
+ --task-slug "explicit-only" \
2528
+ --title "Explicit Only" \
2529
+ --summary "task-slug only, no work-item." \
2530
+ --timestamp "2026-06-25T00:00:03Z" >"$TMPDIR_EVAL/wi-onlyslug.out" 2>"$TMPDIR_EVAL/wi-onlyslug.err" \
2531
+ && [[ -d "$TASK_SLUG_ONLY_ROOT/explicit-only" ]]; then
2532
+ _pass "ensure-session --task-slug alone still works (back-compat regression guard)"
2533
+ else
2534
+ _fail "ensure-session --task-slug alone failed: $(cat "$TMPDIR_EVAL/wi-onlyslug.out" "$TMPDIR_EVAL/wi-onlyslug.err")"
2535
+ fi
2536
+
2537
+ # (d) liveness subjectId matches work-item slug
2538
+ # ensure-session establishes the slug; liveness events (emitted by init-plan/advance-state) key
2539
+ # on that same slug as subjectId. We verify this by emitting two liveness claim events directly
2540
+ # via `liveness claim` using the slug derived from the ref, then asserting both share subjectId.
2541
+ LIVENESS_WORK_ROOT="$TMPDIR_EVAL/liveness-wi-repo/.flow-agents"
2542
+ # First: ensure-session --work-item produces the expected slug (directory name proof)
2543
+ if flow_agents_node "$WRITER" ensure-session \
2544
+ --artifact-root "$LIVENESS_WORK_ROOT" \
2545
+ --work-item "kontourai/flow-agents#162" \
2546
+ --title "Liveness Work Item" \
2547
+ --summary "Liveness subjectId test." \
2548
+ --timestamp "2026-06-25T00:00:04Z" >"$TMPDIR_EVAL/wi-liveness1.out" 2>"$TMPDIR_EVAL/wi-liveness1.err" \
2549
+ && [[ -d "$LIVENESS_WORK_ROOT/kontourai-flow-agents-162" ]]; then
2550
+ _pass "ensure-session --work-item creates session dir with deterministic slug"
2551
+ else
2552
+ _fail "ensure-session --work-item session dir check failed: $(cat "$TMPDIR_EVAL/wi-liveness1.out" "$TMPDIR_EVAL/wi-liveness1.err")"
2553
+ fi
2554
+
2555
# Emit two liveness claim events for the same subjectId, one per actor
# (as init-plan does when FLOW_AGENTS_LIVENESS=on).
# This proves: same work-item ref → same slug → same subjectId across two agents.
# Each actor's stdout/stderr goes to its own wi-liveness-claim-<suffix>.{out,err} file.
for liveness_actor_suffix in a b; do
  FLOW_AGENTS_ACTOR="agent-${liveness_actor_suffix}" flow_agents_node "$WRITER" liveness claim \
    --artifact-root "$LIVENESS_WORK_ROOT" \
    kontourai-flow-agents-162 \
    >"$TMPDIR_EVAL/wi-liveness-claim-${liveness_actor_suffix}.out" \
    2>"$TMPDIR_EVAL/wi-liveness-claim-${liveness_actor_suffix}.err"
done
2563
+
2564
# The claim events are expected in liveness/events.jsonl under the artifact root.
LIVENESS_EVENTS="$LIVENESS_WORK_ROOT/liveness/events.jsonl"

# Fail when the log is absent or holds no event for the expected subjectId;
# grep is only reached when the file exists.
if [[ ! -f "$LIVENESS_EVENTS" ]] \
  || ! grep -q '"subjectId":"kontourai-flow-agents-162"' "$LIVENESS_EVENTS"; then
  _fail "liveness events missing expected subjectId: $(cat "$LIVENESS_EVENTS" 2>/dev/null || echo 'file not found')"
else
  _pass "liveness events contain subjectId kontourai-flow-agents-162"
fi
2571
+
2572
# Both events must share the same subjectId value (two agents, same ref → same subjectId).

#######################################
# Count the lines of an events file that mention a given subjectId.
# Arguments: $1 - path to the JSONL events file
#            $2 - subjectId to look for
# Outputs:   exactly one integer on stdout; 0 when the file is missing,
#            unreadable, or contains no matching line
# Returns:   0 always
#######################################
_count_liveness_subject() {
  local events_file=$1 subject_id=$2 matches
  # `grep -c` already prints "0" when nothing matches, yet exits 1. The common
  # `grep -c ... || echo 0` therefore yields "0<newline>0", which is not a valid
  # integer for `[[ -ge ]]`. Ignore the exit status and fall back to 0 only when
  # grep printed nothing at all (missing or unreadable file).
  matches=$(grep -c -F -- "\"subjectId\":\"${subject_id}\"" "$events_file" 2>/dev/null) || true
  printf '%s\n' "${matches:-0}"
}

subject_count=$(_count_liveness_subject "$LIVENESS_EVENTS" "kontourai-flow-agents-162")
if [[ "$subject_count" -ge 2 ]]; then
  _pass "both liveness events share subjectId kontourai-flow-agents-162 (same ref → same subjectId)"
else
  _fail "expected >=2 liveness events with subjectId kontourai-flow-agents-162, found $subject_count"
fi
2579
+
2580
# (e) malformed ref is rejected
# A ref that uses "/" where the "#" separator belongs must make ensure-session exit
# non-zero with a message mentioning the 'owner/repo#id format'.
bad_slash_log="$TMPDIR_EVAL/wi-bad-slash.out"
if ! flow_agents_node "$WRITER" ensure-session \
  --artifact-root "$WORK_ITEM_ROOT" \
  --work-item "kontourai/flow-agents/bad" \
  --title "Bad Ref" \
  --summary "Should fail." \
  --timestamp "2026-06-25T00:00:06Z" >"$bad_slash_log" 2>&1; then
  # Rejected as expected; now make sure it was rejected for the right reason.
  if grep -q 'owner/repo#id format' "$bad_slash_log"; then
    _pass "ensure-session rejects work-item ref without # separator"
  else
    _fail "malformed ref rejection message was unexpected: $(cat "$bad_slash_log")"
  fi
else
  _fail "ensure-session should reject work-item ref without # separator"
fi
2593
+
2594
# A ref whose id part is not a number ("#abc") must be rejected, with a message
# mentioning 'numeric issue number'. Combined stdout/stderr is captured for inspection.
bad_id_log="$TMPDIR_EVAL/wi-bad-id.out"
bad_id_status=0
flow_agents_node "$WRITER" ensure-session \
  --artifact-root "$WORK_ITEM_ROOT" \
  --work-item "kontourai/flow-agents#abc" \
  --title "Bad ID" \
  --summary "Should fail on non-numeric id." \
  --timestamp "2026-06-25T00:00:07Z" >"$bad_id_log" 2>&1 || bad_id_status=$?

if [[ "$bad_id_status" -eq 0 ]]; then
  _fail "ensure-session should reject work-item with non-numeric id"
elif grep -q 'numeric issue number' "$bad_id_log"; then
  _pass "ensure-session rejects work-item with non-numeric id"
else
  _fail "non-numeric id rejection message was unexpected: $(cat "$bad_id_log")"
fi
2606
+
2607
# Neither --task-slug nor --work-item → back-compat error message must contain "task-slug is required"
no_slug_log="$TMPDIR_EVAL/wi-no-slug.out"
if flow_agents_node "$WRITER" ensure-session \
  --artifact-root "$WORK_ITEM_ROOT" \
  --title "No Slug" \
  --summary "Should fail." \
  --timestamp "2026-06-25T00:00:08Z" >"$no_slug_log" 2>&1; then
  # Success here is itself the failure: one of the two flags has to be mandatory.
  _fail "ensure-session should require --task-slug or --work-item"
else
  if grep -q 'task-slug is required' "$no_slug_log"; then
    _pass "ensure-session dies with 'task-slug is required' when neither flag is supplied (back-compat)"
  else
    _fail "missing slug error message lacked 'task-slug is required': $(cat "$no_slug_log")"
  fi
fi
2619
+
2620
+
2106
2621
  if [[ "$errors" -eq 0 ]]; then
2107
2622
  echo "Workflow sidecar writer integration passed."
2108
2623
  exit 0