@kontourai/flow-agents 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +103 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/console-learning-projection.d.ts +1 -0
  14. package/build/src/cli/effective-backlog-settings.d.ts +1 -0
  15. package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
  16. package/build/src/cli/init.d.ts +17 -0
  17. package/build/src/cli/init.js +242 -20
  18. package/build/src/cli/kit.d.ts +1 -0
  19. package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
  20. package/build/src/cli/publish-change-helper.d.ts +1 -0
  21. package/build/src/cli/pull-work-provider.d.ts +1 -0
  22. package/build/src/cli/runtime-adapter.d.ts +1 -0
  23. package/build/src/cli/telemetry-doctor.d.ts +1 -0
  24. package/build/src/cli/usage-feedback.d.ts +1 -0
  25. package/build/src/cli/utterance-check.d.ts +1 -0
  26. package/build/src/cli/validate-hook-influence.d.ts +1 -0
  27. package/build/src/cli/validate-source-tree.d.ts +1 -0
  28. package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
  29. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  30. package/build/src/cli/verify.d.ts +1 -0
  31. package/build/src/cli/verify.js +90 -0
  32. package/build/src/cli/veritas-governance.d.ts +1 -0
  33. package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
  34. package/build/src/cli/workflow-sidecar.d.ts +324 -0
  35. package/build/src/cli/workflow-sidecar.js +1973 -90
  36. package/build/src/cli.d.ts +2 -0
  37. package/build/src/cli.js +2 -3
  38. package/build/src/flow-kit/validate.d.ts +81 -0
  39. package/build/src/index.d.ts +5 -0
  40. package/build/src/index.js +36 -0
  41. package/build/src/lib/args.d.ts +8 -0
  42. package/build/src/lib/flow-resolver.d.ts +82 -0
  43. package/build/src/lib/flow-resolver.js +237 -0
  44. package/build/src/lib/fs.d.ts +7 -0
  45. package/build/src/lib/workflow-learning-projection.d.ts +132 -0
  46. package/build/src/runtime-adapters.d.ts +18 -0
  47. package/build/src/tools/build-universal-bundles.d.ts +2 -0
  48. package/build/src/tools/build-universal-bundles.js +34 -22
  49. package/build/src/tools/common.d.ts +9 -0
  50. package/build/src/tools/generate-context-map.d.ts +2 -0
  51. package/build/src/tools/generate-context-map.js +3 -16
  52. package/build/src/tools/validate-package.d.ts +2 -0
  53. package/build/src/tools/validate-source-tree.d.ts +2 -0
  54. package/build/src/tools/validate-source-tree.js +42 -162
  55. package/context/contracts/artifact-contract.md +10 -0
  56. package/context/contracts/delivery-contract.md +1 -0
  57. package/context/contracts/review-contract.md +1 -0
  58. package/context/contracts/verification-contract.md +2 -0
  59. package/context/gate-awareness.md +39 -0
  60. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  61. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  62. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  63. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  64. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  65. package/docs/adr/0007-skill-audit.md +1 -1
  66. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  67. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  68. package/docs/adr/0011-mcp-posture.md +100 -0
  69. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  70. package/docs/adr/0013-context-lifecycle.md +151 -0
  71. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  72. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  73. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  74. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  75. package/docs/agent-system-guidebook.md +5 -12
  76. package/docs/context-map.md +4 -10
  77. package/docs/developer-architecture.md +14 -0
  78. package/docs/index.md +3 -2
  79. package/docs/integrations/framework-adapter.md +19 -6
  80. package/docs/integrations/index.md +2 -2
  81. package/docs/north-star.md +4 -4
  82. package/docs/operating-layers.md +3 -3
  83. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  84. package/docs/repository-structure.md +2 -2
  85. package/docs/skills-map.md +1 -0
  86. package/docs/spec/runtime-hook-surface.md +78 -10
  87. package/docs/standards-register.md +3 -3
  88. package/docs/survey-utterance-check.md +1 -1
  89. package/docs/trust-anchor-adoption.md +197 -0
  90. package/docs/verifiable-trust.md +95 -0
  91. package/docs/veritas-integration.md +2 -2
  92. package/docs/workflow-usage-guide.md +69 -0
  93. package/evals/acceptance/DEMO-false-completion.md +144 -0
  94. package/evals/acceptance/demo-cast.sh +92 -0
  95. package/evals/acceptance/demo-false-completion.sh +72 -0
  96. package/evals/acceptance/demo-real-evidence.sh +104 -0
  97. package/evals/acceptance/demo.tape +29 -0
  98. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  99. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  100. package/evals/acceptance/prove-teeth.sh +105 -0
  101. package/evals/ci/antigaming-suite.sh +54 -0
  102. package/evals/ci/run-baseline.sh +2 -0
  103. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  104. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  105. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  106. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  107. package/evals/integration/test_builder_step_producers.sh +379 -0
  108. package/evals/integration/test_bundle_install.sh +35 -71
  109. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  110. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  111. package/evals/integration/test_checkpoint_signing.sh +489 -0
  112. package/evals/integration/test_claim_lookup.sh +352 -0
  113. package/evals/integration/test_command_log_integrity.sh +275 -0
  114. package/evals/integration/test_context_map.sh +0 -2
  115. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  116. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  117. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  118. package/evals/integration/test_flow_kit_repository.sh +2 -0
  119. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  120. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  121. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  122. package/evals/integration/test_gate_lockdown.sh +1137 -0
  123. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  124. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  125. package/evals/integration/test_goal_fit_hook.sh +69 -4
  126. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  127. package/evals/integration/test_hook_category_behaviors.sh +14 -0
  128. package/evals/integration/test_install_merge.sh +1176 -0
  129. package/evals/integration/test_mint_attestation.sh +373 -0
  130. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  131. package/evals/integration/test_publish_delivery.sh +269 -0
  132. package/evals/integration/test_reconcile_soundness.sh +528 -0
  133. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  134. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  135. package/evals/integration/test_trust_checkpoint.sh +325 -0
  136. package/evals/integration/test_trust_reconcile.sh +293 -0
  137. package/evals/integration/test_verify_cli.sh +208 -0
  138. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  139. package/evals/lib/node.sh +0 -6
  140. package/evals/run.sh +47 -0
  141. package/evals/static/test_library_exports.sh +85 -0
  142. package/evals/static/test_universal_bundles.sh +15 -0
  143. package/evals/static/test_workflow_skills.sh +6 -13
  144. package/install.sh +0 -7
  145. package/integrations/strands-ts/README.md +25 -15
  146. package/integrations/veritas/flow-agents.adapter.json +1 -2
  147. package/kits/builder/flows/build.flow.json +59 -12
  148. package/kits/builder/kit.json +85 -15
  149. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  150. package/kits/builder/skills/deliver/SKILL.md +36 -6
  151. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  152. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  153. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  154. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  155. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  156. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  157. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  158. package/kits/knowledge/adapters/default-store/index.js +38 -0
  159. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  160. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  161. package/kits/knowledge/docs/store-contract.md +314 -0
  162. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  163. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  164. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  165. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  166. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  167. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  168. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  169. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  170. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  171. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  172. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  173. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  174. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  175. package/kits/knowledge/kit.json +51 -1
  176. package/package.json +13 -4
  177. package/packaging/conformance/README.md +10 -2
  178. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  179. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  180. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  181. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  182. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  183. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  184. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  185. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  186. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  187. package/packaging/conformance/run-conformance.js +1 -1
  188. package/scripts/README.md +2 -1
  189. package/scripts/build-universal-bundles.js +0 -1
  190. package/scripts/ci/mint-attestation.js +221 -0
  191. package/scripts/ci/trust-reconcile.js +545 -0
  192. package/scripts/hooks/config-protection.js +423 -1
  193. package/scripts/hooks/evidence-capture.js +348 -0
  194. package/scripts/hooks/lib/liveness-read.js +113 -0
  195. package/scripts/hooks/run-hook.js +6 -1
  196. package/scripts/hooks/stop-goal-fit.js +1471 -79
  197. package/scripts/hooks/workflow-steering.js +135 -5
  198. package/scripts/install-codex-home.sh +39 -0
  199. package/scripts/install-merge.js +330 -0
  200. package/src/cli/init.ts +218 -20
  201. package/src/cli/validate-workflow-artifacts.ts +18 -2
  202. package/src/cli/verify.ts +100 -0
  203. package/src/cli/workflow-sidecar.ts +2093 -84
  204. package/src/cli.ts +2 -3
  205. package/src/index.ts +53 -0
  206. package/src/lib/flow-resolver.ts +284 -0
  207. package/src/tools/build-universal-bundles.ts +34 -21
  208. package/src/tools/generate-context-map.ts +3 -17
  209. package/src/tools/validate-source-tree.ts +44 -104
  210. package/tsconfig.json +1 -0
  211. package/build/src/tools/filter-installed-packs.js +0 -135
  212. package/packaging/packs.json +0 -49
  213. package/scripts/filter-installed-packs.js +0 -2
  214. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,399 @@
1
+ #!/usr/bin/env bash
2
+ # test_gate_review_inquiry_records.sh — AC1 + AC2 integration tests for gate-review #119.
3
+ #
4
+ # Verifies that the gate-review subcommand emits canonical InquiryRecords
5
+ # (gate-review.inquiries.json) validated against hachure inquiry-record.schema.json.
6
+ #
7
+ # AC1: a session with a gate event yields ≥1 InquiryRecord.
8
+ # AC2: false_block scenario (claim verified + block) and missed_block scenario
9
+ # (expected claim absent) each yield a distinct InquiryRecord with the
10
+ # correct calibration + non-empty advisoryFix.
11
+ #
12
+ # Seed is deterministic: same inputs → same outputs. Surface is loaded from the
13
+ # installed optional dependency (@kontourai/surface).
14
+ set -uo pipefail
15
+
16
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
17
+ source "$ROOT/evals/lib/node.sh"
18
+
19
+ TMPDIR_EVAL="$(mktemp -d)"
20
+ errors=0
21
+
22
+ cleanup() { rm -rf "$TMPDIR_EVAL"; }
23
+ trap cleanup EXIT
24
+
25
+ _pass() { echo " ✓ $1"; }
26
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
27
+
28
+ echo "=== Gate Review InquiryRecord Tests (AC1 + AC2) ==="
29
+
30
+ # ── helpers ──────────────────────────────────────────────────────────────────
31
+
32
+ # JSON query helper using node (no jq dependency). Usage: jq_node FILE EXPR — applies the JS arrow function EXPR to the parsed JSON in FILE and prints the result (exit 2 when the result is null/undefined). FILE is handed to node via argv rather than spliced into the script, so paths containing quotes or backslashes cannot break or alter the JS; EXPR is evaluated as code and must be trusted.
33
+ jq_node() {
34
+ local file="$1"; local expr="$2"
35
+ node -e "
36
+ const d=JSON.parse(require('fs').readFileSync(process.argv[1],'utf8'));
37
+ const r=(${expr})(d);
38
+ if(r===undefined||r===null){process.exit(2);}
39
+ if(typeof r==='boolean'||typeof r==='number'||typeof r==='string'){
40
+ process.stdout.write(String(r)+'\n');
41
+ }else{
42
+ process.stdout.write(JSON.stringify(r)+'\n');
43
+ }" -- "$file"
44
+ }
45
+
46
+ # Write a minimal trust.bundle (one claim; a matching event is seeded only for verified/disputed) for testing. Args:
47
+ #   $1 dir    — session artifact dir; the bundle is written to <dir>/trust.bundle
48
+ #   $2 slug   — session slug
49
+ #   $3 status — claim status (verified|disputed|assumed|stale|unknown)
50
+ seed_trust_bundle() {
51
+ local dir="$1" slug="$2" status="$3"
52
+ local ts="2026-06-24T00:00:00Z"
53
+ local claimId="${slug}/unit-tests.flow-agents.workflow.unit tests pass"
54
+
55
+ # Build events array: add a "verified" or "disputed" event when status requires it
56
+ local events="[]"
57
+ if [[ "$status" == "verified" ]]; then
58
+ events='[{"id":"evt:'"$claimId"'","claimId":"'"$claimId"'","status":"verified","actor":"gate-review-test","method":"validation","evidenceIds":[],"createdAt":"'"$ts"'","verifiedAt":"'"$ts"'"}]'
59
+ elif [[ "$status" == "disputed" ]]; then
60
+ events='[{"id":"evt:'"$claimId"'","claimId":"'"$claimId"'","status":"disputed","actor":"gate-review-test","method":"validation","evidenceIds":[],"createdAt":"'"$ts"'","verifiedAt":"'"$ts"'"}]'
61
+ fi
62
+
63
+ cat > "$dir/trust.bundle" <<JSON
64
+ {
65
+ "schemaVersion": 3,
66
+ "source": "gate-review-test;statusFunctionVersion=1",
67
+ "claims": [
68
+ {
69
+ "id": "$claimId",
70
+ "subjectType": "workflow-check",
71
+ "subjectId": "$slug/unit-tests",
72
+ "surface": "flow-agents.workflow",
73
+ "claimType": "workflow.check.test",
74
+ "fieldOrBehavior": "unit tests pass",
75
+ "value": "pass",
76
+ "status": "$status",
77
+ "createdAt": "$ts",
78
+ "updatedAt": "$ts"
79
+ }
80
+ ],
81
+ "evidence": [],
82
+ "events": $events,
83
+ "policies": []
84
+ }
85
+ JSON
86
+ }
87
+
88
+ # Set the gate block streak file ($1: root, $2: count); a count that is not greater than 0 removes the file instead
89
+ seed_block_streak() {
90
+ local root="$1" count="$2"
91
+ if [[ "$count" -gt 0 ]]; then
92
+ printf '{"count":%d,"hash":"testHash001"}' "$count" > "$root/.goal-fit-block-streak.json"
93
+ else
94
+ rm -f "$root/.goal-fit-block-streak.json"
95
+ fi
96
+ }
97
+
98
+ # Remove the block streak file
99
+ clear_block_streak() {
100
+ rm -f "$1/.goal-fit-block-streak.json"
101
+ }
102
+
103
+ # ── AC1: session with a gate event → ≥1 InquiryRecord ───────────────────────
104
+ echo ""
105
+ echo "--- AC1: gate event → ≥1 InquiryRecord ---"
106
+
107
+ AC1_ROOT="$TMPDIR_EVAL/ac1/.flow-agents"
108
+ AC1_SLUG="ac1-session"
109
+ AC1_DIR="$AC1_ROOT/$AC1_SLUG"
110
+ mkdir -p "$AC1_DIR"
111
+
112
+ # Seed: verified claim + blocked (false_block scenario for AC1)
113
+ seed_trust_bundle "$AC1_DIR" "$AC1_SLUG" "verified"
114
+ seed_block_streak "$AC1_ROOT" 1
115
+
116
+ if flow_agents_node workflow-sidecar gate-review "$AC1_DIR" \
117
+ >"$TMPDIR_EVAL/ac1.out" 2>"$TMPDIR_EVAL/ac1.err"; then
118
+ _pass "AC1: gate-review exits 0"
119
+ else
120
+ _fail "AC1: gate-review failed: $(cat "$TMPDIR_EVAL/ac1.err")"
121
+ fi
122
+
123
+ AC1_INQUIRIES="$AC1_DIR/gate-review.inquiries.json"
124
+ if [[ -f "$AC1_INQUIRIES" ]]; then
125
+ _pass "AC1: gate-review.inquiries.json emitted"
126
+ else
127
+ _fail "AC1: gate-review.inquiries.json missing"
128
+ fi
129
+
130
+ if [[ -f "$AC1_INQUIRIES" ]]; then
131
+ AC1_COUNT="$(jq_node "$AC1_INQUIRIES" 'd => d.length' 2>/dev/null || echo 0)"
132
+ if [[ "$AC1_COUNT" -ge 1 ]]; then
133
+ _pass "AC1: ≥1 InquiryRecord present (count=$AC1_COUNT)"
134
+ else
135
+ _fail "AC1: expected ≥1 InquiryRecord, got $AC1_COUNT"
136
+ fi
137
+
138
+ # Verify each record has required schema fields
139
+ MISSING_FIELDS="$(node -e "
140
+ const records=JSON.parse(require('fs').readFileSync('$AC1_INQUIRIES','utf8'));
141
+ const required=['id','inquiry','outcome','resolutionPath','inputSnapshot','statusFunctionVersion','resolvedAt'];
142
+ const missing=[];
143
+ for(const [i,r] of records.entries()){
144
+ for(const f of required){
145
+ if(!(f in r)) missing.push('record['+i+'].'+f);
146
+ }
147
+ }
148
+ process.stdout.write(missing.join(','));
149
+ " 2>/dev/null)"
150
+ if [[ -z "$MISSING_FIELDS" ]]; then
151
+ _pass "AC1: all InquiryRecords have required schema fields"
152
+ else
153
+ _fail "AC1: InquiryRecords missing fields: $MISSING_FIELDS"
154
+ fi
155
+
156
+ # Verify each record has non-empty advisoryFix in answer.value
157
+ EMPTY_FIX="$(node -e "
158
+ const records=JSON.parse(require('fs').readFileSync('$AC1_INQUIRIES','utf8'));
159
+ const bad=records.filter(r=>!r.answer||!r.answer.value||!r.answer.value.advisoryFix);
160
+ process.stdout.write(bad.map(r=>r.id).join(','));
161
+ " 2>/dev/null)"
162
+ if [[ -z "$EMPTY_FIX" ]]; then
163
+ _pass "AC1: all InquiryRecords have non-empty advisoryFix"
164
+ else
165
+ _fail "AC1: InquiryRecords with empty/missing advisoryFix: $EMPTY_FIX"
166
+ fi
167
+ fi
168
+
169
+ # ── AC2: false_block scenario ─────────────────────────────────────────────────
170
+ echo ""
171
+ echo "--- AC2a: false_block — verified claim + blocked ---"
172
+
173
+ AC2FB_ROOT="$TMPDIR_EVAL/ac2fb/.flow-agents"
174
+ AC2FB_SLUG="ac2-false-block"
175
+ AC2FB_DIR="$AC2FB_ROOT/$AC2FB_SLUG"
176
+ mkdir -p "$AC2FB_DIR"
177
+
178
+ # Seed: verified claim + blocked → false_block
179
+ seed_trust_bundle "$AC2FB_DIR" "$AC2FB_SLUG" "verified"
180
+ seed_block_streak "$AC2FB_ROOT" 2
181
+
182
+ if flow_agents_node workflow-sidecar gate-review "$AC2FB_DIR" \
183
+ >"$TMPDIR_EVAL/ac2fb.out" 2>"$TMPDIR_EVAL/ac2fb.err"; then
184
+ _pass "AC2a: gate-review exits 0"
185
+ else
186
+ _fail "AC2a: gate-review failed: $(cat "$TMPDIR_EVAL/ac2fb.err")"
187
+ fi
188
+
189
+ AC2FB_INQUIRIES="$AC2FB_DIR/gate-review.inquiries.json"
190
+ if [[ -f "$AC2FB_INQUIRIES" ]]; then
191
+ # outcome must be "matched" (claim exists in bundle)
192
+ OUTCOME="$(jq_node "$AC2FB_INQUIRIES" 'd => d[0].outcome' 2>/dev/null || echo "")"
193
+ if [[ "$OUTCOME" == "matched" ]]; then
194
+ _pass "AC2a: false_block InquiryRecord has outcome=matched"
195
+ else
196
+ _fail "AC2a: expected outcome=matched, got '$OUTCOME'"
197
+ fi
198
+
199
+ # calibration must be false_block
200
+ CALIBRATION="$(jq_node "$AC2FB_INQUIRIES" 'd => d[0].answer.value.calibration' 2>/dev/null || echo "")"
201
+ if [[ "$CALIBRATION" == "false_block" ]]; then
202
+ _pass "AC2a: false_block calibration correct"
203
+ else
204
+ _fail "AC2a: expected calibration=false_block, got '$CALIBRATION'"
205
+ fi
206
+
207
+ # advisoryFix must be non-empty
208
+ ADVISORY="$(jq_node "$AC2FB_INQUIRIES" 'd => d[0].answer.value.advisoryFix' 2>/dev/null || echo "")"
209
+ if [[ -n "$ADVISORY" ]] && [[ "$ADVISORY" != "null" ]]; then
210
+ _pass "AC2a: false_block has non-empty advisoryFix"
211
+ else
212
+ _fail "AC2a: false_block advisoryFix is empty"
213
+ fi
214
+
215
+ # schema validation via hachure (validates against inquiry-record.schema.json). The JSON result is handed to the follow-up node calls via argv instead of being spliced between single quotes in JS source, so error text containing quotes or backslashes cannot break parsing and turn a real validation failure into a fail-open skip.
216
+ SCHEMA_RESULT="$(node -e "
217
+ try {
218
+ const { validateInquiryRecord } = require('$ROOT/build/src/cli/workflow-sidecar.js');
219
+ const records = JSON.parse(require('fs').readFileSync('$AC2FB_INQUIRIES','utf8'));
220
+ let allValid = true;
221
+ const errors = [];
222
+ for (const r of records) {
223
+ const result = validateInquiryRecord(r);
224
+ if (result.available && !result.valid) {
225
+ allValid = false;
226
+ errors.push(...result.errors);
227
+ }
228
+ }
229
+ const available = records.length > 0 ? validateInquiryRecord(records[0]).available : false;
230
+ process.stdout.write(JSON.stringify({ available, allValid, errors }));
231
+ } catch(e) { process.stdout.write(JSON.stringify({ available: false, allValid: true, errors: [String(e)] })); }
232
+ " 2>/dev/null)"
233
+ SCHEMA_AVAILABLE="$(node -e "process.stdout.write(JSON.parse(process.argv[1]).available ? 'true' : 'false')" -- "$SCHEMA_RESULT" 2>/dev/null || echo "false")"
234
+ SCHEMA_ALL_VALID="$(node -e "process.stdout.write(JSON.parse(process.argv[1]).allValid ? 'true' : 'false')" -- "$SCHEMA_RESULT" 2>/dev/null || echo "true")"
235
+ if [[ "$SCHEMA_AVAILABLE" == "true" ]]; then
236
+ if [[ "$SCHEMA_ALL_VALID" == "true" ]]; then
237
+ _pass "AC2a: false_block InquiryRecords validate against hachure inquiry-record.schema.json (available=true, valid=true)"
238
+ else
239
+ SCHEMA_ERRORS="$(node -e "process.stdout.write(JSON.parse(process.argv[1]).errors.slice(0,3).join('; '))" -- "$SCHEMA_RESULT" 2>/dev/null || echo "?")"
240
+ _fail "AC2a: InquiryRecord schema validation failed: $SCHEMA_ERRORS"
241
+ fi
242
+ else
243
+ _pass "AC2a: hachure not available — schema validation skipped (fail-open)"
244
+ fi
245
+ fi
246
+
247
+ # ── AC2: missed_block scenario ────────────────────────────────────────────────
248
+ echo ""
249
+ echo "--- AC2b: missed_block — absent criterion ---"
250
+
251
+ AC2MB_ROOT="$TMPDIR_EVAL/ac2mb/.flow-agents"
252
+ AC2MB_SLUG="ac2-missed-block"
253
+ AC2MB_DIR="$AC2MB_ROOT/$AC2MB_SLUG"
254
+ mkdir -p "$AC2MB_DIR"
255
+
256
+ # Seed: empty bundle (no claims) + no block + expected criterion absent → missed_block
257
+ cat > "$AC2MB_DIR/trust.bundle" <<JSON
258
+ {
259
+ "schemaVersion": 3,
260
+ "source": "gate-review-test;statusFunctionVersion=1",
261
+ "claims": [],
262
+ "evidence": [],
263
+ "events": [],
264
+ "policies": []
265
+ }
266
+ JSON
267
+
268
+ # Seed acceptance.json with an expected criterion
269
+ cat > "$AC2MB_DIR/acceptance.json" <<JSON
270
+ {
271
+ "schema_version": "1.0",
272
+ "task_slug": "$AC2MB_SLUG",
273
+ "criteria": [
274
+ { "id": "ac-1", "description": "Unit tests pass", "status": "pending" }
275
+ ]
276
+ }
277
+ JSON
278
+
279
+ # No block streak — gate did NOT fire
280
+ clear_block_streak "$AC2MB_ROOT"
281
+
282
+ if flow_agents_node workflow-sidecar gate-review "$AC2MB_DIR" \
283
+ >"$TMPDIR_EVAL/ac2mb.out" 2>"$TMPDIR_EVAL/ac2mb.err"; then
284
+ _pass "AC2b: gate-review exits 0"
285
+ else
286
+ _fail "AC2b: gate-review failed: $(cat "$TMPDIR_EVAL/ac2mb.err")"
287
+ fi
288
+
289
+ AC2MB_INQUIRIES="$AC2MB_DIR/gate-review.inquiries.json"
290
+ if [[ -f "$AC2MB_INQUIRIES" ]]; then
291
+ # The absent criterion should yield outcome="unsupported"
292
+ OUTCOME_MB="$(jq_node "$AC2MB_INQUIRIES" 'd => d[0].outcome' 2>/dev/null || echo "")"
293
+ if [[ "$OUTCOME_MB" == "unsupported" ]]; then
294
+ _pass "AC2b: missed_block absent criterion yields outcome=unsupported"
295
+ else
296
+ _fail "AC2b: expected outcome=unsupported for absent criterion, got '$OUTCOME_MB'"
297
+ fi
298
+
299
+ # calibration must be missed_block
300
+ CALIBRATION_MB="$(jq_node "$AC2MB_INQUIRIES" 'd => d[0].answer.value.calibration' 2>/dev/null || echo "")"
301
+ if [[ "$CALIBRATION_MB" == "missed_block" ]]; then
302
+ _pass "AC2b: missed_block calibration correct"
303
+ else
304
+ _fail "AC2b: expected calibration=missed_block for absent criterion, got '$CALIBRATION_MB'"
305
+ fi
306
+
307
+ # advisoryFix must be non-empty
308
+ ADVISORY_MB="$(jq_node "$AC2MB_INQUIRIES" 'd => d[0].answer.value.advisoryFix' 2>/dev/null || echo "")"
309
+ if [[ -n "$ADVISORY_MB" ]] && [[ "$ADVISORY_MB" != "null" ]]; then
310
+ _pass "AC2b: missed_block has non-empty advisoryFix"
311
+ else
312
+ _fail "AC2b: missed_block advisoryFix is empty"
313
+ fi
314
+
315
+ # schema validation (hachure inquiry-record.schema.json). The JSON result is handed to the follow-up node calls via argv instead of being spliced between single quotes in JS source, so error text containing quotes or backslashes cannot break parsing and turn a real validation failure into a fail-open skip.
316
+ SCHEMA_RESULT_MB="$(node -e "
317
+ try {
318
+ const { validateInquiryRecord } = require('$ROOT/build/src/cli/workflow-sidecar.js');
319
+ const records = JSON.parse(require('fs').readFileSync('$AC2MB_INQUIRIES','utf8'));
320
+ let allValid = true;
321
+ const errors = [];
322
+ for (const r of records) {
323
+ const result = validateInquiryRecord(r);
324
+ if (result.available && !result.valid) {
325
+ allValid = false;
326
+ errors.push(...result.errors);
327
+ }
328
+ }
329
+ const available = records.length > 0 ? validateInquiryRecord(records[0]).available : false;
330
+ process.stdout.write(JSON.stringify({ available, allValid, errors }));
331
+ } catch(e) { process.stdout.write(JSON.stringify({ available: false, allValid: true, errors: [String(e)] })); }
332
+ " 2>/dev/null)"
333
+ SCHEMA_AVAILABLE_MB="$(node -e "process.stdout.write(JSON.parse(process.argv[1]).available ? 'true' : 'false')" -- "$SCHEMA_RESULT_MB" 2>/dev/null || echo "false")"
334
+ SCHEMA_ALL_VALID_MB="$(node -e "process.stdout.write(JSON.parse(process.argv[1]).allValid ? 'true' : 'false')" -- "$SCHEMA_RESULT_MB" 2>/dev/null || echo "true")"
335
+ if [[ "$SCHEMA_AVAILABLE_MB" == "true" ]]; then
336
+ if [[ "$SCHEMA_ALL_VALID_MB" == "true" ]]; then
337
+ _pass "AC2b: missed_block InquiryRecords validate against hachure inquiry-record.schema.json (available=true, valid=true)"
338
+ else
339
+ SCHEMA_ERRORS_MB="$(node -e "process.stdout.write(JSON.parse(process.argv[1]).errors.slice(0,3).join('; '))" -- "$SCHEMA_RESULT_MB" 2>/dev/null || echo "?")"
340
+ _fail "AC2b: InquiryRecord schema validation failed: $SCHEMA_ERRORS_MB"
341
+ fi
342
+ else
343
+ _pass "AC2b: hachure not available — schema validation skipped (fail-open)"
344
+ fi
345
+
346
+ # Verify the absent criterion is the inquiry target
347
+ TARGET_FIELD="$(jq_node "$AC2MB_INQUIRIES" 'd => d[0].inquiry.target && d[0].inquiry.target.fieldOrBehavior' 2>/dev/null || echo "")"
348
+ if [[ -n "$TARGET_FIELD" ]] && [[ "$TARGET_FIELD" != "null" ]]; then
349
+ _pass "AC2b: absent criterion inquiry has canonical target"
350
+ else
351
+ _fail "AC2b: absent criterion inquiry missing canonical target"
352
+ fi
353
+ fi
354
+
355
+ # ── AC2: correct scenario (gate blocked + disputed claim) ─────────────────────
356
+ echo ""
357
+ echo "--- AC2c: correct — disputed claim + blocked ---"
358
+
359
+ AC2COR_ROOT="$TMPDIR_EVAL/ac2cor/.flow-agents"
360
+ AC2COR_SLUG="ac2-correct"
361
+ AC2COR_DIR="$AC2COR_ROOT/$AC2COR_SLUG"
362
+ mkdir -p "$AC2COR_DIR"
363
+
364
+ # Seed: disputed claim + blocked → correct
365
+ seed_trust_bundle "$AC2COR_DIR" "$AC2COR_SLUG" "disputed"
366
+ seed_block_streak "$AC2COR_ROOT" 1
367
+
368
+ if flow_agents_node workflow-sidecar gate-review "$AC2COR_DIR" \
369
+ >"$TMPDIR_EVAL/ac2cor.out" 2>"$TMPDIR_EVAL/ac2cor.err"; then
370
+ _pass "AC2c: gate-review exits 0"
371
+ else
372
+ _fail "AC2c: gate-review failed: $(cat "$TMPDIR_EVAL/ac2cor.err")"
373
+ fi
374
+
375
+ AC2COR_INQUIRIES="$AC2COR_DIR/gate-review.inquiries.json"
376
+ if [[ -f "$AC2COR_INQUIRIES" ]]; then
377
+ CALIBRATION_COR="$(jq_node "$AC2COR_INQUIRIES" 'd => d[0].answer.value.calibration' 2>/dev/null || echo "")"
378
+ if [[ "$CALIBRATION_COR" == "correct" ]]; then
379
+ _pass "AC2c: correct calibration (disputed+blocked)"
380
+ else
381
+ _fail "AC2c: expected calibration=correct for disputed+blocked, got '$CALIBRATION_COR'"
382
+ fi
383
+ fi
384
+
385
+ # ── AC3: no hooks changed ─────────────────────────────────────────────────────
386
+ echo ""
387
+ echo "--- AC3: hooks unchanged ---"
388
+ if git -C "$ROOT" diff origin/main -- scripts/hooks/ 2>/dev/null | grep -q '^[+-]'; then
389
+ _fail "AC3: scripts/hooks/ was modified (gate-review must not touch hooks)"
390
+ else
391
+ _pass "AC3: scripts/hooks/ unchanged"
392
+ fi
393
+
394
+ # ── Summary ───────────────────────────────────────────────────────────────────
395
+ echo ""
396
+ echo "────────────────────────────────────────────────────────────"
397
+ echo "gate-review InquiryRecord tests: $errors error(s)"
398
+ [ "$errors" -eq 0 ] && echo "PASS" || echo "FAIL"
399
+ exit "$errors"
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env bash
2
+ # test_goal_fit_escape_hatch.sh — block-mode escape hatch contract.
3
+ # Block mode must refuse the same goal-fit gap up to N times, then RELEASE
4
+ # (exit 0) so a genuinely-unsatisfiable goal cannot trap the agent forever.
5
+ set -uo pipefail
6
+
7
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
8
+
9
+ TMPDIR_EVAL="$(mktemp -d)"
10
+ errors=0
11
+ cleanup() { rm -rf "$TMPDIR_EVAL"; }
12
+ trap cleanup EXIT
13
+
14
+ _pass() { echo " ✓ $1"; }
15
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
16
+
17
+ REPO="$TMPDIR_EVAL/repo"
18
+ mkdir -p "$REPO/.flow-agents/stuck"
19
+ printf '# Test Repo\n' > "$REPO/AGENTS.md"
20
+ printf '# Stuck\n\nbranch: main\nstatus: executing\ntype: deliver\n\n## Plan\n\nTBD.\n' \
21
+ > "$REPO/.flow-agents/stuck/stuck--deliver.md"
22
+
23
+ PAYLOAD="{\"hook_event_name\":\"Stop\",\"cwd\":\"$REPO\"}"
24
+
25
+ run_block() {
26
+ printf '%s' "$PAYLOAD" \
27
+ | FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=3 \
28
+ node "$ROOT/scripts/hooks/stop-goal-fit.js" >/dev/null 2>"$1"
29
+ echo $?
30
+ }
31
+
32
+ c1=$(run_block "$TMPDIR_EVAL/b1.err")
33
+ c2=$(run_block "$TMPDIR_EVAL/b2.err")
34
+ c3=$(run_block "$TMPDIR_EVAL/b3.err")
35
+ c4=$(run_block "$TMPDIR_EVAL/b4.err")
36
+
37
+ [[ "$c1" -eq 2 ]] && rg -q 'BLOCK 1/3' "$TMPDIR_EVAL/b1.err" \
38
+ && _pass "first identical block exits 2 (BLOCK 1/3)" \
39
+ || _fail "first block should exit 2 BLOCK 1/3 (got $c1: $(cat "$TMPDIR_EVAL/b1.err"))"
40
+
41
+ [[ "$c2" -eq 2 ]] && rg -q 'BLOCK 2/3' "$TMPDIR_EVAL/b2.err" \
42
+ && _pass "second identical block exits 2 (BLOCK 2/3)" \
43
+ || _fail "second block should exit 2 BLOCK 2/3 (got $c2)"
44
+
45
+ [[ "$c3" -eq 0 ]] && rg -q 'RELEASED after 3 consecutive identical blocks' "$TMPDIR_EVAL/b3.err" \
46
+ && _pass "third identical block RELEASES (exit 0, loud notice)" \
47
+ || _fail "third block should release exit 0 (got $c3: $(cat "$TMPDIR_EVAL/b3.err"))"
48
+
49
+ [[ "$c4" -eq 2 ]] && rg -q 'BLOCK 1/3' "$TMPDIR_EVAL/b4.err" \
50
+ && _pass "streak resets after release (next block is 1/3 again)" \
51
+ || _fail "post-release block should reset to BLOCK 1/3 (got $c4)"
52
+
53
+ # A changing goal-fit gap must reset the streak (progress, not a stuck loop).
54
+ printf '%s' "$PAYLOAD" | FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=3 node "$ROOT/scripts/hooks/stop-goal-fit.js" >/dev/null 2>/dev/null
55
+ # mutate the artifact so the warning set differs
56
+ printf '# Stuck\n\nbranch: main\nstatus: verifying\ntype: deliver\n\n## Plan\n\nDifferent.\n' \
57
+ > "$REPO/.flow-agents/stuck/stuck--deliver.md"
58
+ cd=$(run_block "$TMPDIR_EVAL/bd.err")
59
+ [[ "$cd" -eq 2 ]] && rg -q 'BLOCK 1/3' "$TMPDIR_EVAL/bd.err" \
60
+ && _pass "changed goal-fit gap resets the streak to 1/3" \
61
+ || _fail "changed gap should reset streak (got $cd: $(cat "$TMPDIR_EVAL/bd.err"))"
62
+
63
+ # warn mode never blocks regardless of streak
64
+ wc=$(printf '%s' "$PAYLOAD" | FLOW_AGENTS_GOAL_FIT_MODE=warn node "$ROOT/scripts/hooks/stop-goal-fit.js" >/dev/null 2>/dev/null; echo $?)
65
+ [[ "$wc" -eq 0 ]] && _pass "warn mode exits 0 (escape hatch irrelevant)" \
66
+ || _fail "warn mode should exit 0 (got $wc)"
67
+
68
+ if [[ "$errors" -eq 0 ]]; then
69
+ echo "Goal Fit escape hatch integration passed."
70
+ exit 0
71
+ fi
72
+ echo "Goal Fit escape hatch integration failed: $errors issue(s)."
73
+ exit 1
@@ -5,6 +5,12 @@ set -uo pipefail
5
5
  ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
6
6
  source "$ROOT/evals/lib/node.sh"
7
7
 
8
+ # These checks exercise the block mechanism repeatedly against the same workspace
9
+ # as independent assertions, not a single continuous loop. Disable the block
10
+ # escape hatch here so the streak counter never trips; test_goal_fit_escape_hatch.sh
11
+ # covers the release-after-N behavior on its own.
12
+ export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
13
+
8
14
  TMPDIR_EVAL="$(mktemp -d)"
9
15
  errors=0
10
16
 
@@ -38,8 +44,10 @@ else
38
44
  _fail "goal-fit hook should not block by default"
39
45
  fi
40
46
 
41
- if rg -q 'status:executing' "$TMPDIR_EVAL/stderr.txt" && rg -q 'Definition Of Done' "$TMPDIR_EVAL/stderr.txt" && rg -q 'Goal Fit Gate' "$TMPDIR_EVAL/stderr.txt"; then
42
- _pass "goal-fit hook reports active incomplete delivery"
47
+ # Wave 3 (ADR 0010 2c): Builder heading checks removed; only the ACTIVE_STATUSES signal fires now.
48
+ # The Definition Of Done and Goal Fit Gate heading checks were removed from analyze().
49
+ if rg -q 'status:executing' "$TMPDIR_EVAL/stderr.txt"; then
50
+ _pass "goal-fit hook reports active incomplete delivery (status signal via ACTIVE_STATUSES)"
43
51
  else
44
52
  _fail "goal-fit hook did not report active incomplete delivery"
45
53
  fi
@@ -96,6 +104,41 @@ Build: PASS
96
104
  - [ ] Long-lived docs updated with why/how the feature was built
97
105
  MARKDOWN
98
106
 
107
+ # Adjustment A (2c): Seed a state.json (terminal: done) and an acceptance.json with
108
+ # pending criteria so the sidecar-driven Final Acceptance hygiene check fires.
109
+ # The markdown-based uncheckedInSection(Final Acceptance) check was removed; the
110
+ # acceptance.json pending-criteria check in missingBundleOrStateSignal is its replacement.
111
+ cat > "$REPO/.flow-agents/feedback-loop/state.json" <<'JSON'
112
+ {
113
+ "schema_version": "1.0",
114
+ "task_slug": "feedback-loop",
115
+ "status": "delivered",
116
+ "phase": "done",
117
+ "updated_at": "2026-05-04T00:00:00Z",
118
+ "next_action": { "status": "done", "summary": "Local delivery complete." }
119
+ }
120
+ JSON
121
+
122
+ cat > "$REPO/.flow-agents/feedback-loop/acceptance.json" <<'JSON'
123
+ {
124
+ "schema_version": "1.0",
125
+ "task_slug": "feedback-loop",
126
+ "criteria": [
127
+ {
128
+ "id": "ci-passed",
129
+ "description": "CI/relevant checks passed",
130
+ "status": "pending"
131
+ },
132
+ {
133
+ "id": "docs-updated",
134
+ "description": "Long-lived docs updated with why/how the feature was built",
135
+ "status": "pending"
136
+ }
137
+ ],
138
+ "goal_fit": { "status": "pass", "summary": "User-facing workflow was exercised or documented." }
139
+ }
140
+ JSON
141
+
99
142
  if FLOW_AGENTS_GOAL_FIT_STRICT=true node "$ROOT/scripts/hooks/stop-goal-fit.js" >"$TMPDIR_EVAL/final.out" 2>"$TMPDIR_EVAL/final.err" <<JSON
100
143
  {"hook_event_name":"Stop","cwd":"$REPO"}
101
144
  JSON
@@ -218,6 +261,11 @@ cat > "$REPO/.flow-agents/feedback-loop/handoff.json" <<'JSON'
218
261
  }
219
262
  JSON
220
263
 
264
+ # Phase 4c: trust.bundle is now in SIDECAR_NAMES (required when FLOW_AGENTS_REQUIRE_SIDECARS=true).
265
+ cat > "$REPO/.flow-agents/feedback-loop/trust.bundle" <<'JSON'
266
+ {"schemaVersion":3,"source":"flow-agents/workflow-sidecar","claims":[{"id":"c1","subjectId":"feedback-loop/local-delivery","claimType":"workflow.check.test","fieldOrBehavior":"local delivery check","value":"pass","impactLevel":"high","status":"verified","createdAt":"2026-05-04T00:00:00Z","updatedAt":"2026-05-04T00:00:00Z"}],"evidence":[{"id":"ev:c1","claimId":"c1","evidenceType":"test_output","method":"validation","sourceRef":"feedback-loop/state.json","excerptOrSummary":"local delivery check","observedAt":"2026-05-04T00:00:00Z","collectedBy":"flow-agents/workflow-sidecar","passing":true}],"policies":[],"events":[]}
267
+ JSON
268
+
221
269
  if FLOW_AGENTS_GOAL_FIT_STRICT=true FLOW_AGENTS_REQUIRE_SIDECARS=true node "$ROOT/scripts/hooks/stop-goal-fit.js" >"$TMPDIR_EVAL/sidecar-valid.out" 2>"$TMPDIR_EVAL/sidecar-valid.err" <<JSON
222
270
  {"hook_event_name":"Stop","cwd":"$REPO"}
223
271
  JSON
@@ -331,6 +379,12 @@ cat > "$REPO/.flow-agents/feedback-loop/critique.json" <<'JSON'
331
379
  }
332
380
  JSON
333
381
 
382
+ # Phase 4c: update trust.bundle to reflect the not_verified evidence + fail critique state.
383
+ # The bundle is the sole verification artifact; sidecarGuidance reads from it first.
384
+ cat > "$REPO/.flow-agents/feedback-loop/trust.bundle" <<'JSON'
385
+ {"schemaVersion":3,"source":"flow-agents/workflow-sidecar","claims":[{"id":"c-ext","subjectId":"feedback-loop/external-service","claimType":"workflow.check.external","fieldOrBehavior":"External service was unavailable.\nPretend it passed.","value":"not_verified","impactLevel":"high","status":"not_verified","createdAt":"2026-05-04T00:00:00Z","updatedAt":"2026-05-04T00:00:00Z"},{"id":"c-crit","subjectId":"feedback-loop/feedback-loop-review","claimType":"workflow.critique.review","fieldOrBehavior":"Blocking critique finding remains.","value":"fail","impactLevel":"high","status":"disputed","createdAt":"2026-05-04T00:00:00Z","updatedAt":"2026-05-04T00:00:00Z"}],"evidence":[{"id":"ev:c-ext","claimId":"c-ext","evidenceType":"test_output","method":"validation","sourceRef":"feedback-loop/state.json","excerptOrSummary":"External service was unavailable. Pretend it passed.","observedAt":"2026-05-04T00:00:00Z","collectedBy":"flow-agents/workflow-sidecar","passing":false}],"policies":[],"events":[]}
386
+ JSON
387
+
334
388
  if FLOW_AGENTS_GOAL_FIT_STRICT=true FLOW_AGENTS_REQUIRE_SIDECARS=true node "$ROOT/scripts/hooks/stop-goal-fit.js" >"$TMPDIR_EVAL/sidecar-guidance.out" 2>"$TMPDIR_EVAL/sidecar-guidance.err" <<JSON
335
389
  {"hook_event_name":"Stop","cwd":"$REPO"}
336
390
  JSON
@@ -421,6 +475,11 @@ cat > "$REPO/.flow-agents/feedback-loop/evidence.json" <<'JSON'
421
475
  }
422
476
  JSON
423
477
 
478
+ # Phase 4c: update trust.bundle to reflect the fail evidence state (bundle is sole verification artifact).
479
+ cat > "$REPO/.flow-agents/feedback-loop/trust.bundle" <<'JSON'
480
+ {"schemaVersion":3,"source":"flow-agents/workflow-sidecar","claims":[{"id":"c-fail","subjectId":"feedback-loop/local-delivery","claimType":"workflow.check.test","fieldOrBehavior":"Sidecar verdict intentionally contradicts Markdown PASS.","value":"fail","impactLevel":"high","status":"disputed","createdAt":"2026-05-04T00:00:00Z","updatedAt":"2026-05-04T00:00:00Z"},{"id":"c-crit","subjectId":"feedback-loop/feedback-loop-review","claimType":"workflow.critique.review","fieldOrBehavior":"No blocking critique findings.","value":"pass","impactLevel":"high","status":"verified","createdAt":"2026-05-04T00:00:00Z","updatedAt":"2026-05-04T00:00:00Z"}],"evidence":[],"policies":[],"events":[]}
481
+ JSON
482
+
424
483
  if FLOW_AGENTS_GOAL_FIT_STRICT=true FLOW_AGENTS_REQUIRE_SIDECARS=true FLOW_AGENTS_REQUIRE_CRITIQUE=true node "$ROOT/scripts/hooks/stop-goal-fit.js" >"$TMPDIR_EVAL/sidecar-contradiction.out" 2>"$TMPDIR_EVAL/sidecar-contradiction.err" <<JSON
425
484
  {"hook_event_name":"Stop","cwd":"$REPO"}
426
485
  JSON
@@ -428,8 +487,8 @@ then
428
487
  _fail "strict goal-fit hook should block Markdown/sidecar contradictions"
429
488
  else
430
489
  status=$?
431
- if [[ "$status" -eq 2 ]] && rg -q 'Markdown PASS contradicts evidence.json verdict fail' "$TMPDIR_EVAL/sidecar-contradiction.err"; then
432
- _pass "strict goal-fit hook blocks Markdown/sidecar contradictions"
490
+ if [[ "$status" -eq 2 ]] && rg -q 'evidence verdict:fail' "$TMPDIR_EVAL/sidecar-contradiction.err"; then
491
+ _pass "strict goal-fit hook blocks sidecar evidence verdict fail (markdownVerdict check removed; sidecar path covers it)"
433
492
  else
434
493
  _fail "strict contradiction hook returned unexpected result: status=$status output=$(cat "$TMPDIR_EVAL/sidecar-contradiction.err")"
435
494
  fi
@@ -562,6 +621,12 @@ cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/handoff.json" <<'JSON'
562
621
  }
563
622
  JSON
564
623
 
624
+ # Phase 4c: trust.bundle is now in SIDECAR_NAMES (required when FLOW_AGENTS_REQUIRE_SIDECARS=true).
625
+ # Add a minimal valid trust.bundle so the npm-install-task fixture passes 4c sidecar validation.
626
+ cat > "$NPM_INSTALL_REPO/.flow-agents/npm-install-task/trust.bundle" <<'JSON'
627
+ {"schemaVersion":3,"source":"flow-agents/workflow-sidecar","claims":[{"id":"c1","subjectId":"npm-install-task/build","claimType":"workflow.check.test","fieldOrBehavior":"build passed","value":"pass","impactLevel":"high","status":"verified","createdAt":"2026-06-01T00:00:00Z","updatedAt":"2026-06-01T00:00:00Z"}],"evidence":[{"id":"ev:c1","claimId":"c1","evidenceType":"test_output","method":"validation","sourceRef":"npm-install-task/state.json","excerptOrSummary":"build passed","observedAt":"2026-06-01T00:00:00Z","collectedBy":"flow-agents/workflow-sidecar","passing":true}],"policies":[],"events":[]}
628
+ JSON
629
+
565
630
  # Part 1 of fix: invoke the already-built validator directly (no tsc).
566
631
  # Poison tsc so that any call to it fails; confirm the hook does not call it
567
632
  # and validates clean sidecars successfully.