@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,820 @@
1
+ #!/usr/bin/env bash
2
+ # test_captured_fail_reconciliation.sh — Security regression proof for the
3
+ # namespace-agnostic captured-FAIL reconciliation (AC1) and related fixes.
4
+ #
5
+ # Closes the critical allowlist false-completion bypass:
6
+ # A kit-typed claim (e.g. builder.verify.tests) asserting a command passed, while
7
+ # command-log.jsonl recorded that command as FAIL, used to SHIP (exit 0) whenever
8
+ # the gate didn't resolve the exact flow declaring that exact claimType — including
9
+ # the default no-active-flow case (current.json has no active_flow_id) and any
10
+ # agent-chosen non-declared type.
11
+ #
12
+ # Tests:
13
+ # 1. BYPASS CLOSED: kit-typed claim (builder.verify.tests) + command-log FAIL,
14
+ # NO active flow → PRE-FIX exit 0, POST-FIX exit 2 (namespace-agnostic false-completion).
15
+ # 2. BYPASS CLOSED: agent-chosen non-declared type + active flow + command-log FAIL
16
+ # → POST-FIX exit 2.
17
+ # 3. NO OVER-BLOCK (a): clean session, no captured fails → not blocked.
18
+ # 4. NO OVER-BLOCK (b): fail-then-re-run-to-pass (latest capture PASS) → not blocked.
19
+ # 5. NO OVER-BLOCK (c): acknowledged failure (evidence marks command disputed/failed) → not blocked.
20
+ # 6. NO OVER-BLOCK (d): no-command doc/policy session (NO evidence.execution.label,
21
+ # no command-log) → NOT blocked (fixes #216 over-block).
22
+ # 7. AC3 empty-expects regression: declared-only bundle + fake flow with expects:[]
23
+ # → gate misconfiguration HARD_BLOCK (two-part dependency: union form + empty-expects guard).
24
+ #
25
+ # Deterministic, no model spend, self-cleaning.
26
+ # Usage: bash evals/integration/test_captured_fail_reconciliation.sh
27
+ set -uo pipefail  # nounset + pipefail; errexit is not enabled at this point
28
+
29
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"  # two directories above this script
30
+ GATE="$ROOT/scripts/hooks/stop-goal-fit.js"  # hook script every test invokes through node
31
+
32
+ export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
33
+
34
+ TMP="$(mktemp -d)" || { echo "mktemp -d failed" >&2; exit 1; }  # abort rather than seed fixtures under an empty prefix
35
+ errors=0  # number of failed assertions, incremented by _fail
36
+ _pass() { echo " PASS: $1"; }
37
+ _fail() { echo " FAIL: $1"; errors=$((errors + 1)); }
38
+
39
+ cleanup() { rm -rf -- "${TMP:?}"; }  # :? refuses to expand an empty path; -- ends option parsing
40
+ trap cleanup EXIT
41
+
42
+ # ─── Helper: seed a delivered (terminal) workflow artifact under <dir>/.flow-agents/<slug> ───
43
+ seed_delivered() { # $1=dir (project root; created if missing) $2=slug (artifact dir name under .flow-agents/)
44
+ local p="$1" slug="$2"
45
+ mkdir -p "$p/.flow-agents/$slug"
46
+ printf '# Repo\n' > "$p/AGENTS.md"  # minimal AGENTS.md at the project root
47
+ printf '%s' "{\"schema_version\":\"1.0\",\"task_slug\":\"$slug\",\"status\":\"delivered\",\"phase\":\"done\",\"updated_at\":\"2026-06-27T00:00:00Z\",\"next_action\":{\"status\":\"done\",\"summary\":\"done\"}}" \
48
+ > "$p/.flow-agents/$slug/state.json"  # state.json records status=delivered, phase=done
49
+ cat > "$p/.flow-agents/$slug/$slug--deliver.md" << MD  # unquoted delimiter: $slug expands in the title line
50
+ # $slug
51
+
52
+ branch: main
53
+ status: delivered
54
+ type: deliver
55
+
56
+ ## Definition Of Done
57
+ - [x] tests pass
58
+
59
+ ## Goal Fit Gate
60
+ - [x] acceptance verified
61
+
62
+ ### Verdict: PASS
63
+ MD
64
+ }
65
+
66
+ # ─── Helper: write a bundle with one kit-typed claim (builder.verify.tests), status "verified" ──
67
+ # Its single evidence item has execution.label="npm test", exitCode 0 and passing=True (the critical scenario).
68
+ write_kit_pass_bundle() { # $1=bundle_path (JSON file to write) $2=slug (prefix of subjectId) $3=claim_value (optional, defaults to "pass")
69
+ local claim_val="${3:-pass}"
70
+ python3 - "$1" "$2" "$claim_val" << 'PY'  # script arrives on stdin; the three args become sys.argv[1..3]
71
+ import json, sys
72
+ bundle_path, slug, claim_val = sys.argv[1], sys.argv[2], sys.argv[3]
73
+ bundle = {
74
+ "schemaVersion": 3, "source": "flow-agents/workflow-sidecar",
75
+ "claims": [{
76
+ "id": "c1", "subjectId": slug + "/tests", "subjectType": "flow-step",
77
+ "claimType": "builder.verify.tests",
78
+ "fieldOrBehavior": "npm test",
79
+ "value": claim_val, "impactLevel": "high", "status": "verified",
80
+ "createdAt": "2026-06-27T00:00:00Z", "updatedAt": "2026-06-27T00:00:00Z"
81
+ }],
82
+ "evidence": [{
83
+ "id": "ev1", "claimId": "c1",
84
+ "evidenceType": "command_output", "method": "capture",
85
+ "sourceRef": "command-log.jsonl",
86
+ "excerptOrSummary": "npm test passed (agent claimed)",
87
+ "observedAt": "2026-06-27T00:00:00Z", "collectedBy": "agent",
88
+ "passing": True,
89
+ "execution": {"label": "npm test", "exitCode": 0}
90
+ }],
91
+ "policies": [], "events": []
92
+ }
93
+ json.dump(bundle, open(bundle_path, 'w'))
94
+ PY
95
+ }
96
+
97
+ # ─── Helper: command-log fixture whose only record is a failing `npm test` ───
98
+ write_fail_log() { # $1=log_path (created or truncated; receives exactly one JSONL record)
99
+ local log_path="$1"; printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' > "$log_path"
100
+ }
101
+
102
+ # ─── Helper: command-log fixture whose only record is a passing `npm test` ───
103
+ write_pass_log() { # $1=log_path (created or truncated; receives exactly one JSONL record)
104
+ local log_path="$1"; printf '%s\n' '{"command":"npm test","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' > "$log_path"
105
+ }
106
+
107
+ # ─── Helper: invoke the gate script with a Stop event, block mode ────────────
108
+ run_gate() { # $1=cwd placed in the event payload; exit status is node's; stderr is merged into stdout
109
+ local stop_event="{\"hook_event_name\":\"Stop\",\"cwd\":\"$1\"}"
110
+ FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
111
+ FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000 \
112
+ node "$GATE" <<< "$stop_event" 2>&1
113
+ }
114
+
115
+ printf '\n'
116
+ printf '%s\n' "================================================================="
117
+ printf '%s\n' " Namespace-Agnostic Captured-FAIL Reconciliation"
118
+ printf '%s\n' " (AC1 allowlist bypass closure + AC2 no-over-block)"
119
+ printf '%s\n' "================================================================="
120
+
121
+
122
+ # ─────────────────────────────────────────────────────────────────────────────
123
+ # Test 1: BYPASS CLOSED — kit-typed claim + command-log FAIL, NO active flow
124
+ # ─────────────────────────────────────────────────────────────────────────────
125
+ echo ""
126
+ echo "=== 1. BYPASS CLOSED: kit-typed claim (builder.verify.tests) + command-log FAIL, NO active_flow_id ==="
127
+ echo " PRE-FIX: gate was blind to builder.verify.tests (not workflow.* and no active flow)"
128
+ echo " POST-FIX: capturedFailReconciliation catches it namespace-agnostically"
129
+
130
+ T1="$TMP/t1-bypass"
131
+ seed_delivered "$T1" "bypass-kit"
132
+
133
+ # NO active_flow_id in current.json
134
+ printf '%s' '{"artifact_dir":"bypass-kit"}' > "$T1/.flow-agents/current.json"
135
+
136
+ write_kit_pass_bundle "$T1/.flow-agents/bypass-kit/trust.bundle" "bypass-kit"
137
+ write_fail_log "$T1/.flow-agents/bypass-kit/command-log.jsonl"
138
+
139
+ echo ""
140
+ echo "--- 1a. PRE-FIX simulation: show the gate was blind ---"
141
+ node -e "
142
+ // PRE-FIX: captureCrossReference only checked workflow.* OR declared types.
143
+ // No active_flow_id → declaredClaimTypes = null → only workflow.* selected.
144
+ // builder.verify.tests does NOT start with 'workflow.' → NOT selected → missed.
145
+ const claimType = 'builder.verify.tests';
146
+ const declaredClaimTypes = null; // no active flow
147
+
148
+ // Old code: bundleClaimedPassCommandChecks only included claims in the allowlist
149
+ const inAllowlist = claimType.startsWith('workflow.')
150
+ || (declaredClaimTypes != null && declaredClaimTypes.has(claimType));
151
+ console.log(' builder.verify.tests in allowlist (pre-fix):', inAllowlist);
152
+ console.log(' PRE-FIX: 0 claimed-pass checks → no cross-reference → exit 0 (BYPASS)');
153
+ if (inAllowlist) { console.error('ERROR: pre-fix simulation incorrect'); process.exit(1); }
154
+ " 2>&1 && _pass "PRE-FIX: builder.verify.tests NOT in allowlist → captureCrossReference blind (exit 0)" \
155
+ || _fail "PRE-FIX simulation error"
156
+
157
+ echo ""
158
+ echo "--- 1b. POST-FIX: capturedFailReconciliation blocks namespace-agnostically ---"
159
+ set +e
160
+ t1_out="$(run_gate "$T1")"
161
+ t1_exit=$?
162
+ set -e  # NOTE(review): this turns errexit ON for the rest of the script; the header only sets -uo pipefail — confirm intended
163
+
164
+ echo " POST-FIX exit code: $t1_exit (expected 2)"
165
+ if [ "$t1_exit" -eq 2 ]; then
166
+ _pass "POST-FIX: kit-typed false-completion BLOCKED (exit 2)"
167
+ else
168
+ _fail "POST-FIX: expected exit 2, got $t1_exit. output: ${t1_out:0:300}"
169
+ fi
170
+
171
+ if grep -qF -- "caught false-completion" <<< "$t1_out"; then  # here-string: no echo|grep pipeline for pipefail to trip on
172
+ _pass "POST-FIX: emits 'caught false-completion' (namespace-agnostic)"
173
+ else
174
+ _fail "POST-FIX: missing 'caught false-completion'. output: ${t1_out:0:300}"
175
+ fi
176
+
177
+ if grep -qF -- "npm test" <<< "$t1_out"; then
178
+ _pass "POST-FIX: warning names the contradicted command (npm test)"
179
+ else
180
+ _fail "POST-FIX: warning does not name the command. output: ${t1_out:0:300}"
181
+ fi
182
+
183
+ if grep -qF -- "builder.verify.tests" <<< "$t1_out"; then  # -F: the dots must match literally, not any character
184
+ _pass "POST-FIX: warning names the claimType (builder.verify.tests)"
185
+ else
186
+ _fail "POST-FIX: warning does not name the claimType. output: ${t1_out:0:300}"
187
+ fi
188
+
189
+ echo ""
190
+ echo "--- 1c. Exit code summary ---"
191
+ echo " PRE-FIX exit code (simulated): 0 — builder.verify.tests not in allowlist → gate blind"
192
+ echo " POST-FIX exit code (actual): $t1_exit — capturedFailReconciliation blocks regardless of namespace"
193
+ if [ "$t1_exit" -eq 2 ]; then
194
+ echo " Result: BYPASS CLOSED (pre=0, post=2)"
195
+ else
196
+ echo " Result: BYPASS STILL OPEN"
197
+ fi
198
+
199
+
200
+ # ─────────────────────────────────────────────────────────────────────────────
201
+ # Test 2: BYPASS CLOSED — agent-chosen non-declared type + active flow + FAIL
202
+ # ─────────────────────────────────────────────────────────────────────────────
203
+ echo ""
204
+ echo "=== 2. BYPASS CLOSED: agent-chosen non-declared type + active flow + command-log FAIL ==="
205
+
206
+ T2="$TMP/t2-nondeclared"
207
+ seed_delivered "$T2" "nondeclared"
208
+
209
+ # current.json: active flow (builder.build/verify)
210
+ printf '%s' '{"artifact_dir":"nondeclared","active_flow_id":"builder.build","active_step_id":"verify"}' \
211
+ > "$T2/.flow-agents/current.json"
212
+
213
+ # Fake flow defs dir (safe, not agent-writable); handed to the gate via FLOW_AGENTS_FLOW_DEFS_DIR below
214
+ FLOW_DEFS_DIR="$TMP/flows"
215
+ mkdir -p "$FLOW_DEFS_DIR"
216
+ cat > "$FLOW_DEFS_DIR/builder.build.flow.json" << 'FLOWJSON'
217
+ {
218
+ "id": "builder.build",
219
+ "version": "1.0",
220
+ "gates": {
221
+ "verify-gate": {
222
+ "step": "verify",
223
+ "expects": [
224
+ {
225
+ "id": "tests-evidence",
226
+ "kind": "trust.bundle",
227
+ "required": true,
228
+ "bundle_claim": {
229
+ "claimType": "builder.verify.tests",
230
+ "subjectType": "flow-step",
231
+ "accepted_statuses": ["trusted", "accepted"]
232
+ }
233
+ }
234
+ ]
235
+ }
236
+ }
237
+ }
238
+ FLOWJSON
239
+
240
+ # Bundle: agent-chosen NON-declared claimType (e.g. "acme.custom.verify") claiming npm test passed
241
+ python3 - "$T2/.flow-agents/nondeclared/trust.bundle" "nondeclared" << 'PY'
242
+ import json, sys
243
+ bundle_path, slug = sys.argv[1], sys.argv[2]
244
+ bundle = {
245
+ "schemaVersion": 3, "source": "flow-agents/workflow-sidecar",
246
+ "claims": [{
247
+ "id": "c1", "subjectId": slug + "/tests", "subjectType": "custom",
248
+ "claimType": "acme.custom.verify", # neither workflow.* NOR declared by the flow
249
+ "fieldOrBehavior": "npm test",
250
+ "value": "pass", "impactLevel": "high", "status": "verified",
251
+ "createdAt": "2026-06-27T00:00:00Z", "updatedAt": "2026-06-27T00:00:00Z"
252
+ }],
253
+ "evidence": [{
254
+ "id": "ev1", "claimId": "c1",
255
+ "evidenceType": "command_output", "method": "capture",
256
+ "sourceRef": "command-log.jsonl",
257
+ "excerptOrSummary": "npm test passed (agent claimed)",
258
+ "observedAt": "2026-06-27T00:00:00Z", "collectedBy": "agent",
259
+ "passing": True,
260
+ "execution": {"label": "npm test", "exitCode": 0}
261
+ }],
262
+ "policies": [], "events": []
263
+ }
264
+ json.dump(bundle, open(bundle_path, 'w'))
265
+ PY
266
+ write_fail_log "$T2/.flow-agents/nondeclared/command-log.jsonl"
267
+
268
+ set +e
269
+ t2_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
270
+ FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000 FLOW_AGENTS_FLOW_DEFS_DIR="$FLOW_DEFS_DIR" \
271
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T2\"}")"
272
+ t2_exit=$?
273
+ set -e
274
+
275
+ echo " Non-declared type (acme.custom.verify) + active flow + FAIL: exit=$t2_exit (expected 2)"
276
+ if [ "$t2_exit" -eq 2 ]; then
277
+ _pass "Non-declared type with FAIL: BLOCKED (exit 2)"
278
+ else
279
+ _fail "Non-declared type with FAIL: NOT blocked (exit $t2_exit). output: ${t2_out:0:300}"
280
+ fi
281
+ if grep -qE "caught false-completion|unaccounted at completion" <<< "$t2_out"; then  # -E: alternation is undefined in POSIX basic regex; here-string avoids echo|grep under pipefail
282
+ _pass "Non-declared type: 'caught false-completion' or 'unaccounted' emitted"
283
+ else
284
+ _fail "Non-declared type: expected blocking message not found. output: ${t2_out:0:300}"
285
+ fi
286
+
287
+
288
+ # ─────────────────────────────────────────────────────────────────────────────
289
+ # Test 3: NO OVER-BLOCK (a) — clean session, no captured fails
290
+ # ─────────────────────────────────────────────────────────────────────────────
291
+ echo ""
292
+ echo "=== 3. NO OVER-BLOCK (a): clean session, no captured fails ==="
293
+
294
+ T3="$TMP/t3-clean"
295
+ seed_delivered "$T3" "clean-sess"
296
+ printf '%s' '{"artifact_dir":"clean-sess"}' > "$T3/.flow-agents/current.json"
297
+ write_kit_pass_bundle "$T3/.flow-agents/clean-sess/trust.bundle" "clean-sess"
298
+ write_pass_log "$T3/.flow-agents/clean-sess/command-log.jsonl"
299
+
300
+ set +e
301
+ t3_out="$(run_gate "$T3")"
302
+ t3_exit=$?
303
+ set -e
304
+
305
+ blocked_new="$(grep -cE "unaccounted at completion|namespace-agnostic caught false-completion" <<< "$t3_out" || true)"  # -E: a grep without BRE \| would count 0 and pass this check vacuously
306
+ echo " Clean session (latest=PASS): exit=$t3_exit, new_logic_blocks=$blocked_new"
307
+ if [ "$blocked_new" -eq 0 ]; then
308
+ _pass "Clean session NOT blocked by new reconciliation logic"
309
+ else
310
+ _fail "Clean session INCORRECTLY blocked by new logic. output: ${t3_out:0:300}"
311
+ fi
312
+
313
+
314
+ # ─────────────────────────────────────────────────────────────────────────────
315
+ # Test 4: NO OVER-BLOCK (b) — fail-then-re-run-to-pass (latest=PASS)
316
+ # ─────────────────────────────────────────────────────────────────────────────
317
+ echo ""
318
+ echo "=== 4. NO OVER-BLOCK (b): fail-then-re-run-to-pass (latest capture PASS) ==="
319
+
320
+ T4="$TMP/t4-rerun"
321
+ seed_delivered "$T4" "rerun-pass"
322
+ printf '%s' '{"artifact_dir":"rerun-pass"}' > "$T4/.flow-agents/current.json"
323
+ write_kit_pass_bundle "$T4/.flow-agents/rerun-pass/trust.bundle" "rerun-pass"
324
+ # FAIL first, then PASS one second later (re-run fixed it — latest is PASS)
325
+ {
326
+ printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-27T00:00:00Z","source":"test"}'
327
+ printf '%s\n' '{"command":"npm test","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:01Z","source":"test"}'
328
+ } > "$T4/.flow-agents/rerun-pass/command-log.jsonl"
329
+
330
+ set +e
331
+ t4_out="$(run_gate "$T4")"
332
+ t4_exit=$?
333
+ set -e
334
+
335
+ blocked_new="$(grep -cE "unaccounted at completion|namespace-agnostic caught false-completion" <<< "$t4_out" || true)"  # -E: a grep without BRE \| would count 0 and pass this check vacuously
336
+ echo " Fail-then-re-run-to-pass (latest=PASS): exit=$t4_exit, new_logic_blocks=$blocked_new"
337
+ if [ "$blocked_new" -eq 0 ]; then
338
+ _pass "Fail-then-re-run-to-pass NOT blocked (latest capture PASS clears it)"
339
+ else
340
+ _fail "Fail-then-re-run-to-pass INCORRECTLY blocked. output: ${t4_out:0:300}"
341
+ fi
342
+
343
+
344
+ # ─────────────────────────────────────────────────────────────────────────────
345
+ # Test 5: NO OVER-BLOCK (c) — acknowledged failure (evidence disputed/failed)
346
+ # ─────────────────────────────────────────────────────────────────────────────
347
+ echo ""
348
+ echo "=== 5. NO OVER-BLOCK (c): acknowledged failure (evidence marks command disputed/failed) ==="
349
+
350
+ T5="$TMP/t5-ack"
351
+ seed_delivered "$T5" "ack-fail"
352
+ printf '%s' '{"artifact_dir":"ack-fail"}' > "$T5/.flow-agents/current.json"
353
+
354
+ # Bundle: claim acknowledges failure (status=disputed, value=fail)
355
+ python3 - "$T5/.flow-agents/ack-fail/trust.bundle" "ack-fail" << 'PY'
356
+ import json, sys
357
+ bundle_path, slug = sys.argv[1], sys.argv[2]
358
+ bundle = {
359
+ "schemaVersion": 3, "source": "flow-agents/workflow-sidecar",
360
+ "claims": [{
361
+ "id": "c1", "subjectId": slug + "/tests", "subjectType": "flow-step",
362
+ "claimType": "builder.verify.tests",
363
+ "fieldOrBehavior": "npm test",
364
+ "value": "fail", # acknowledges failure
365
+ "impactLevel": "low", # low-impact avoids surface-unavailable block
366
+ "status": "disputed", # acknowledges failure
367
+ "createdAt": "2026-06-27T00:00:00Z", "updatedAt": "2026-06-27T00:00:00Z"
368
+ }],
369
+ "evidence": [{
370
+ "id": "ev1", "claimId": "c1",
371
+ "evidenceType": "command_output", "method": "capture",
372
+ "sourceRef": "command-log.jsonl",
373
+ "excerptOrSummary": "npm test failed (acknowledged in evidence)",
374
+ "observedAt": "2026-06-27T00:00:00Z", "collectedBy": "agent",
375
+ "passing": False,
376
+ "execution": {"label": "npm test", "exitCode": 1}
377
+ }],
378
+ "policies": [], "events": []
379
+ }
380
+ json.dump(bundle, open(bundle_path, 'w'))
381
+ PY
382
+ write_fail_log "$T5/.flow-agents/ack-fail/command-log.jsonl"
383
+
384
+ set +e
385
+ t5_out="$(run_gate "$T5")"
386
+ t5_exit=$?
387
+ set -e
388
+
389
+ blocked_new="$(grep -cE "unaccounted at completion|namespace-agnostic caught false-completion" <<< "$t5_out" || true)"  # -E: a grep without BRE \| would count 0 and pass this check vacuously
390
+ echo " Acknowledged failure (status=disputed, value=fail): exit=$t5_exit, new_logic_blocks=$blocked_new"
391
+ if [ "$blocked_new" -eq 0 ]; then
392
+ _pass "Acknowledged failure NOT blocked (agent owns the failure in evidence)"
393
+ else
394
+ _fail "Acknowledged failure INCORRECTLY blocked. output: ${t5_out:0:300}"
395
+ fi
396
+
397
+
398
+ # ─────────────────────────────────────────────────────────────────────────────
399
+ # Test 6: NO OVER-BLOCK (d) — no-command doc/policy session (fixes #216)
400
+ # ─────────────────────────────────────────────────────────────────────────────
401
+ echo ""
402
+ echo "=== 6. NO OVER-BLOCK (d): no-command doc/policy session (verified, no execution.label, no command-log) ==="
403
+ echo " (#216 fix: missing-log check must NOT fire when no command was expected)"
404
+
405
+ T6="$TMP/t6-nocommand"
406
+ mkdir -p "$T6/.flow-agents/nocommand"
407
+ printf '# Repo\n' > "$T6/AGENTS.md"
408
+ # State is verified (completing) but no commands were run
409
+ printf '%s' '{"schema_version":"1.0","task_slug":"nocommand","status":"verified","phase":"verification","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"done","summary":"done"}}' \
410
+ > "$T6/.flow-agents/nocommand/state.json"
411
+ cat > "$T6/.flow-agents/nocommand/nocommand--deliver.md" << 'MD'
412
+ # nocommand
413
+
414
+ branch: main
415
+ status: verified
416
+ type: deliver
417
+
418
+ ## Definition Of Done
419
+ - [x] policy document reviewed
420
+
421
+ ## Goal Fit Gate
422
+ - [x] acceptance verified
423
+
424
+ ### Verdict: PASS
425
+ MD
426
+ printf '%s' '{"artifact_dir":"nocommand"}' > "$T6/.flow-agents/current.json"
427
+
428
+ # Bundle with NO execution.label (doc/policy session — no commands run)
429
+ python3 - "$T6/.flow-agents/nocommand/trust.bundle" << 'PY'
430
+ import json, sys
431
+ bundle = {
432
+ "schemaVersion": 3, "source": "flow-agents/workflow-sidecar",
433
+ "claims": [{
434
+ "id": "c1", "subjectId": "nocommand/review", "subjectType": "workflow-check",
435
+ "claimType": "workflow.check.review", "fieldOrBehavior": "policy doc reviewed",
436
+ "value": "pass", "impactLevel": "low", "status": "verified",
437
+ "createdAt": "2026-06-27T00:00:00Z", "updatedAt": "2026-06-27T00:00:00Z"
438
+ }],
439
+ "evidence": [{
440
+ "id": "ev1", "claimId": "c1",
441
+ "evidenceType": "review_output", "method": "manual",
442
+ "sourceRef": "docs/policy.md",
443
+ "excerptOrSummary": "Policy document reviewed and approved",
444
+ "observedAt": "2026-06-27T00:00:00Z", "collectedBy": "agent",
445
+ "passing": True
446
+ # NOTE: NO execution.label — no command was run
447
+ }],
448
+ "policies": [], "events": []
449
+ }
450
+ json.dump(bundle, open(sys.argv[1], 'w'))
451
+ PY
452
+ # NO command-log.jsonl is written for this fixture
453
+
454
+ set +e
455
+ t6_out="$(run_gate "$T6")"
456
+ t6_exit=$?
457
+ set -e
458
+
459
+ blocked_missing_log="$(grep -cF -- "expected capture log is missing" <<< "$t6_out" || true)"
460
+ blocked_new="$(grep -cE "unaccounted at completion|namespace-agnostic caught false-completion" <<< "$t6_out" || true)"  # -E: a grep without BRE \| would count 0 and pass this check vacuously
461
+ echo " No-command session (verified, no execution.label): exit=$t6_exit"
462
+ echo " blocked_by_missing_log=$blocked_missing_log, blocked_by_new_logic=$blocked_new"
463
+ if [ "$blocked_missing_log" -eq 0 ] && [ "$blocked_new" -eq 0 ]; then
464
+ _pass "#216 FIXED: no-command session NOT blocked by missing-log or new reconciliation"
465
+ else
466
+ _fail "#216 NOT FIXED or new regression: session blocked. output: ${t6_out:0:400}"
467
+ fi
468
+
469
+
470
# ─────────────────────────────────────────────────────────────────────────────
# Test 7: AC3 empty-expects regression
#
# Fixture: an in-progress session whose trust bundle holds a single kit-typed
# claim (builder.verify.tests, status "disputed", no evidence), an empty
# command log, and a fake flow definition whose only gate declares expects:[].
# The gate is pointed at the fake flow via FLOW_AGENTS_FLOW_DEFS_DIR.
# Expected: exit 2 and a "gate misconfiguration" message.
# ─────────────────────────────────────────────────────────────────────────────
echo ""
echo "=== 7. AC3 empty-expects regression: declared-only bundle + fake flow with expects:[] ==="
echo " (Two-part dependency: union form ALWAYS enforces workflow.* + empty-expects guard"
echo " emits gate-misconfiguration HARD_BLOCK for empty expects[])"

T7="$TMP/t7-empty-expects"
mkdir -p "$T7/.flow-agents/empty-expects"
printf '# Repo\n' > "$T7/AGENTS.md"
printf '%s' '{"schema_version":"1.0","task_slug":"empty-expects","status":"in_progress","phase":"execution","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"in_progress","summary":"Testing"}}' \
  > "$T7/.flow-agents/empty-expects/state.json"
cat > "$T7/.flow-agents/empty-expects/empty-expects--deliver.md" << 'MD'
# empty-expects

branch: main
status: in_progress
type: deliver

## Definition Of Done
- [ ] tests pass
MD
printf '%s' '{"artifact_dir":"empty-expects","active_flow_id":"builder.build","active_step_id":"verify"}' \
  > "$T7/.flow-agents/current.json"

# Bundle with ONLY kit-typed claims (no workflow.*)
python3 - "$T7/.flow-agents/empty-expects/trust.bundle" "empty-expects" << 'PY'
import json, sys

bundle_path, slug = sys.argv[1], sys.argv[2]
bundle = {
    "schemaVersion": 3, "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c1", "subjectId": slug + "/tests", "subjectType": "flow-step",
        "claimType": "builder.verify.tests",
        "fieldOrBehavior": "npm test",
        "value": "pass", "impactLevel": "high", "status": "disputed",
        "createdAt": "2026-06-27T00:00:00Z", "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [], "policies": [], "events": []
}
# Context manager so the file is flushed and closed before the gate reads it.
with open(bundle_path, 'w') as fh:
    json.dump(bundle, fh)
PY
printf '' > "$T7/.flow-agents/empty-expects/command-log.jsonl"

# Fake flow with expects:[] (safe dir, not agent-writable)
FAKE_FLOWS="$TMP/fake-flows-ac3"
mkdir -p "$FAKE_FLOWS"
cat > "$FAKE_FLOWS/builder.build.flow.json" << 'FLOWJSON'
{"id":"builder.build","version":"0.0","gates":{"fake-gate":{"step":"verify","expects":[]}}}
FLOWJSON

# Encode the hook payload with a JSON serializer instead of splicing the path
# into a string, so a quote or backslash in $T7 cannot corrupt the document.
t7_payload="$(python3 -c 'import json, sys; print(json.dumps({"hook_event_name": "Stop", "cwd": sys.argv[1]}))' "$T7")"

# The gate is expected to exit non-zero; suspend errexit while capturing it.
set +e
t7_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000 FLOW_AGENTS_FLOW_DEFS_DIR="$FAKE_FLOWS" \
  node "$GATE" 2>&1 <<< "$t7_payload")"
t7_exit=$?
set -e

echo " Declared-only bundle + fake flow with expects:[]: exit=$t7_exit (expected 2)"
if [ "$t7_exit" -eq 2 ]; then
  _pass "AC3: declared-only bundle + empty-expects flow → BLOCKS (exit 2)"
else
  _fail "AC3: expected exit 2, got $t7_exit. output: ${t7_out:0:300}"
fi

# Output is matched via here-strings and `grep -E`: alternation with `\|` in a
# basic regex is not defined by POSIX, whereas `|` in an extended regex is.
if grep -q "gate misconfiguration" <<< "$t7_out"; then
  _pass "AC3: 'gate misconfiguration' HARD_BLOCK emitted (empty expects[] guard)"
else
  _fail "AC3: 'gate misconfiguration' NOT emitted. output: ${t7_out:0:300}"
fi

# NOTE(review): the fallback branch below also accepts "gate misconfiguration",
# which the previous check already asserts — so this check cannot fail on its
# own once that one passes. Confirm whether the fallback should be narrowed to
# "surface unavailable" only.
if grep -Eq "disputed|caught false-completion|not auto-releasing" <<< "$t7_out"; then
  _pass "AC3: union form still enforces workflow.* claim (disputed builder.verify.tests caught)"
elif grep -Eq "surface unavailable|gate misconfiguration" <<< "$t7_out"; then
  # The disputed builder.verify.tests is high-impact; surface may be unavailable
  _pass "AC3: union form active (gate misconfiguration or surface-unavailable emitted for high-impact claim)"
else
  _fail "AC3: union form not enforcing. output: ${t7_out:0:300}"
fi
552
+
553
+
554
# ─────────────────────────────────────────────────────────────────────────────
# Test 8: PROOF SCENARIO 1 — Status-gated dodge closed (Fix A: completing guard removed)
#
# PRE-FIX: capturedFailReconciliation had `if (!completing) return []`.
# A non-terminal status (e.g. 'blocked') would skip the check entirely —
# a kit-typed claim asserting pass for a FAIL command would SHIP.
# POST-FIX: completing guard removed; the check runs on EVERY stop regardless
# of state.json.status.
#
# Fixture: state.json status "blocked", a bundle claiming "npm test" passed,
# and a command log produced by write_fail_log.
# ─────────────────────────────────────────────────────────────────────────────
echo ""
echo "=== 8. PROOF SCENARIO 1 — Status-gated dodge closed (Fix A) ==="
echo " PRE-FIX: completing guard skipped reconciliation for non-terminal statuses"
echo " POST-FIX: guard removed → check runs on every stop (status-independent)"

T8="$TMP/t8-status-dodge"
mkdir -p "$T8/.flow-agents/status-dodge"
printf '# Repo\n' > "$T8/AGENTS.md"
# CRITICAL: status = 'blocked' (non-terminal — pre-fix would have returned [] here)
printf '%s' '{"schema_version":"1.0","task_slug":"status-dodge","status":"blocked","phase":"executing","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"in_progress","summary":"running"}}' \
  > "$T8/.flow-agents/status-dodge/state.json"
cat > "$T8/.flow-agents/status-dodge/status-dodge--deliver.md" << 'MD'
# status-dodge

branch: main
status: blocked
type: deliver

## Definition Of Done
- [x] tests pass

## Goal Fit Gate
- [x] acceptance verified

### Verdict: PASS
MD
# NOTE(review): unlike the other scenarios visible in this file, no
# .flow-agents/current.json is written for this session — confirm the gate is
# meant to locate the session without it.

# Bundle: kit-typed claim asserting pass for "npm test"
python3 - "$T8/.flow-agents/status-dodge/trust.bundle" "status-dodge" << 'PY'
import json, sys

bundle_path, slug = sys.argv[1], sys.argv[2]
bundle = {
    "schemaVersion": 3, "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c1", "subjectId": slug + "/tests", "subjectType": "flow-step",
        "claimType": "builder.verify.tests",
        "fieldOrBehavior": "npm test",
        "value": "pass", "impactLevel": "high", "status": "verified",
        "createdAt": "2026-06-27T00:00:00Z", "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [{
        "id": "ev1", "claimId": "c1",
        "evidenceType": "command_output", "method": "capture",
        "sourceRef": "command-log.jsonl",
        "excerptOrSummary": "npm test passed (agent claimed)",
        "observedAt": "2026-06-27T00:00:00Z", "collectedBy": "agent",
        "passing": True,
        "execution": {"label": "npm test", "exitCode": 0}
    }],
    "policies": [], "events": []
}
# Context manager so the file is flushed and closed before the gate reads it.
with open(bundle_path, 'w') as fh:
    json.dump(bundle, fh)
PY
# command-log: "npm test" FAIL (latest capture is FAIL — the agent lied)
write_fail_log "$T8/.flow-agents/status-dodge/command-log.jsonl"

echo ""
echo "--- 8a. PRE-FIX simulation (completing guard) ---"
# Old code: `const completing = TERMINAL_STATUSES.has(taskStatus) || taskStatus === 'verified'`
# With status='blocked', completing=false → return [] → gate blind
# NOTE: this snippet only re-evaluates a hard-coded copy of the old predicate;
# it does not run the gate itself.
# Explicit if/else rather than `cmd && _pass || _fail`, so _fail is reached only
# when node exits non-zero and never because _pass itself returned non-zero.
if node -e "
const TERMINAL_STATUSES = new Set(['done','delivered','accepted','archived','complete','completed']);
const taskStatus = 'blocked';
const completing = TERMINAL_STATUSES.has(taskStatus) || taskStatus === 'verified';
console.log(' completing (pre-fix logic):', completing, '(false → capturedFailReconciliation skipped → gate blind)');
if (completing) { process.exit(1); }
" 2>&1; then
  _pass "PRE-FIX: status=blocked → completing=false → reconciliation skipped → gate blind"
else
  _fail "PRE-FIX simulation error"
fi

echo ""
echo "--- 8b. POST-FIX: guard removed → blocks regardless of status ---"
# The gate is expected to exit non-zero; suspend errexit while capturing it.
set +e
t8_out="$(run_gate "$T8")"
t8_exit=$?
set -e
echo " POST-FIX exit: $t8_exit (expected 2, status=blocked, latest=FAIL, claim=pass)"
if [ "$t8_exit" -eq 2 ]; then
  _pass "PROOF 1: status-gated dodge closed — POST-FIX blocks (exit 2) regardless of status=blocked"
else
  _fail "PROOF 1 FAILED: status=blocked + FAIL + claim=pass should exit 2, got $t8_exit. output: ${t8_out:0:400}"
fi
# `grep -E` with `|`: alternation via `\|` in a basic regex is not defined by POSIX.
if grep -Eq "caught false-completion|namespace-agnostic" <<< "$t8_out"; then
  _pass "PROOF 1: 'caught false-completion' emitted for status=blocked session"
else
  _fail "PROOF 1: expected 'caught false-completion' message not found. output: ${t8_out:0:400}"
fi
649
+
650
+
651
# ─────────────────────────────────────────────────────────────────────────────
# Test 9: PROOF SCENARIO 2 — Over-block removed (Fix B: Case B removed)
#
# PRE-FIX: Case B would HARD_BLOCK any captured FAIL with no matching claim —
# including incidental commands (grep no-match exit 1, git diff --exit-code, etc.).
# POST-FIX: Case B removed. Only Case A (claimed pass contradicts captured FAIL) blocks.
# A genuine incidental failure with no claim is NOT blocked.
#
# Fixture: a seeded session whose bundle comes from write_kit_pass_bundle and
# whose log holds a passing "npm test" plus a failing, unclaimed grep.
# ─────────────────────────────────────────────────────────────────────────────
echo ""
echo "=== 9. PROOF SCENARIO 2 — Over-block removed (Fix B: Case B removed) ==="
echo " PRE-FIX: 'unaccounted at completion' HARD_BLOCK fired for ANY unaccounted FAIL"
echo " POST-FIX: Case B removed → incidental fails with no claim NOT blocked"

T9="$TMP/t9-overblock"
seed_delivered "$T9" "overblock-sess"
printf '%s' '{"artifact_dir":"overblock-sess"}' > "$T9/.flow-agents/current.json"
# Bundle: only "npm test" claim asserting pass (no claim about the grep incidental fail)
write_kit_pass_bundle "$T9/.flow-agents/overblock-sess/trust.bundle" "overblock-sess"
# Log: "npm test" PASS + incidental "grep --quiet somepattern AGENTS.md" FAIL (exit 1)
cat > "$T9/.flow-agents/overblock-sess/command-log.jsonl" << 'JSONL'
{"command":"npm test","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}
{"command":"grep --quiet somepattern AGENTS.md","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-27T00:00:01Z","source":"postToolUse-capture"}
JSONL

# Capture the gate's exit status without letting errexit abort the script.
set +e
t9_out="$(run_gate "$T9")"
t9_exit=$?
set -e

# Record once whether the Case B message appears; both checks below use it.
t9_case_b=no
if echo "$t9_out" | grep -q "unaccounted at completion"; then
  t9_case_b=yes
fi

echo " POST-FIX exit: $t9_exit (expected 0 — incidental grep fail NOT a false-completion)"
# NOTE(review): every exit code other than 2 counts as a pass here, although the
# message above says 0 is expected — confirm whether other non-zero codes
# (e.g. a gate crash) should be reported as failures.
case "$t9_exit" in
  2)
    if [ "$t9_case_b" = yes ]; then
      _fail "PROOF 2 FAILED: 'unaccounted at completion' Case B still firing (should be removed). output: ${t9_out:0:400}"
    else
      _fail "PROOF 2 FAILED: blocked (exit 2) but NOT by unaccounted Case B — check output: ${t9_out:0:400}"
    fi
    ;;
  *)
    _pass "PROOF 2: over-block removed — incidental fail with no claim NOT blocked (exit $t9_exit)"
    ;;
esac

# Independent of the exit code: the Case B message must not appear at all.
if [ "$t9_case_b" = yes ]; then
  _fail "PROOF 2: 'unaccounted at completion' emitted (Case B must be removed)"
else
  _pass "PROOF 2: 'unaccounted at completion' NOT emitted (Case B confirmed removed)"
fi
694
+
695
+
696
# ─────────────────────────────────────────────────────────────────────────────
# Test 10: PROOF SCENARIO 3 — Fix-then-pass not blocked (Fix C: latest-wins)
#
# PRE-FIX: captureCrossReference used readCommandLog (sticky-FAIL), so a legit
# fix-then-rerun-to-pass session would still be blocked.
# POST-FIX: readLatestCommandLog is used; the LAST entry wins. A genuine re-run
# that produces a PASS clears the block.
#
# Fixture: a seeded session whose log holds "npm test" twice — a FAIL followed
# one second later by a PASS — with a bundle from write_kit_pass_bundle.
# ─────────────────────────────────────────────────────────────────────────────
echo ""
echo "=== 10. PROOF SCENARIO 3 — Fix-then-pass not blocked (Fix C: latest-wins) ==="
echo " PRE-FIX: sticky-FAIL in captureCrossReference kept a FAIL block even after re-run"
echo " POST-FIX: latest-wins → re-run to PASS clears the block"

T10="$TMP/t10-fixpass"
seed_delivered "$T10" "fixpass-sess"
printf '%s' '{"artifact_dir":"fixpass-sess"}' > "$T10/.flow-agents/current.json"
write_kit_pass_bundle "$T10/.flow-agents/fixpass-sess/trust.bundle" "fixpass-sess"
# Log: FAIL first, then PASS (genuine fix-then-re-run)
printf '%s\n%s\n' \
  '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' \
  '{"command":"npm test","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:01Z","source":"postToolUse-capture"}' \
  > "$T10/.flow-agents/fixpass-sess/command-log.jsonl"

# Capture the gate's exit status without letting errexit abort the script.
set +e
t10_out="$(run_gate "$T10")"
t10_exit=$?
set -e
echo " POST-FIX exit: $t10_exit (expected 0 — latest capture PASS clears the earlier FAIL)"
# NOTE(review): every exit code other than 2 counts as a pass here, although the
# message above says 0 is expected — confirm whether other non-zero codes
# (e.g. a gate crash) should be reported as failures.
if [ "$t10_exit" -ne 2 ]; then
  _pass "PROOF 3: fix-then-pass NOT blocked — latest PASS clears earlier FAIL (exit $t10_exit)"
else
  _fail "PROOF 3 FAILED: fix-then-pass session should exit 0 but got exit 2. output: ${t10_out:0:400}"
fi
# `grep -E` with `|`: alternation via `\|` in a basic regex is not defined by POSIX.
if grep -Eq "caught false-completion|CONTRADICTS" <<< "$t10_out"; then
  _fail "PROOF 3: false-completion incorrectly emitted for fix-then-pass. output: ${t10_out:0:400}"
else
  _pass "PROOF 3: no false-completion for fix-then-pass (latest PASS is the truth)"
fi
734
+
735
+
736
# ─────────────────────────────────────────────────────────────────────────────
# Test 11: PROOF SCENARIO 4 — Exit-code laundering flagged (Fix D)
#
# A claim asserts pass for "npm test || true" (captured exit 0 — because || true
# masks the real exit code). This is not a trustworthy PASS signal.
# POST-FIX: hasLaunderingOperator detects || true → HARD_BLOCK "exit-code-laundered".
#
# Fixture: a seeded session whose bundle claim, evidence label and single log
# entry all name the command "npm test || true" with exit code 0.
# ─────────────────────────────────────────────────────────────────────────────
echo ""
echo "=== 11. PROOF SCENARIO 4 — Exit-code laundering flagged (Fix D) ==="
echo " PRE-FIX: 'npm test || true' captured exit 0 → claimed pass → gate satisfied"
echo " POST-FIX: hasLaunderingOperator detects || true → 'exit-code-laundered' HARD_BLOCK"

T11="$TMP/t11-laundering"
seed_delivered "$T11" "laundering-sess"
printf '%s' '{"artifact_dir":"laundering-sess"}' > "$T11/.flow-agents/current.json"
# Bundle: claim asserting pass for "npm test || true" — command string has laundering operator
python3 - "$T11/.flow-agents/laundering-sess/trust.bundle" "laundering-sess" << 'PY'
import json, sys

bundle_path, slug = sys.argv[1], sys.argv[2]
bundle = {
    "schemaVersion": 3, "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c1", "subjectId": slug + "/tests", "subjectType": "flow-step",
        "claimType": "builder.verify.tests",
        "fieldOrBehavior": "npm test || true",
        "value": "pass", "impactLevel": "high", "status": "verified",
        "createdAt": "2026-06-27T00:00:00Z", "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [{
        "id": "ev1", "claimId": "c1",
        "evidenceType": "command_output", "method": "capture",
        "sourceRef": "command-log.jsonl",
        "excerptOrSummary": "npm test || true: exit 0",
        "observedAt": "2026-06-27T00:00:00Z", "collectedBy": "agent",
        "passing": True,
        "execution": {"label": "npm test || true", "exitCode": 0}
    }],
    "policies": [], "events": []
}
# Context manager so the file is flushed and closed before the gate reads it.
with open(bundle_path, 'w') as fh:
    json.dump(bundle, fh)
PY
# Log: "npm test || true" captured as PASS (exit 0) — the laundering worked
printf '%s\n' \
  '{"command":"npm test || true","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' \
  > "$T11/.flow-agents/laundering-sess/command-log.jsonl"

# The gate is expected to exit non-zero; suspend errexit while capturing it.
set +e
t11_out="$(run_gate "$T11")"
t11_exit=$?
set -e
echo " POST-FIX exit: $t11_exit (expected 2 — || true laundering detected)"
if [ "$t11_exit" -eq 2 ]; then
  _pass "PROOF 4: exit-code laundering BLOCKED (exit 2) — 'npm test || true' not a trustworthy pass"
else
  _fail "PROOF 4 FAILED: laundering should exit 2 but got $t11_exit. output: ${t11_out:0:400}"
fi
# `grep -E` with `|`: alternation via `\|` in a basic regex is not defined by POSIX.
if grep -Eq "exit-code-laundered|laundering operators mask" <<< "$t11_out"; then
  _pass "PROOF 4: 'exit-code-laundered' warning emitted"
else
  _fail "PROOF 4: expected 'exit-code-laundered' message not found. output: ${t11_out:0:400}"
fi
797
+
798
+
799
# ─────────────────────────────────────────────────────────────────────────────
# Summary
#
# Prints the final verdict from the $errors counter: a failure line and exit 1
# unless the counter equals 0, otherwise the success banner and exit 0.
# ─────────────────────────────────────────────────────────────────────────────
echo ""
echo "================================================================="
# Negating the `-eq 0` test (rather than testing `-ne 0`) keeps a non-numeric
# or empty $errors on the failure path.
if ! [ "$errors" -eq 0 ]; then
  echo "FAIL test_captured_fail_reconciliation: $errors check(s) failed."
  exit 1
fi
# Quoted delimiter: the banner is emitted literally, with no expansion.
cat << 'SUMMARY'
PASS test_captured_fail_reconciliation: all checks passed.

Security proof:
 BYPASS CLOSED: kit-typed false-completion blocked namespace-agnostically
 PRE-FIX exit 0 (ships) → POST-FIX exit 2 (blocked)
 NO OVER-BLOCK: all 4 legitimate cases remain unblocked by new logic
 #216 FIXED: no-command session NOT blocked by missing-log check
 AC3: empty-expects regression caught by gate-misconfiguration HARD_BLOCK
 PROOF 1: Status-gated dodge closed (Fix A) — status=blocked + FAIL + claim=pass → exit 2
 PROOF 2: Over-block removed (Fix B) — incidental grep fail, no claim → exit 0
 PROOF 3: Fix-then-pass not blocked (Fix C) — FAIL then PASS + claim=pass → exit 0
 PROOF 4: Exit-code laundering flagged (Fix D) — 'npm test || true' claim → exit 2
SUMMARY
exit 0