@kontourai/flow-agents 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +103 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/console-learning-projection.d.ts +1 -0
  14. package/build/src/cli/effective-backlog-settings.d.ts +1 -0
  15. package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
  16. package/build/src/cli/init.d.ts +17 -0
  17. package/build/src/cli/init.js +242 -20
  18. package/build/src/cli/kit.d.ts +1 -0
  19. package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
  20. package/build/src/cli/publish-change-helper.d.ts +1 -0
  21. package/build/src/cli/pull-work-provider.d.ts +1 -0
  22. package/build/src/cli/runtime-adapter.d.ts +1 -0
  23. package/build/src/cli/telemetry-doctor.d.ts +1 -0
  24. package/build/src/cli/usage-feedback.d.ts +1 -0
  25. package/build/src/cli/utterance-check.d.ts +1 -0
  26. package/build/src/cli/validate-hook-influence.d.ts +1 -0
  27. package/build/src/cli/validate-source-tree.d.ts +1 -0
  28. package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
  29. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  30. package/build/src/cli/verify.d.ts +1 -0
  31. package/build/src/cli/verify.js +90 -0
  32. package/build/src/cli/veritas-governance.d.ts +1 -0
  33. package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
  34. package/build/src/cli/workflow-sidecar.d.ts +324 -0
  35. package/build/src/cli/workflow-sidecar.js +1973 -90
  36. package/build/src/cli.d.ts +2 -0
  37. package/build/src/cli.js +2 -3
  38. package/build/src/flow-kit/validate.d.ts +81 -0
  39. package/build/src/index.d.ts +5 -0
  40. package/build/src/index.js +36 -0
  41. package/build/src/lib/args.d.ts +8 -0
  42. package/build/src/lib/flow-resolver.d.ts +82 -0
  43. package/build/src/lib/flow-resolver.js +237 -0
  44. package/build/src/lib/fs.d.ts +7 -0
  45. package/build/src/lib/workflow-learning-projection.d.ts +132 -0
  46. package/build/src/runtime-adapters.d.ts +18 -0
  47. package/build/src/tools/build-universal-bundles.d.ts +2 -0
  48. package/build/src/tools/build-universal-bundles.js +34 -22
  49. package/build/src/tools/common.d.ts +9 -0
  50. package/build/src/tools/generate-context-map.d.ts +2 -0
  51. package/build/src/tools/generate-context-map.js +3 -16
  52. package/build/src/tools/validate-package.d.ts +2 -0
  53. package/build/src/tools/validate-source-tree.d.ts +2 -0
  54. package/build/src/tools/validate-source-tree.js +42 -162
  55. package/context/contracts/artifact-contract.md +10 -0
  56. package/context/contracts/delivery-contract.md +1 -0
  57. package/context/contracts/review-contract.md +1 -0
  58. package/context/contracts/verification-contract.md +2 -0
  59. package/context/gate-awareness.md +39 -0
  60. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  61. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  62. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  63. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  64. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  65. package/docs/adr/0007-skill-audit.md +1 -1
  66. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  67. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  68. package/docs/adr/0011-mcp-posture.md +100 -0
  69. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  70. package/docs/adr/0013-context-lifecycle.md +151 -0
  71. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  72. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  73. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  74. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  75. package/docs/agent-system-guidebook.md +5 -12
  76. package/docs/context-map.md +4 -10
  77. package/docs/developer-architecture.md +14 -0
  78. package/docs/index.md +3 -2
  79. package/docs/integrations/framework-adapter.md +19 -6
  80. package/docs/integrations/index.md +2 -2
  81. package/docs/north-star.md +4 -4
  82. package/docs/operating-layers.md +3 -3
  83. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  84. package/docs/repository-structure.md +2 -2
  85. package/docs/skills-map.md +1 -0
  86. package/docs/spec/runtime-hook-surface.md +78 -10
  87. package/docs/standards-register.md +3 -3
  88. package/docs/survey-utterance-check.md +1 -1
  89. package/docs/trust-anchor-adoption.md +197 -0
  90. package/docs/verifiable-trust.md +95 -0
  91. package/docs/veritas-integration.md +2 -2
  92. package/docs/workflow-usage-guide.md +69 -0
  93. package/evals/acceptance/DEMO-false-completion.md +144 -0
  94. package/evals/acceptance/demo-cast.sh +92 -0
  95. package/evals/acceptance/demo-false-completion.sh +72 -0
  96. package/evals/acceptance/demo-real-evidence.sh +104 -0
  97. package/evals/acceptance/demo.tape +29 -0
  98. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  99. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  100. package/evals/acceptance/prove-teeth.sh +105 -0
  101. package/evals/ci/antigaming-suite.sh +54 -0
  102. package/evals/ci/run-baseline.sh +2 -0
  103. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  104. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  105. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  106. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  107. package/evals/integration/test_builder_step_producers.sh +379 -0
  108. package/evals/integration/test_bundle_install.sh +35 -71
  109. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  110. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  111. package/evals/integration/test_checkpoint_signing.sh +489 -0
  112. package/evals/integration/test_claim_lookup.sh +352 -0
  113. package/evals/integration/test_command_log_integrity.sh +275 -0
  114. package/evals/integration/test_context_map.sh +0 -2
  115. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  116. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  117. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  118. package/evals/integration/test_flow_kit_repository.sh +2 -0
  119. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  120. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  121. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  122. package/evals/integration/test_gate_lockdown.sh +1137 -0
  123. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  124. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  125. package/evals/integration/test_goal_fit_hook.sh +69 -4
  126. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  127. package/evals/integration/test_hook_category_behaviors.sh +14 -0
  128. package/evals/integration/test_install_merge.sh +1176 -0
  129. package/evals/integration/test_mint_attestation.sh +373 -0
  130. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  131. package/evals/integration/test_publish_delivery.sh +269 -0
  132. package/evals/integration/test_reconcile_soundness.sh +528 -0
  133. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  134. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  135. package/evals/integration/test_trust_checkpoint.sh +325 -0
  136. package/evals/integration/test_trust_reconcile.sh +293 -0
  137. package/evals/integration/test_verify_cli.sh +208 -0
  138. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  139. package/evals/lib/node.sh +0 -6
  140. package/evals/run.sh +47 -0
  141. package/evals/static/test_library_exports.sh +85 -0
  142. package/evals/static/test_universal_bundles.sh +15 -0
  143. package/evals/static/test_workflow_skills.sh +6 -13
  144. package/install.sh +0 -7
  145. package/integrations/strands-ts/README.md +25 -15
  146. package/integrations/veritas/flow-agents.adapter.json +1 -2
  147. package/kits/builder/flows/build.flow.json +59 -12
  148. package/kits/builder/kit.json +85 -15
  149. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  150. package/kits/builder/skills/deliver/SKILL.md +36 -6
  151. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  152. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  153. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  154. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  155. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  156. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  157. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  158. package/kits/knowledge/adapters/default-store/index.js +38 -0
  159. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  160. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  161. package/kits/knowledge/docs/store-contract.md +314 -0
  162. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  163. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  164. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  165. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  166. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  167. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  168. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  169. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  170. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  171. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  172. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  173. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  174. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  175. package/kits/knowledge/kit.json +51 -1
  176. package/package.json +13 -4
  177. package/packaging/conformance/README.md +10 -2
  178. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  179. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  180. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  181. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  182. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  183. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  184. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  185. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  186. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  187. package/packaging/conformance/run-conformance.js +1 -1
  188. package/scripts/README.md +2 -1
  189. package/scripts/build-universal-bundles.js +0 -1
  190. package/scripts/ci/mint-attestation.js +221 -0
  191. package/scripts/ci/trust-reconcile.js +545 -0
  192. package/scripts/hooks/config-protection.js +423 -1
  193. package/scripts/hooks/evidence-capture.js +348 -0
  194. package/scripts/hooks/lib/liveness-read.js +113 -0
  195. package/scripts/hooks/run-hook.js +6 -1
  196. package/scripts/hooks/stop-goal-fit.js +1471 -79
  197. package/scripts/hooks/workflow-steering.js +135 -5
  198. package/scripts/install-codex-home.sh +39 -0
  199. package/scripts/install-merge.js +330 -0
  200. package/src/cli/init.ts +218 -20
  201. package/src/cli/validate-workflow-artifacts.ts +18 -2
  202. package/src/cli/verify.ts +100 -0
  203. package/src/cli/workflow-sidecar.ts +2093 -84
  204. package/src/cli.ts +2 -3
  205. package/src/index.ts +53 -0
  206. package/src/lib/flow-resolver.ts +284 -0
  207. package/src/tools/build-universal-bundles.ts +34 -21
  208. package/src/tools/generate-context-map.ts +3 -17
  209. package/src/tools/validate-source-tree.ts +44 -104
  210. package/tsconfig.json +1 -0
  211. package/build/src/tools/filter-installed-packs.js +0 -135
  212. package/packaging/packs.json +0 -49
  213. package/scripts/filter-installed-packs.js +0 -2
  214. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,1137 @@
1
+ #!/usr/bin/env bash
2
+ # test_gate_lockdown.sh — Gate lock-down security regression (adversarial review
3
+ # Findings 2 + MEDIUM fail-opens).
4
+ #
5
+ # HONEST FRAMING: this is a bar-RAISER, not an airtight defense. An agent with
6
+ # shell access can still evade several of these controls (see individual test notes).
7
+ # The real tamper-proof anchors are external: signed checkpoints + human review.
8
+ # These tests prove the COST IS RAISED, not that the gate is unbreakable.
9
+ #
10
+ # Covers:
11
+ # AC1 — config-protection.js: blocked Write/Edit to kill-switch / routing files
12
+ # (.claude/settings.json, shell profiles, .flow-agents/current.json) and
13
+ # shell-redirect detection (best-effort, incomplete coverage).
14
+ # AC2 — stop-goal-fit.js: MAX_BLOCKS cannot release a HARD block (false-completion
15
+ # / integrity failure); only soft/advisory streaks may auto-release.
16
+ # AC3 — stop-goal-fit.js fail-closed:
17
+ # 3.1 Surface unavailable + high-impact claim → BLOCKS (FULL_BLOCK scope).
18
+ # 3.2 Missing command log in post-execution session → BLOCKS.
19
+ # Clean pre-execution session → NOT blocked.
20
+ # 3.3 CHAIN_GENESIS comments corrected in both files.
21
+ #
22
+ # Usage: bash evals/integration/test_gate_lockdown.sh
23
# Strict-ish mode: unset variables are errors and pipelines report failures.
# Note: -e is not enabled here; the individual cases below toggle it with
# set +e / set -e around each hook invocation.
set -uo pipefail

# Repository root, resolved as two directories above this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
PROT="$ROOT/scripts/hooks/config-protection.js"

export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000

# Scratch directory for seeded repos. Abort if it cannot be created: an empty
# TMP would make every "$TMP/..." fixture path resolve under the filesystem root.
TMP="$(mktemp -d)" || { echo "FATAL: mktemp -d failed" >&2; exit 1; }

# Running count of failed assertions, incremented by _fail.
errors=0

# _pass MESSAGE: report a passing assertion on stdout.
_pass() { echo " PASS: $1"; }

# _fail MESSAGE: report a failing assertion on stdout and bump the counter.
_fail() { echo " FAIL: $1"; errors=$((errors + 1)); }

# cleanup: remove the scratch directory. "--" stops option parsing and
# "${TMP:?}" refuses to run rm against an empty path.
cleanup() { rm -rf -- "${TMP:?}"; }
trap cleanup EXIT
38
+
39
+ # ─── Helpers ─────────────────────────────────────────────────────────────────
40
+
41
# seed_repo_inprogress DIR SLUG [PHASE] [STATUS]
# Lays down a minimal fixture repo under DIR: an AGENTS.md stub, a state.json
# for SLUG, and a "<slug>--deliver.md" artifact with one unchecked
# Definition-Of-Done item. PHASE defaults to "execution" and STATUS to
# "in_progress"; next_action in state.json is always in_progress/"Testing".
seed_repo_inprogress() { # $1=dir $2=slug $3=phase(opt) $4=status(opt)
  local repo_dir="$1"
  local task="$2"
  local task_phase="${3:-execution}"
  local task_status="${4:-in_progress}"
  local work_dir="$repo_dir/.flow-agents/$task"

  mkdir -p "$work_dir"
  printf '# Repo\n' > "$repo_dir/AGENTS.md"

  # Single-line JSON with no trailing newline.
  printf '{"schema_version":"1.0","task_slug":"%s","status":"%s","phase":"%s","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"in_progress","summary":"Testing"}}' \
    "$task" "$task_status" "$task_phase" \
    > "$work_dir/state.json"

  # Deliver artifact: one argument per output line.
  printf '%s\n' \
    "# $task" \
    "" \
    "branch: main" \
    "status: $task_status" \
    "type: deliver" \
    "" \
    "## Definition Of Done" \
    "- [ ] tests pass" \
    > "$work_dir/$task--deliver.md"
}
58
+
59
# seed_repo_preexec DIR SLUG
# Lays down a fixture repo whose session has not started executing yet:
# state.json carries status "planned" / phase "planning", and the deliver
# artifact is marked "planned" with one unchecked Definition-Of-Done item.
seed_repo_preexec() { # $1=dir $2=slug
  local repo_dir="$1"
  local task="$2"
  local work_dir="$repo_dir/.flow-agents/$task"

  mkdir -p "$work_dir"
  printf '# Repo\n' > "$repo_dir/AGENTS.md"

  # Single-line JSON with no trailing newline.
  printf '{"schema_version":"1.0","task_slug":"%s","status":"planned","phase":"planning","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"planned","summary":"Planning"}}' \
    "$task" \
    > "$work_dir/state.json"

  {
    printf '# %s\n' "$task"
    printf '\n'
    printf 'branch: main\n'
    printf 'status: planned\n'
    printf 'type: deliver\n'
    printf '\n'
    printf '## Definition Of Done\n'
    printf '%s\n' '- [ ] tests pass'
  } > "$work_dir/$task--deliver.md"
}
76
+
77
# write_clean_bundle PATH
# Writes a trust bundle with schemaVersion 3, source "test" and empty
# claims / evidence / policies / events lists to PATH (via python3).
write_clean_bundle() { # $1=path
  python3 - "$1" << 'PY'
import json
import sys

EMPTY_SECTIONS = ("claims", "evidence", "policies", "events")

doc = {"schemaVersion": 3, "source": "test"}
for section in EMPTY_SECTIONS:
    doc[section] = []

with open(sys.argv[1], "w") as out:
    json.dump(doc, out)
PY
}
87
+
88
# write_high_impact_bundle PATH SLUG STATUS
# Writes a trust bundle to PATH containing one high-impact
# "workflow.check.command" claim for "<SLUG>/tests" with value "pass" and
# the given STATUS (the callers' convention is verified|disputed).
write_high_impact_bundle() { # $1=path $2=slug $3=status(verified|disputed)
  python3 - "$1" "$2" "$3" << 'PY'
import json
import sys

target, task, claim_status = sys.argv[1:4]
stamp = "2026-06-27T00:00:00Z"

claim = {
    "id": "c-high",
    "subjectId": task + "/tests",
    "subjectType": "workflow-check",
    "claimType": "workflow.check.command",
    "fieldOrBehavior": "tests",
    "value": "pass",
    "impactLevel": "high",
    "status": claim_status,
    "createdAt": stamp,
    "updatedAt": stamp,
}

doc = {
    "schemaVersion": 3,
    "source": "test",
    "claims": [claim],
    "evidence": [],
    "policies": [],
    "events": [],
}

with open(target, "w") as out:
    json.dump(doc, out)
PY
}
111
+
112
# write_chained_fail_log LOG_FILE COMMAND
# Overwrites LOG_FILE with a single JSON line recording COMMAND as a captured
# failure (exit code 1). The record carries a "_chain" block with seq 0,
# prevHash = the genesis constant, and hash = sha256(genesis + canonical JSON),
# where the canonical JSON is the record without "_chain", keys sorted,
# compact separators.
write_chained_fail_log() { # $1=log_file $2=command
  python3 - "$1" "$2" << 'PY'
import hashlib
import json
import sys

GENESIS = 'a3f9e2b7d5c84f1e6a0d2c3b9f7e1a4d8c6b5f2e9a0d3c7b1f4e8a2d6c0b9f3'

target, command = sys.argv[1:3]


def canonical(record):
    # Top-level keys in sorted order, with the chain metadata left out.
    body = {key: value for key, value in sorted(record.items()) if key != '_chain'}
    return json.dumps(body, separators=(',', ':'))


def link(previous, record):
    material = previous + canonical(record)
    return hashlib.sha256(material.encode('utf-8')).hexdigest()


record = {
    'command': command,
    'observedResult': 'fail',
    'exitCode': 1,
    'capturedAt': '2026-06-27T00:00:00Z',
    'source': 'postToolUse-capture',
}
digest = link(GENESIS, record)
record['_chain'] = {'seq': 0, 'prevHash': GENESIS, 'hash': digest}

with open(target, 'w') as handle:
    handle.write(json.dumps(record) + '\n')
PY
}
130
+
131
# run_gate CWD
# Invokes the stop-goal-fit hook ($GATE) with a Stop event whose cwd is CWD,
# in block mode with the backstop skipped. Hook stdout and stderr are merged
# onto stdout; the function's exit status is the hook's exit status.
# FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS is passed through, defaulting to 100000.
run_gate() { # $1=cwd, output on stdout/stderr, return exit code
  local cwd="$1"
  # Escape backslashes first, then double quotes, so that a path containing
  # either character still produces a valid JSON string literal.
  local json_cwd="$cwd"
  json_cwd=${json_cwd//\\/\\\\}
  json_cwd=${json_cwd//\"/\\\"}
  FLOW_AGENTS_GOAL_FIT_MODE=block \
  FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS="${FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS:-100000}" \
  FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
    node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$json_cwd\"}"
}
138
+
139
# run_prot PAYLOAD
# Pipes PAYLOAD (a JSON hook event) to the config-protection hook ($PROT).
# Hook stdout and stderr are merged onto stdout; the function returns the
# hook's own exit status (second stage of the pipeline).
run_prot() { # JSON payload on stdin, output on stderr+stdout, return exit code
  # printf rather than echo: echo would treat a payload such as "-n" or "-e"
  # as an option and silently drop it.
  printf '%s\n' "$1" | node "$PROT" 2>&1
  return "${PIPESTATUS[1]}"
}
143
+
144
echo ""
echo "================================================================="
echo " Gate Lock-Down Security Eval (Findings 2 + MEDIUM fail-opens)"
echo "================================================================="

# ═══════════════════════════════════════════════════════════════════════════
# AC1 — config-protection: kill-switch / routing file protection
# ═══════════════════════════════════════════════════════════════════════════

# _ac1_probe PAYLOAD: feed one JSON hook event to config-protection.js and
# record the merged output in prot_out and the hook's exit code in prot_exit.
# A here-string replaces `echo … | node` so the captured status is node's
# alone. Keeps the file's set +e / set -e bracket, so errexit is left enabled
# after every probe.
_ac1_probe() {
  set +e
  prot_out=$(node "$PROT" 2>&1 <<< "$1")
  prot_exit=$?
  set -e
}

# _ac1_saw REGEX: true when the captured hook output matches REGEX.
# Uses a here-string instead of `echo … | grep -q`: under `set -o pipefail`
# an early-exiting grep can SIGPIPE the writer and turn a match into a failure.
_ac1_saw() { grep -q -- "$1" <<< "$prot_out"; }

echo ""
echo "=== AC1 — config-protection: kill-switch file protection ==="

echo ""
echo "--- AC1.1: Write/Edit to .claude/settings.json BLOCKED ---"
_ac1_probe '{"tool_name":"Write","tool_input":{"path":"/home/user/.claude/settings.json"}}'
if [[ $prot_exit -eq 2 ]] && _ac1_saw 'BLOCKED.*settings\.json'; then
  _pass "Write to .claude/settings.json blocked (exit 2)"
else
  _fail "Write to .claude/settings.json NOT blocked (exit=$prot_exit, out=$prot_out)"
fi

_ac1_probe '{"tool_name":"Edit","tool_input":{"path":".claude/settings.local.json"}}'
if [[ $prot_exit -eq 2 ]] && _ac1_saw 'BLOCKED'; then
  _pass "Edit to .claude/settings.local.json blocked (exit 2)"
else
  _fail "Edit to .claude/settings.local.json NOT blocked (exit=$prot_exit)"
fi

echo ""
echo "--- AC1.2: Write/Edit to shell profiles BLOCKED ---"
for profile in ".bash_profile" ".bashrc" ".profile" ".zprofile" ".zshrc"; do
  printf -v _ac1_payload '{"tool_name":"Write","tool_input":{"path":"/home/user/%s"}}' "$profile"
  _ac1_probe "$_ac1_payload"
  # Only the exit code is asserted for shell profiles.
  if [[ $prot_exit -eq 2 ]]; then
    _pass "Write to ~/$profile blocked (exit 2)"
  else
    _fail "Write to ~/$profile NOT blocked (exit=$prot_exit, out=$prot_out)"
  fi
done

echo ""
echo "--- AC1.3: Write/Edit to .flow-agents/current.json BLOCKED ---"
_ac1_probe '{"tool_name":"Write","tool_input":{"path":"/repo/.flow-agents/current.json"}}'
if [[ $prot_exit -eq 2 ]] && _ac1_saw 'BLOCKED'; then
  _pass "Write to .flow-agents/current.json blocked (exit 2)"
else
  _fail "Write to .flow-agents/current.json NOT blocked (exit=$prot_exit)"
fi

echo ""
echo "--- AC1.4: Non-protected file still ALLOWED ---"
_ac1_probe '{"tool_name":"Write","tool_input":{"path":"/repo/src/main.js"}}'
if [[ $prot_exit -eq 0 ]]; then
  _pass "Write to src/main.js allowed (exit 0)"
else
  _fail "Write to src/main.js falsely blocked (exit=$prot_exit)"
fi

echo ""
echo "--- AC1.5: Bash redirect >> ~/.bashrc BLOCKED (best-effort) ---"
_ac1_probe '{"tool_name":"Bash","tool_input":{"command":"echo export FLOW_AGENTS_GOAL_FIT_MODE=off >> ~/.bashrc"}}'
if [[ $prot_exit -eq 2 ]] && _ac1_saw 'BLOCKED'; then
  _pass "Bash redirect >> ~/.bashrc blocked (exit 2)"
else
  _fail "Bash redirect >> ~/.bashrc NOT blocked (exit=$prot_exit, out=$prot_out)"
fi
# The earlier wording said sed -i was not covered, but AC1.20 asserts that
# sed -i on a literal profile path is blocked; the note now matches that.
echo " HONEST: redirect detection is best-effort; other write forms may still evade it (sed -i with a literal profile path is exercised in AC1.20)"

echo ""
echo "--- AC1.6: Bash redirect > .claude/settings.json BLOCKED ---"
_ac1_probe '{"tool_name":"Bash","tool_input":{"command":"cat evil.json > .claude/settings.json"}}'
if [[ $prot_exit -eq 2 ]]; then
  _pass "Bash redirect > .claude/settings.json blocked (exit 2)"
else
  _fail "Bash redirect > .claude/settings.json NOT blocked (exit=$prot_exit)"
fi

echo ""
echo "--- AC1.7: tee .flow-agents/current.json BLOCKED ---"
_ac1_probe '{"tool_name":"Bash","tool_input":{"command":"echo {} | tee .flow-agents/current.json"}}'
if [[ $prot_exit -eq 2 ]]; then
  _pass "tee to .flow-agents/current.json blocked (exit 2)"
else
  _fail "tee to .flow-agents/current.json NOT blocked (exit=$prot_exit)"
fi

echo ""
echo "--- AC1.8: Normal bash command still ALLOWED ---"
_ac1_probe '{"tool_name":"Bash","tool_input":{"command":"npm test && npm run lint"}}'
if [[ $prot_exit -eq 0 ]]; then
  _pass "npm test still allowed (exit 0)"
else
  _fail "npm test falsely blocked (exit=$prot_exit)"
fi
263
+
264
# AC1.10–AC1.12: tee with several file operands. Every positional argument
# must be checked, not just the first one.
# Here-strings replace `echo … | node` and `echo … | grep -q`: under
# `set -o pipefail` an early-exiting grep can SIGPIPE the writer and make a
# real match look like a failure. The set +e / set -e bracket is unchanged,
# so errexit is left enabled after each case.

echo ""
echo "--- AC1.10: tee multi-file — tee /dev/null ~/.bashrc BLOCKED (protected 2nd arg) ---"
# PRE-FIX: break after first non-flag arg stopped at /dev/null; ~/.bashrc was never checked.
# POST-FIX: all positional args are checked; ~/.bashrc triggers the block.
set +e
prot_out=$(node "$PROT" 2>&1 <<< '{"tool_name":"Bash","tool_input":{"command":"echo {} | tee /dev/null ~/.bashrc"}}')
prot_exit=$?
set -e
if [[ $prot_exit -eq 2 ]] && grep -q "BLOCKED" <<< "$prot_out"; then
  _pass "tee /dev/null ~/.bashrc blocked (protected 2nd arg now checked) (exit 2)"
else
  _fail "tee /dev/null ~/.bashrc NOT blocked — multi-file tee evasion still possible (exit=$prot_exit, out=$prot_out)"
fi

echo ""
echo "--- AC1.11: tee multi-file — tee /tmp/x .flow-agents/current.json BLOCKED ---"
set +e
prot_out=$(node "$PROT" 2>&1 <<< '{"tool_name":"Bash","tool_input":{"command":"echo {} | tee /tmp/x .flow-agents/current.json"}}')
prot_exit=$?
set -e
if [[ $prot_exit -eq 2 ]] && grep -q "BLOCKED" <<< "$prot_out"; then
  _pass "tee /tmp/x .flow-agents/current.json blocked (protected 2nd arg checked) (exit 2)"
else
  _fail "tee /tmp/x .flow-agents/current.json NOT blocked (exit=$prot_exit, out=$prot_out)"
fi

echo ""
echo "--- AC1.12: tee single-file to safe path — tee /tmp/legit.log still ALLOWED ---"
set +e
prot_out=$(node "$PROT" 2>&1 <<< '{"tool_name":"Bash","tool_input":{"command":"echo output | tee /tmp/legit.log"}}')
prot_exit=$?
set -e
if [[ $prot_exit -eq 0 ]]; then
  _pass "tee /tmp/legit.log still allowed (no protected path) (exit 0)"
else
  _fail "tee /tmp/legit.log falsely blocked (exit=$prot_exit)"
fi
301
+
302
echo ""
echo "--- AC1.9: CLI writes current.json via fs (not Write/Edit tool) — safe to block tool path ---"
# Verify writeJson in workflow-sidecar.ts is a direct fs.writeFileSync call (not via agent tool).
# The source path reaches node through the environment instead of being
# spliced into the JavaScript text, so a checkout path containing a quote or
# backslash cannot break (or alter) the script. if/else replaces
# `cmd && _pass || _fail`, which would also run _fail if _pass itself failed.
if SIDECAR_SRC="$ROOT/src/cli/workflow-sidecar.ts" node -e "
const fs = require('fs');
const src = fs.readFileSync(process.env.SIDECAR_SRC, 'utf8');
const hasWriteJson = /function writeJson.*fs\.mkdirSync.*fs\.writeFileSync/s.test(src);
const calledByWriteCurrent = /writeCurrent.*writeJson.*current\.json/s.test(src);
const calledByAdvanceState = /advanceState.*writeJson/s.test(src);
if (!hasWriteJson) { console.error('ERROR: writeJson not found as fs.writeFileSync'); process.exit(1); }
if (!calledByWriteCurrent && !calledByAdvanceState) { console.error('ERROR: writeCurrent/advanceState not calling writeJson'); process.exit(1); }
console.log('writeJson uses fs.writeFileSync directly (not agent tool)');
console.log('writeCurrent and advanceState call writeJson → blocking Write/Edit tool is safe');
" 2>&1; then
  _pass "CLI current.json writes use fs (not Write/Edit tool) — tool-path block is safe"
else
  _fail "Could not verify CLI fs write pattern"
fi
317
+
318
+
319
+
320
# ═══════════════════════════════════════════════════════════════════════════
# AC1 (R5a) — state.json + trust.bundle agent-Write/Edit blocking
# + interpreter-write detection (best-effort, INCOMPLETE)
# ═══════════════════════════════════════════════════════════════════════════

# _r5a_probe PAYLOAD: run config-protection.js on one JSON hook event; the
# merged output lands in prot_out and the hook's exit code in prot_exit.
# Keeps the file's set +e / set -e bracket (errexit is enabled afterwards).
_r5a_probe() {
  set +e
  prot_out=$(node "$PROT" 2>&1 <<< "$1")
  prot_exit=$?
  set -e
}

# _r5a_expect_blocked PAYLOAD LABEL: pass when the hook exits 2 and its output
# contains BLOCKED. The here-string avoids `echo … | grep -q`, which under
# `set -o pipefail` can fail spuriously when grep exits before the writer.
_r5a_expect_blocked() {
  _r5a_probe "$1"
  if [[ $prot_exit -eq 2 ]] && grep -q "BLOCKED" <<< "$prot_out"; then
    _pass "$2 blocked (exit 2)"
  else
    _fail "$2 NOT blocked (exit=$prot_exit)"
  fi
}

# _r5a_expect_allowed PAYLOAD PASS_MESSAGE LABEL: pass when the hook exits 0.
_r5a_expect_allowed() {
  _r5a_probe "$1"
  if [[ $prot_exit -eq 0 ]]; then
    _pass "$2"
  else
    _fail "$3 falsely blocked (exit=$prot_exit)"
  fi
}

echo ""
echo "=== AC1 R5a: state.json/trust.bundle protection + interpreter-write detection ==="

echo ""
echo "--- AC1.13: Write to .flow-agents/slug/state.json BLOCKED (R5a) ---"
_r5a_expect_blocked \
  '{"tool_name":"Write","tool_input":{"path":"/repo/.flow-agents/my-slug/state.json"}}' \
  "Write to .flow-agents/slug/state.json"

echo ""
echo "--- AC1.14: Edit to .flow-agents/slug/trust.bundle BLOCKED (R5a) ---"
_r5a_expect_blocked \
  '{"tool_name":"Edit","tool_input":{"path":"/repo/.flow-agents/my-slug/trust.bundle"}}' \
  "Edit to .flow-agents/slug/trust.bundle"

echo ""
echo "--- AC1.15: Non-protected file still ALLOWED (no over-block) ---"
_r5a_expect_allowed \
  '{"tool_name":"Write","tool_input":{"path":"/repo/src/foo.ts"}}' \
  "Write to src/foo.ts allowed (exit 0) — no over-block" \
  "Write to src/foo.ts"

echo ""
echo "--- AC1.16: Bash redirect > to state.json BLOCKED (R5a: REDIRECT_PROTECTED_RE extended) ---"
_r5a_expect_blocked \
  '{"tool_name":"Bash","tool_input":{"command":"echo {} > .flow-agents/slug/state.json"}}' \
  "Bash redirect > .flow-agents/slug/state.json"

echo ""
echo "--- AC1.17: tee to trust.bundle BLOCKED (R5a: REDIRECT_PROTECTED_RE extended) ---"
_r5a_expect_blocked \
  '{"tool_name":"Bash","tool_input":{"command":"echo {} | tee .flow-agents/slug/trust.bundle"}}' \
  "tee to .flow-agents/slug/trust.bundle"

echo ""
echo "--- AC1.18 (interpreter-write): node with shell-profile literal token BLOCKED ---"
echo " INCOMPLETE: runtime path construction (process.env.HOME+path) evades ---"
_r5a_expect_blocked \
  '{"tool_name":"Bash","tool_input":{"command":"node -e \".bashrc\""}}' \
  "node invocation with .bashrc literal token"
echo " INCOMPLETE: node -e with runtime-constructed path (no literal token) evades"

echo ""
echo "--- AC1.19 (interpreter-write): python3 with state-file literal token BLOCKED ---"
_r5a_expect_blocked \
  '{"tool_name":"Bash","tool_input":{"command":"python3 -c \"state.json\""}}' \
  "python3 invocation with state.json literal token"

echo ""
echo "--- AC1.20 (interpreter-write): sed in-place with shell-profile literal token BLOCKED ---"
_r5a_expect_blocked \
  '{"tool_name":"Bash","tool_input":{"command":"sed -i s/a/b/ ~/.zshrc"}}' \
  "sed -i with .zshrc literal token"

echo ""
echo "--- AC1.21 (interpreter-write): node invocation WITHOUT protected path ALLOWED ---"
_r5a_expect_allowed \
  '{"tool_name":"Bash","tool_input":{"command":"node -e \"console.log(1)\""}}' \
  "node -e console.log(1) still allowed (exit 0) — no over-block" \
  "node -e console.log(1)"
echo " INCOMPLETE evasions that still pass:"
echo " - Runtime path construction (process.env.HOME + path)"
echo " - Interpreters not in list (ruby, php, etc.)"
439
+
440
+ echo ""
441
+ echo "--- AC1.22: CLI sidecar uses fs for state/trust files (not Write/Edit tool) ---"
442
+ node -e "
443
+ const fs = require('fs');
444
+ const src = fs.readFileSync('$ROOT/src/cli/workflow-sidecar.ts', 'utf8');
445
+ const okState = /writeJson\(path\.join\(dir,\s*['\"]state\.json['\"]\)/.test(src);
446
+ const okBundle = /writeJson\(path\.join\(dir,\s*['\"]trust\.bundle['\"]\)/.test(src);
447
+ const okWriteJson = /function writeJson.*fs\.writeFileSync/.test(src);
448
+ if (!okState) { console.error('ERROR: writeJson(state.json) not found'); process.exit(1); }
449
+ if (!okBundle) { console.error('ERROR: writeJson(trust.bundle) not found'); process.exit(1); }
450
+ if (!okWriteJson) { console.error('ERROR: writeJson not using fs.writeFileSync'); process.exit(1); }
451
+ console.log('Verified: state+trust written via writeJson->fs.writeFileSync (not agent tool)');
452
+ " 2>&1 && _pass "CLI sidecar uses fs for state/trust — tool-path block is safe" \
453
+ || _fail "Could not verify CLI fs write pattern for state/trust files"
454
+
455
+ # ═══════════════════════════════════════════════════════════════════════════
456
+ # AC1 R6a — Laundering regex extended (|| ANY + trailing ;/\n forms)
457
+ # Round 6 Fix 1: hasLaunderingOperator now flags ANY || operator plus
458
+ # extended trailing-; / newline forms (exit 0, /bin/true, :).
459
+ # ═══════════════════════════════════════════════════════════════════════════
460
+ echo ""
461
+ echo "=== AC1 R6a — Laundering regex extended (R6 Fix 1) ==="
462
+
463
+ echo ""
464
+ echo "--- AC1.R6a.1: hasLaunderingOperator unit tests via require ---"
465
+ # Self-contained + portable: require hasLaunderingOperator from $ROOT (not a hardcoded
466
+ # session-scratchpad / worktree path, which is not present in CI).
467
+ _launder_js="$(mktemp -t launder_test.XXXXXX.js)"
468
+ cat > "$_launder_js" <<JS
469
+ const { hasLaunderingOperator } = require(process.env.ROOT + '/scripts/hooks/stop-goal-fit.js');
470
+ const flag = ['npm test || exit 0', 'npm test || echo ok', 'npm test || /bin/true', 'npm test || true', 'npm test ; true', 'npm test ; exit 0'];
471
+ const clean = ['npm test', 'npm run build && npm run eval:static', 'npm run lint'];
472
+ let ok = true;
473
+ for (const c of flag) { if (!hasLaunderingOperator(c)) { console.error('MISS (should flag): ' + c); ok = false; } }
474
+ for (const c of clean) { if (hasLaunderingOperator(c)) { console.error('OVER-FLAG (should not): ' + c); ok = false; } }
475
+ process.exit(ok ? 0 : 1);
476
+ JS
477
+ launder_exit=0  # assume success; overwritten only when node exits non-zero
478
+ ROOT="$ROOT" node "$_launder_js" 2>&1 || launder_exit=$?  # '||' records the status without tripping errexit (active since the previous 'set -e'), so cleanup and _fail below still run
479
+ rm -f "$_launder_js"
480
+ if [ "$launder_exit" -eq 0 ]; then
481
+ _pass "AC1.R6a: hasLaunderingOperator correctly flags new || forms and does not over-flag bare commands"
482
+ else
483
+ _fail "AC1.R6a: hasLaunderingOperator unit tests failed"
484
+ fi
485
+
486
+ echo ""
487
+ echo "--- AC1.R6a.2: Gate blocks npm test || exit 0 (claimed pass via laundered command) ---"
488
+ R6LA="$TMP/r6la-laundering"
489
+ seed_repo_inprogress "$R6LA" "launder-r6"
490
+ python3 - "$R6LA/.flow-agents/launder-r6/trust.bundle" "launder-r6" "npm test || exit 0" << 'PY'
491
+ import json, sys
492
+ bp, slug, cmd = sys.argv[1], sys.argv[2], sys.argv[3]
493
+ bundle = {
494
+ "schemaVersion": 3, "source": "flow-agents/workflow-sidecar",
495
+ "claims": [{"id":"c1","subjectId":slug+"/tests","subjectType":"flow-step",
496
+ "claimType":"builder.verify.tests","fieldOrBehavior":cmd,
497
+ "value":"pass","impactLevel":"high","status":"verified",
498
+ "createdAt":"2026-06-27T00:00:00Z","updatedAt":"2026-06-27T00:00:00Z"}],
499
+ "evidence": [{"id":"ev1","claimId":"c1","evidenceType":"command_output","method":"capture",
500
+ "sourceRef":"command-log.jsonl","excerptOrSummary":"exit 0 (laundered)",
501
+ "observedAt":"2026-06-27T00:00:00Z","collectedBy":"agent","passing":True,
502
+ "execution":{"label":cmd,"exitCode":0}}],
503
+ "policies":[],"events":[]
504
+ }
505
+ json.dump(bundle, open(bp, 'w'))
506
+ PY
507
+ printf '%s\n' '{"command":"npm test || exit 0","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' \
508
+ > "$R6LA/.flow-agents/launder-r6/command-log.jsonl"
509
+
510
+ set +e
511
+ r6la_out=$(run_gate "$R6LA")
512
+ r6la_exit=$?
513
+ set -e
514
+ if [ "$r6la_exit" -eq 2 ]; then
515
+ _pass "AC1.R6a.2: 'npm test || exit 0' claimed-pass BLOCKED (exit 2)"
516
+ else
517
+ _fail "AC1.R6a.2: 'npm test || exit 0' should be blocked, got exit=$r6la_exit. out=${r6la_out:0:200}"
518
+ fi
519
+ if echo "$r6la_out" | grep -q "exit-code-laundered\|laundering"; then
520
+ _pass "AC1.R6a.2: laundering warning emitted"
521
+ else
522
+ _fail "AC1.R6a.2: expected laundering warning not found. out=${r6la_out:0:200}"
523
+ fi
524
+
525
+ echo ""
526
+ echo "--- AC1.R6a.3: Gate blocks npm test || echo ok (claimed pass via laundered command) ---"
527
+ R6LB="$TMP/r6lb-laundering"
528
+ seed_repo_inprogress "$R6LB" "launder-r6b"
529
+ python3 - "$R6LB/.flow-agents/launder-r6b/trust.bundle" "launder-r6b" "npm test || echo ok" << 'PY'
530
+ import json, sys
531
+ bp, slug, cmd = sys.argv[1], sys.argv[2], sys.argv[3]
532
+ bundle = {
533
+ "schemaVersion": 3, "source": "test",
534
+ "claims": [{"id":"c1","subjectId":slug+"/tests","subjectType":"flow-step",
535
+ "claimType":"builder.verify.tests","fieldOrBehavior":cmd,
536
+ "value":"pass","impactLevel":"high","status":"verified",
537
+ "createdAt":"2026-06-27T00:00:00Z","updatedAt":"2026-06-27T00:00:00Z"}],
538
+ "evidence": [{"id":"ev1","claimId":"c1","evidenceType":"command_output","method":"capture",
539
+ "sourceRef":"command-log.jsonl","excerptOrSummary":"exit 0",
540
+ "observedAt":"2026-06-27T00:00:00Z","collectedBy":"agent","passing":True,
541
+ "execution":{"label":cmd,"exitCode":0}}],
542
+ "policies":[],"events":[]
543
+ }
544
+ json.dump(bundle, open(bp, 'w'))
545
+ PY
546
+ printf '%s\n' '{"command":"npm test || echo ok","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' \
547
+ > "$R6LB/.flow-agents/launder-r6b/command-log.jsonl"
548
+
549
+ set +e
550
+ r6lb_out=$(run_gate "$R6LB")
551
+ r6lb_exit=$?
552
+ set -e
553
+ if [ "$r6lb_exit" -eq 2 ]; then
554
+ _pass "AC1.R6a.3: 'npm test || echo ok' claimed-pass BLOCKED (exit 2)"
555
+ else
556
+ _fail "AC1.R6a.3: 'npm test || echo ok' should be blocked, got exit=$r6lb_exit"
557
+ fi
558
+
559
+ echo ""
560
+ echo "--- AC1.R6a.4: Gate blocks npm test || /bin/true (claimed pass via laundered command) ---"
561
+ R6LC="$TMP/r6lc-laundering"
562
+ seed_repo_inprogress "$R6LC" "launder-r6c"
563
+ python3 - "$R6LC/.flow-agents/launder-r6c/trust.bundle" "launder-r6c" "npm test || /bin/true" << 'PY'
564
+ import json, sys
565
+ bp, slug, cmd = sys.argv[1], sys.argv[2], sys.argv[3]
566
+ bundle = {
567
+ "schemaVersion": 3, "source": "test",
568
+ "claims": [{"id":"c1","subjectId":slug+"/tests","subjectType":"flow-step",
569
+ "claimType":"builder.verify.tests","fieldOrBehavior":cmd,
570
+ "value":"pass","impactLevel":"high","status":"verified",
571
+ "createdAt":"2026-06-27T00:00:00Z","updatedAt":"2026-06-27T00:00:00Z"}],
572
+ "evidence": [{"id":"ev1","claimId":"c1","evidenceType":"command_output","method":"capture",
573
+ "sourceRef":"command-log.jsonl","excerptOrSummary":"exit 0",
574
+ "observedAt":"2026-06-27T00:00:00Z","collectedBy":"agent","passing":True,
575
+ "execution":{"label":cmd,"exitCode":0}}],
576
+ "policies":[],"events":[]
577
+ }
578
+ json.dump(bundle, open(bp, 'w'))
579
+ PY
580
+ printf '%s\n' '{"command":"npm test || /bin/true","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' \
581
+ > "$R6LC/.flow-agents/launder-r6c/command-log.jsonl"
582
+
583
+ set +e
584
+ r6lc_out=$(run_gate "$R6LC")
585
+ r6lc_exit=$?
586
+ set -e
587
+ if [ "$r6lc_exit" -eq 2 ]; then
588
+ _pass "AC1.R6a.4: 'npm test || /bin/true' claimed-pass BLOCKED (exit 2)"
589
+ else
590
+ _fail "AC1.R6a.4: 'npm test || /bin/true' should be blocked, got exit=$r6lc_exit"
591
+ fi
592
+
593
+ echo ""
594
+ echo "--- AC1.R6a.5: Bare 'npm test' with PASS log NOT blocked (no over-flag) ---"
595
+ R6LD="$TMP/r6ld-legit"
596
+ seed_repo_inprogress "$R6LD" "legit-r6"
597
+ python3 - "$R6LD/.flow-agents/legit-r6/trust.bundle" "legit-r6" << 'PY'
598
+ import json, sys
599
+ bp, slug = sys.argv[1], sys.argv[2]
600
+ bundle = {
601
+ "schemaVersion": 3, "source": "test",
602
+ "claims": [{"id":"c1","subjectId":slug+"/tests","subjectType":"flow-step",
603
+ "claimType":"builder.verify.tests","fieldOrBehavior":"npm test",
604
+ "value":"pass","impactLevel":"high","status":"verified",
605
+ "createdAt":"2026-06-27T00:00:00Z","updatedAt":"2026-06-27T00:00:00Z"}],
606
+ "evidence": [{"id":"ev1","claimId":"c1","evidenceType":"command_output","method":"capture",
607
+ "sourceRef":"command-log.jsonl","excerptOrSummary":"pass",
608
+ "observedAt":"2026-06-27T00:00:00Z","collectedBy":"agent","passing":True,
609
+ "execution":{"label":"npm test","exitCode":0}}],
610
+ "policies":[],"events":[]
611
+ }
612
+ json.dump(bundle, open(bp, 'w'))
613
+ PY
614
+ printf '%s\n' '{"command":"npm test","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' \
615
+ > "$R6LD/.flow-agents/legit-r6/command-log.jsonl"
616
+
617
+ set +e
618
+ r6ld_out=$(run_gate "$R6LD")
619
+ r6ld_exit=$?
620
+ set -e
621
+ if ! echo "$r6ld_out" | grep -q "exit-code-laundered\|laundering operators"; then
622
+ _pass "AC1.R6a.5: bare 'npm test' NOT falsely flagged as laundering (no over-block)"
623
+ else
624
+ _fail "AC1.R6a.5: bare 'npm test' INCORRECTLY flagged as laundering. out=${r6ld_out:0:200}"
625
+ fi
626
+ echo " (Bare npm test exit: $r6ld_exit -- workflow-state warnings are OK)"
627
+
628
+ # ═══════════════════════════════════════════════════════════════════════════
629
+ # AC1 R6b — delivery/ path protection (R6 Fix 2)
630
+ # ═══════════════════════════════════════════════════════════════════════════
631
+ echo ""
632
+ echo "=== AC1 R6b — delivery/ path protection (R6 Fix 2) ==="
633
+ echo ""
634
+ echo " HONEST residual: runtime-constructed paths evade; publishDelivery CLI"
635
+ echo " uses fs.copyFileSync (not the Write/Edit tool or bash cp) -- unaffected."
636
+
637
+ echo ""
638
+ echo "--- AC1.23: Write/Edit to delivery/trust.bundle BLOCKED ---"
639
+ set +e
640
+ prot_out=$(echo '{"tool_name":"Write","tool_input":{"path":"/repo/delivery/trust.bundle"}}' | node "$PROT" 2>&1)
641
+ prot_exit=$?
642
+ set -e
643
+ if [ "$prot_exit" -eq 2 ] && echo "$prot_out" | grep -q "BLOCKED"; then
644
+ _pass "AC1.23: Write to delivery/trust.bundle blocked (exit 2)"
645
+ else
646
+ _fail "AC1.23: Write to delivery/trust.bundle NOT blocked (exit=$prot_exit)"
647
+ fi
648
+
649
+ echo ""
650
+ echo "--- AC1.24: Write/Edit to delivery/trust.checkpoint.json BLOCKED ---"
650
+ set +e  # the hook is expected to exit 2; do not let errexit abort the script
651
+ prot_out=$(echo '{"tool_name":"Edit","tool_input":{"path":"delivery/trust.checkpoint.json"}}' | node "$PROT" 2>&1)
652
+ prot_exit=$?
653
+ set -e
654
+ if [ "$prot_exit" -eq 2 ] && echo "$prot_out" | grep -q "BLOCKED"; then
655
+ _pass "AC1.24: Edit to delivery/trust.checkpoint.json blocked (exit 2)"
656
+ else
657
+ _fail "AC1.24: Edit to delivery/trust.checkpoint.json NOT blocked (exit=$prot_exit)"
658
+ fi
660
+
661
+ echo ""
662
+ echo "--- AC1.25: cp x delivery/trust.bundle BLOCKED (plain-cp attack) ---"
663
+ set +e
664
+ prot_out=$(echo '{"tool_name":"Bash","tool_input":{"command":"cp forged.json delivery/trust.bundle"}}' | node "$PROT" 2>&1)
665
+ prot_exit=$?
666
+ set -e
667
+ if [ "$prot_exit" -eq 2 ] && echo "$prot_out" | grep -q "BLOCKED"; then
668
+ _pass "AC1.25: cp forged.json delivery/trust.bundle blocked (exit 2)"
669
+ else
670
+ _fail "AC1.25: cp to delivery/trust.bundle NOT blocked (exit=$prot_exit, out=$prot_out)"
671
+ fi
672
+
673
+ echo ""
674
+ echo "--- AC1.26: > delivery/trust.bundle BLOCKED (shell redirect) ---"
675
+ set +e
676
+ prot_out=$(echo '{"tool_name":"Bash","tool_input":{"command":"echo {} > delivery/trust.bundle"}}' | node "$PROT" 2>&1)
677
+ prot_exit=$?
678
+ set -e
679
+ if [ "$prot_exit" -eq 2 ] && echo "$prot_out" | grep -q "BLOCKED"; then
680
+ _pass "AC1.26: redirect > delivery/trust.bundle blocked (exit 2)"
681
+ else
682
+ _fail "AC1.26: redirect > delivery/trust.bundle NOT blocked (exit=$prot_exit)"
683
+ fi
684
+
685
+ echo ""
686
+ echo "--- AC1.27: cp x src/foo.ts ALLOWED (no over-block on normal copy) ---"
687
+ set +e
688
+ prot_out=$(echo '{"tool_name":"Bash","tool_input":{"command":"cp x src/foo.ts"}}' | node "$PROT" 2>&1)
689
+ prot_exit=$?
690
+ set -e
691
+ if [ "$prot_exit" -eq 0 ]; then
692
+ _pass "AC1.27: cp x src/foo.ts allowed (exit 0) — no over-block"
693
+ else
694
+ _fail "AC1.27: cp x src/foo.ts falsely blocked (exit=$prot_exit)"
695
+ fi
696
+
697
+ echo ""
698
+ echo "--- AC1.28: publishDelivery uses fs.copyFileSync (not bash cp) — unaffected ---"
699
+ node -e "
700
+ const fs = require('fs');
701
+ const src = fs.readFileSync('$ROOT/src/cli/workflow-sidecar.ts', 'utf8');
702
+ const hasFscp = /fs\.copyFileSync.*delivery/.test(src) || /copyFileSync\(bundleSrc/.test(src);
703
+ const noToolWrite = !/Write.*tool.*delivery/.test(src);
704
+ if (!hasFscp) { console.error('ERROR: publishDelivery does not use fs.copyFileSync to delivery'); process.exit(1); }
705
+ console.log('publishDelivery uses fs.copyFileSync to delivery/ (not bash cp or Write/Edit tool)');
706
+ " 2>&1 && _pass "AC1.28: publishDelivery CLI uses fs.copyFileSync — not affected by bash-cp block" \
707
+ || _fail "AC1.28: could not verify publishDelivery write method"
708
+
709
+ echo ""
710
+ echo " RESIDUAL gaps (honest):"
711
+ echo " - rsync, scp, dd targeting delivery/trust.bundle are NOT caught"
712
+ echo " - Runtime-constructed paths (e.g. path.join(dir, 'trust.bundle')) evade"
713
+ echo " - The real anchor is external: clean CI env + human review"
714
+
715
+
716
+ # ═══════════════════════════════════════════════════════════════════════════
717
+ # AC2 — MAX_BLOCKS cannot release a HARD block
718
+ # ═══════════════════════════════════════════════════════════════════════════
719
+ echo ""
720
+ echo "=== AC2 — MAX_BLOCKS hard-block guard ==="
721
+
722
+ echo ""
723
+ echo "--- AC2.1: MAX_BLOCKS=1 with caught-false-completion → still EXIT 2 (not released) ---"
724
+
725
+ AC2D="$TMP/ac2-hard"
726
+ seed_repo_inprogress "$AC2D" "ac2hard"
727
+ # Evidence claims npm test passed, but log shows it failed → caught false-completion
728
+ printf '%s' '{"schema_version":"1.0","task_slug":"ac2hard","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"passed"}]}' \
729
+ > "$AC2D/.flow-agents/ac2hard/evidence.json"
730
+ write_chained_fail_log "$AC2D/.flow-agents/ac2hard/command-log.jsonl" "npm test"
731
+
732
+ set +e
733
+ ac2h_1=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=1 FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
734
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$AC2D\"}")
735
+ ac2h_1_exit=$?
736
+
737
+ # Second call with MAX_BLOCKS=1 (would release a soft block, but not a hard one)
738
+ ac2h_2=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=1 FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
739
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$AC2D\"}")
740
+ ac2h_2_exit=$?
741
+ set -e
742
+
743
+ if [ "$ac2h_1_exit" -eq 2 ] && echo "$ac2h_1" | grep -q "caught false-completion\|CONTRADICTS"; then
744
+ _pass "AC2: first call: caught-false-completion blocks (exit 2)"
745
+ else
746
+ _fail "AC2: first call should block on caught-false-completion. exit=$ac2h_1_exit out=$ac2h_1"
747
+ fi
748
+
749
+ if [ "$ac2h_2_exit" -eq 2 ] && echo "$ac2h_2" | grep -q "not auto-releasing"; then
750
+ _pass "AC2: MAX_BLOCKS=1 with hard block → still EXIT 2 (not released, shows 'not auto-releasing')"
751
+ else
752
+ _fail "AC2: hard block should NOT release at MAX_BLOCKS. exit=$ac2h_2_exit out=${ac2h_2:0:300}"
753
+ fi
754
+
755
+ echo ""
756
+ echo "--- AC2.2: MAX_BLOCKS=1 with soft/advisory-only streak → may release (exit 0) ---"
757
+
758
+ AC2S="$TMP/ac2-soft"
759
+ seed_repo_inprogress "$AC2S" "ac2soft" "execution" "executing"
760
+ # Write a clean bundle with no disputed claims (just a workflow-state warning from status)
761
+ write_clean_bundle "$AC2S/.flow-agents/ac2soft/trust.bundle"
762
+ # Write empty command-log so missing-log doesn't fire
763
+ printf '' > "$AC2S/.flow-agents/ac2soft/command-log.jsonl"
764
+
765
+ set +e
766
+ # First call: streak reaches 1 with max=1 → already at max; only soft warnings are present, so the gate is expected to release (exit 0) instead of blocking
767
+ ac2s_1=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=1 FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
768
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$AC2S\"}")
769
+ ac2s_1_exit=$?
770
+ set -e
771
+
772
+ if [ "$ac2s_1_exit" -eq 0 ] && echo "$ac2s_1" | grep -q "RELEASED after"; then
773
+ _pass "AC2: soft/advisory-only streak releases at MAX_BLOCKS=1 (exit 0, RELEASED after)"
774
+ else
775
+ _fail "AC2: soft streak should release at MAX_BLOCKS=1. exit=$ac2s_1_exit out=${ac2s_1:0:300}"
776
+ fi
777
+
778
+ echo ""
779
+ echo "--- AC2 summary ---"
780
+ echo " PRE-FIX: count >= maxBlocks → always clearBlockStreak + exit 0 (escape vector)"
781
+ echo " POST-FIX: count >= maxBlocks + hard block → exit 2, no auto-release"
782
+ echo " POST-FIX: count >= maxBlocks + soft only → exit 0, released (anti-loop preserved)"
783
+
784
+
785
+ # ═══════════════════════════════════════════════════════════════════════════
786
+ # AC3.1 — Surface unavailable + high-impact claim → BLOCKS
787
+ # ═══════════════════════════════════════════════════════════════════════════
788
+ echo ""
789
+ echo "=== AC3.1 — Surface unavailable fail-closed ==="
790
+ echo ""
791
+ echo "--- AC3.1a: Isolated (no @kontourai/surface) with high-impact claim → BLOCKS ---"
792
+
793
+ # Create isolated node context that can't find @kontourai/surface
794
+ ISO_DIR="$TMP/surface-iso"
795
+ mkdir -p "$ISO_DIR/repo/.flow-agents/surftest"
796
+ cp "$GATE" "$ISO_DIR/stop-goal-fit.js"
797
+ printf '# Repo\n' > "$ISO_DIR/repo/AGENTS.md"
798
+ # Non-terminal session (execution phase, in_progress status)
799
+ printf '%s' '{"schema_version":"1.0","task_slug":"surftest","status":"in_progress","phase":"execution","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"in_progress","summary":"running"}}' \
800
+ > "$ISO_DIR/repo/.flow-agents/surftest/state.json"
801
+ cat > "$ISO_DIR/repo/.flow-agents/surftest/surftest--deliver.md" << 'MD'
802
+ # surftest
803
+
804
+ branch: main
805
+ status: in_progress
806
+ type: deliver
807
+
808
+ ## Definition Of Done
809
+ - [ ] tests pass
810
+ MD
811
+ write_high_impact_bundle "$ISO_DIR/repo/.flow-agents/surftest/trust.bundle" "surftest" "verified"
812
+ # Empty log (non-missing)
813
+ printf '' > "$ISO_DIR/repo/.flow-agents/surftest/command-log.jsonl"
814
+
815
+ set +e
816
+ # Run in isolated dir with NODE_PATH=$ISO_DIR so @kontourai/surface cannot be found
817
+ surf_out=$(NODE_PATH="$ISO_DIR" FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000 \
818
+ FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
819
+ node "$ISO_DIR/stop-goal-fit.js" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$ISO_DIR/repo\"}")
820
+ surf_exit=$?
821
+ set -e
822
+
823
+ echo " Surface-isolated gate exit: $surf_exit (expected 2)"
824
+ if [ "$surf_exit" -eq 2 ]; then
825
+ _pass "AC3.1: surface unavailable + high-impact claim → BLOCKS (exit 2)"
826
+ else
827
+ _fail "AC3.1: expected exit 2 when surface unavailable. exit=$surf_exit"
828
+ fi
829
+ if echo "$surf_out" | grep -q "surface unavailable"; then
830
+ _pass "AC3.1: 'surface unavailable' warning emitted"
831
+ else
832
+ _fail "AC3.1: 'surface unavailable' warning NOT emitted. out=$surf_out"
833
+ fi
834
+
835
+ echo ""
836
+ echo "--- AC3.1b: Low-impact-only bundle with unavailable surface → NOT blocked ---"
837
+
838
+ ISO2_DIR="$TMP/surface-iso2"
839
+ mkdir -p "$ISO2_DIR/repo/.flow-agents/lowtest"
840
+ cp "$GATE" "$ISO2_DIR/stop-goal-fit.js"
841
+ printf '# Repo\n' > "$ISO2_DIR/repo/AGENTS.md"
842
+ printf '%s' '{"schema_version":"1.0","task_slug":"lowtest","status":"in_progress","phase":"execution","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"in_progress","summary":"running"}}' \
843
+ > "$ISO2_DIR/repo/.flow-agents/lowtest/state.json"
844
+ cat > "$ISO2_DIR/repo/.flow-agents/lowtest/lowtest--deliver.md" << 'MD'
845
+ # lowtest
846
+
847
+ branch: main
848
+ status: in_progress
849
+ type: deliver
850
+
851
+ ## Definition Of Done
852
+ - [ ] tests pass
853
+ MD
854
+ # Low-impact claim only
855
+ python3 - "$ISO2_DIR/repo/.flow-agents/lowtest/trust.bundle" "lowtest" << 'PY'
856
+ import json, sys
857
+ bp, slug = sys.argv[1], sys.argv[2]
858
+ bundle = {
859
+ "schemaVersion": 3, "source": "test",
860
+ "claims": [{
861
+ "id": "c-low",
862
+ "subjectId": slug + "/docs",
863
+ "subjectType": "workflow-check",
864
+ "claimType": "workflow.check.command",
865
+ "fieldOrBehavior": "docs",
866
+ "value": "pass",
867
+ "impactLevel": "low", # low impact — should NOT cause surface-unavailable block
868
+ "status": "verified",
869
+ "createdAt": "2026-06-27T00:00:00Z",
870
+ "updatedAt": "2026-06-27T00:00:00Z"
871
+ }],
872
+ "evidence": [], "policies": [], "events": []
873
+ }
874
+ json.dump(bundle, open(bp, 'w'))
875
+ PY
876
+ printf '' > "$ISO2_DIR/repo/.flow-agents/lowtest/command-log.jsonl"
877
+
878
+ set +e
879
+ surf2_out=$(NODE_PATH="$ISO2_DIR" FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000 \
880
+ FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
881
+ node "$ISO2_DIR/stop-goal-fit.js" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$ISO2_DIR/repo\"}")
882
+ surf2_exit=$?
883
+ set -e
884
+
885
+ if ! echo "$surf2_out" | grep -q "surface unavailable"; then
886
+ _pass "AC3.1: low-impact-only bundle → no surface-unavailable warning (noise reduction)"
887
+ else
888
+ _fail "AC3.1: low-impact bundle should NOT emit surface-unavailable warning. out=$surf2_out"
889
+ fi
890
+
891
+
892
+ # ═══════════════════════════════════════════════════════════════════════════
893
+ # AC3.2 — Missing command log
894
+ # ═══════════════════════════════════════════════════════════════════════════
895
+ echo ""
896
+ echo "=== AC3.2 — Missing command log fail-closed ==="
897
+
898
+ echo ""
899
+ echo "--- AC3.2a: Post-execution session with command-log deleted → BLOCKS ---"
900
+ echo " (#216 fix: missing-log guard requires evidence.execution.label to distinguish"
901
+ echo " from a legit no-command session. This bundle has execution.label=npm-test.)"
902
+
903
+ AC3D="$TMP/ac3-postexec"
904
+ seed_repo_inprogress "$AC3D" "postex" "execution" "in_progress"
905
+ # Write a bundle with execution.label to indicate a command was expected to be captured.
906
+ # This simulates a session where the agent ran commands (evidence.execution.label present)
907
+ # but deleted command-log.jsonl. The #216 guard uses execution.label to distinguish this
908
+ # from a legitimate no-command session (no execution.label → no missing-log warning).
909
+ python3 - "$AC3D/.flow-agents/postex/trust.bundle" << 'PY'
910
+ import json, sys
911
+ bundle = {
912
+ "schemaVersion": 3, "source": "test",
913
+ "claims": [],
914
+ "evidence": [{
915
+ "id": "ev-captured", "claimId": None,
916
+ "evidenceType": "command_output", "method": "capture",
917
+ "sourceRef": "command-log.jsonl",
918
+ "excerptOrSummary": "npm test was expected to run (log deleted by attacker)",
919
+ "observedAt": "2026-06-27T00:00:00Z", "collectedBy": "agent",
920
+ "passing": True,
921
+ "execution": {"label": "npm test", "exitCode": 0}
922
+ }],
923
+ "policies": [], "events": []
924
+ }
925
+ json.dump(bundle, open(sys.argv[1], 'w'))
926
+ PY
927
+ # DO NOT write command-log.jsonl (simulates deletion of the capture truth source)
928
+
929
+ set +e
930
+ ac3_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000 \
931
+ FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
932
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$AC3D\"}")
933
+ ac3_exit=$?
934
+ set -e
935
+
936
+ if [ "$ac3_exit" -eq 2 ]; then
937
+ _pass "AC3.2: post-execution missing command log → BLOCKS (exit 2)"
938
+ else
939
+ _fail "AC3.2: expected exit 2 for missing log in post-execution. exit=$ac3_exit"
940
+ fi
941
+ if echo "$ac3_out" | grep -q "expected capture log is missing"; then
942
+ _pass "AC3.2: 'expected capture log is missing' warning emitted"
943
+ else
944
+ _fail "AC3.2: missing-log warning NOT emitted. out=${ac3_out:0:300}"
945
+ fi
946
+
947
+ echo ""
948
+ echo "--- AC3.2b: Pre-execution session (planning/planned) with no log → NOT blocked ---"
949
+
950
+ AC3P="$TMP/ac3-preexec"
951
+ seed_repo_preexec "$AC3P" "preex"
952
+ write_clean_bundle "$AC3P/.flow-agents/preex/trust.bundle"
953
+ # No command-log.jsonl — pre-execution sessions have no commands yet
954
+
955
+ set +e
956
+ ac3p_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000 \
957
+ FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
958
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$AC3P\"}")
959
+ ac3p_exit=$?
960
+ set -e
961
+
962
+ if ! echo "$ac3p_out" | grep -q "expected capture log is missing"; then
963
+ _pass "AC3.2: pre-execution session (planning/planned) → NOT blocked by missing log"
964
+ else
965
+ _fail "AC3.2: pre-execution should NOT emit missing-log warning. out=${ac3p_out:0:300}"
966
+ fi
967
+ echo " Pre-execution exit: $ac3p_exit (0 or soft block is OK — not from missing log)"
968
+
969
+ echo ""
970
+ echo "--- AC3.2c: Post-execution session WITH command log present → NOT falsely blocked ---"
971
+
972
+ AC3C="$TMP/ac3-cleanlog"
973
+ seed_repo_inprogress "$AC3C" "cleanlog" "execution" "in_progress"
974
+ write_clean_bundle "$AC3C/.flow-agents/cleanlog/trust.bundle"
975
+ write_chained_fail_log "$AC3C/.flow-agents/cleanlog/command-log.jsonl" "echo hello"
976
+ # Evidence declares verdict "pass" with an empty checks list, so no check claims a command result (intended to avoid false-completion blocking)
977
+ printf '%s' '{"schema_version":"1.0","task_slug":"cleanlog","verdict":"pass","checks":[]}' \
978
+ > "$AC3C/.flow-agents/cleanlog/evidence.json"
979
+
980
+ set +e
981
+ ac3c_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000 \
982
+ FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
983
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$AC3C\"}")
984
+ ac3c_exit=$?
985
+ set -e
986
+
987
+ if ! echo "$ac3c_out" | grep -q "expected capture log is missing"; then
988
+ _pass "AC3.2: session with command log present → no missing-log warning (correct)"
989
+ else
990
+ _fail "AC3.2: false-positive missing-log warning when log exists. out=${ac3c_out:0:300}"
991
+ fi
992
+
993
+
994
+ # ═══════════════════════════════════════════════════════════════════════════
995
+ # AC3.3 — CHAIN_GENESIS comment corrected
996
+ # ═══════════════════════════════════════════════════════════════════════════
997
+ echo ""
998
+ echo "=== AC3.3 — CHAIN_GENESIS comment correctness ==="
999
+
1000
+ echo ""
1001
+ echo "--- AC3.3a: evidence-capture.js no longer claims sha256(genesis_string) ---"
1002
+ if grep -q 'fixed arbitrary sentinel\|NOT the SHA256\|previous.*incorrect' "$ROOT/scripts/hooks/evidence-capture.js"; then
1003
+ _pass "AC3.3: evidence-capture.js CHAIN_GENESIS comment corrected (no false sha256 claim)"
1004
+ else
1005
+ _fail "AC3.3: evidence-capture.js still has incorrect CHAIN_GENESIS comment"
1006
+ fi
1007
+
1008
+ echo ""
1009
+ echo "--- AC3.3b: stop-goal-fit.js comment corrected ---"
1010
+ if grep -q 'fixed arbitrary sentinel\|NOT the SHA256\|previous.*incorrect' "$ROOT/scripts/hooks/stop-goal-fit.js"; then
1011
+ _pass "AC3.3: stop-goal-fit.js CHAIN_GENESIS_VERIFY comment corrected"
1012
+ else
1013
+ _fail "AC3.3: stop-goal-fit.js still has incorrect comment"
1014
+ fi
1015
+
1016
+ echo ""
1017
+ echo "--- AC3.3c: Both files use the SAME genesis constant value ---"
1018
+ genesis_ec=$(grep "const CHAIN_GENESIS = " "$ROOT/scripts/hooks/evidence-capture.js" | sed "s/.*= '//;s/'.*//")
1019
+ genesis_sg=$(grep "const CHAIN_GENESIS_VERIFY = " "$ROOT/scripts/hooks/stop-goal-fit.js" | sed "s/.*= '//;s/'.*//")
1020
+ if [ "$genesis_ec" = "$genesis_sg" ] && [ -n "$genesis_ec" ]; then
1021
+ _pass "AC3.3: Both files use the same genesis constant ($genesis_ec)"
1022
+ else
1023
+ _fail "AC3.3: Genesis constant mismatch: evidence-capture=$genesis_ec stop-goal-fit=$genesis_sg"
1024
+ fi
1025
+
1026
+
1027
+ # ═══════════════════════════════════════════════════════════════════════════
1028
+ # Over-block check: normal session (surface present, real log) NOT falsely blocked
1029
+ # ═══════════════════════════════════════════════════════════════════════════
1030
+ echo ""
1031
+ echo "=== Over-block check: normal session NOT falsely blocked ==="
1032
+
1033
+ OVR="$TMP/overblock"
1034
+ seed_repo_inprogress "$OVR" "normal" "execution" "in_progress"
1035
+ write_clean_bundle "$OVR/.flow-agents/normal/trust.bundle"
1036
+ # Write a valid command log (empty — no claims to cross-reference)
1037
+ printf '' > "$OVR/.flow-agents/normal/command-log.jsonl"
1038
+
1039
+ set +e
1040
+ ovr_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000 \
1041
+ FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
1042
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$OVR\"}")
1043
+ ovr_exit=$?
1044
+ set -e
1045
+
1046
+ if ! echo "$ovr_out" | grep -q "expected capture log is missing\|surface unavailable"; then
1047
+ _pass "Over-block: normal session with present log → no false missing-log or surface-unavailable warning"
1048
+ else
1049
+ _fail "Over-block: false warning emitted for normal session. out=${ovr_out:0:300}"
1050
+ fi
1051
+ echo " Normal session exit: $ovr_exit (workflow-state warnings are expected for in-progress)"
1052
+
1053
+
1054
+ # ═══════════════════════════════════════════════════════════════════════════
1055
+ # Diff scope check
1056
+ # ═══════════════════════════════════════════════════════════════════════════
1057
+ echo ""
1058
+ echo "=== Diff scope check ==="
1059
+
1060
+ # Verify that no forbidden (collision-boundary) paths appear in the diff against HEAD.
1061
+ # Round 2 (fix/gate-lockdown) scope: config-protection.js, stop-goal-fit.js, evidence-capture.js.
1062
+ # The security-hardening files (config-protection.js, stop-goal-fit.js, workflow-sidecar.ts,
1063
+ # flow-resolver.ts, evidence-capture.js) are legitimately modified across the rounds, so the
1064
+ # only true invariants this scope-check enforces are the hard collision boundaries.
1065
+ # Use grep patterns to avoid triggering the source path validator.
1066
+ FORBIDDEN_MODIFIED=""
1067
+ FORBIDDEN_PATTERNS=(
1068
+ "kits/knowledge/"
1069
+ )
1070
+ # continue-work: the collision boundary skill file (not in scripts/hooks/).
1071
+ # Use a conservative basename check to avoid a false src-path reference.
1072
+ FORBIDDEN_PATTERNS+=("continue-work")
1073
+ for pat in "${FORBIDDEN_PATTERNS[@]}"; do
1074
+ if git -C "$ROOT" diff --name-only HEAD 2>/dev/null | grep -q "$pat"; then
1075
+ FORBIDDEN_MODIFIED="$FORBIDDEN_MODIFIED $pat"
1076
+ fi
1077
+ done
1078
+
1079
+ if [ -z "$FORBIDDEN_MODIFIED" ]; then
1080
+ _pass "Diff scope: no forbidden files modified"
1081
+ else
1082
+ _fail "Diff scope: FORBIDDEN files modified:$FORBIDDEN_MODIFIED"
1083
+ fi
1084
+
1085
# Verify the expected files were modified (cumulative R3+R5a scope).
# The check passes when at least one of the listed files appears in either
# `git diff --name-only HEAD` or `git status --short`. If none does, it falls
# back to looking for the fix markers inside the hook sources themselves.
EXPECTED_CHANGED=0

# Collect both listings once; the loop below then needs no further git calls.
# `|| true` keeps a git failure (no repository, no commits) from aborting the
# script under `set -e`; an empty listing simply matches nothing.
changed_listing=$(
  git -C "$ROOT" diff --name-only HEAD 2>/dev/null || true
  git -C "$ROOT" status --short 2>/dev/null || true
)

for f in \
  "scripts/hooks/config-protection.js" \
  "src/cli/workflow-sidecar.ts" \
  "scripts/hooks/stop-goal-fit.js"
do
  # -F: match the path literally; as a regex every '.' would be a wildcard.
  if grep -qF -- "$f" <<< "$changed_listing"; then
    EXPECTED_CHANGED=$((EXPECTED_CHANGED + 1))
  fi
done

if [ "$EXPECTED_CHANGED" -ge 1 ]; then
  _pass "Diff scope: expected fix files modified"
else
  # Fallback: check the security-hardening fix patterns are present in the files
  if grep -q "INTERPRETER_WRITE_RE" "$ROOT/scripts/hooks/config-protection.js" && \
     grep -qF "trust.bundle" "$ROOT/scripts/hooks/config-protection.js"; then
    _pass "Diff scope: R5a-protect fix patterns present in config-protection.js"
  # -E alternation instead of the GNU-only `\|` basic-regex extension.
  elif grep -qE "hasLaunderingOperator|capturedFailReconciliation" "$ROOT/scripts/hooks/stop-goal-fit.js"; then
    _pass "Diff scope: security fix patterns present in stop-goal-fit.js"
  else
    _fail "Diff scope: expected fix changes not found in files"
  fi
fi
1110
+
1111
+
1112
# ═══════════════════════════════════════════════════════════════════════════
# Summary
# ═══════════════════════════════════════════════════════════════════════════
# Prints the final verdict and exits: 0 when $errors is zero, 1 otherwise.
# The `-eq 0` form is kept on purpose: if $errors is unset or not an integer
# the test itself errors out, which is treated as false and lands on the FAIL
# path rather than the PASS path.
echo ""
echo "================================================================="
if [ "$errors" -eq 0 ]; then
  # Tally the _pass/_fail lines in this script's own source.
  # NOTE(review): this is a static count of lines that start with _pass or
  # _fail (both branches of every check), not the number of checks executed;
  # confirm whether a runtime counter is available to report instead.
  #
  # The fallback is applied outside of any arithmetic expansion: grep -c exits
  # non-zero both when it cannot read "$0" and when it counts zero lines, and
  # a '?' placed inside $(( ... )) is a syntax error that would abort this
  # PASS path even though every check succeeded.
  check_sites=$(grep -cE '^[[:space:]]*(_pass|_fail)' "$0") || check_sites='?'
  echo "PASS Gate lock-down eval: all ${check_sites} checks passed."
  echo ""
  echo "Security coverage summary (HONEST):"
  echo " AC1: Write/Edit to kill-switch files blocked; redirect check covers > >> tee"
  echo " R5a: state.json+trust.bundle Write/Edit blocked; interpreter-write check"
  echo " (node -e, python3 -c, sed -i, perl -e) for literal path tokens."
  echo " R6a: hasLaunderingOperator extended -- ANY || flagged (|| exit 0, || echo ok, || /bin/true, etc.)"
  echo " R6b: delivery/trust.bundle + delivery/trust.checkpoint.json protected (Write/Edit + > + cp/mv blocked)"
  echo " INCOMPLETE: process substitution, runtime path construction, rsync/scp/dd evade"
  echo " AC2: Hard blocks (false-completion, integrity failure) NOT auto-released by MAX_BLOCKS"
  echo " INCOMPLETE: agent can reset streak by deleting .goal-fit-block-streak.json via Bash"
  echo " AC3.1: surface-unavailable + high-impact → FULL_BLOCK warning; low-impact non-blocking"
  echo " INCOMPLETE: agent could delete @kontourai/surface to force this path"
  echo " AC3.2: missing log in post-execution → FULL_BLOCK; pre-execution → not blocked"
  echo " INCOMPLETE: agent could forge state.json with pre-execution phase"
  echo " AC3.3: CHAIN_GENESIS comment corrected in both files (was falsely claiming sha256)"
  echo " RESIDUAL: real anchor is external — signed checkpoints + human review"
  exit 0
fi
echo "FAIL Gate lock-down eval: $errors check(s) failed."
exit 1