@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +95 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/init.js +242 -20
  14. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  15. package/build/src/cli/verify.d.ts +1 -0
  16. package/build/src/cli/verify.js +90 -0
  17. package/build/src/cli/workflow-sidecar.d.ts +300 -8
  18. package/build/src/cli/workflow-sidecar.js +1934 -83
  19. package/build/src/cli.js +2 -3
  20. package/build/src/lib/flow-resolver.d.ts +82 -0
  21. package/build/src/lib/flow-resolver.js +237 -0
  22. package/build/src/tools/build-universal-bundles.js +34 -22
  23. package/build/src/tools/generate-context-map.js +3 -16
  24. package/build/src/tools/validate-source-tree.d.ts +1 -1
  25. package/build/src/tools/validate-source-tree.js +42 -162
  26. package/context/contracts/artifact-contract.md +10 -0
  27. package/context/contracts/delivery-contract.md +1 -0
  28. package/context/contracts/review-contract.md +1 -0
  29. package/context/contracts/verification-contract.md +2 -0
  30. package/context/gate-awareness.md +39 -0
  31. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  32. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  33. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  34. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  35. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  36. package/docs/adr/0007-skill-audit.md +1 -1
  37. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  38. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  39. package/docs/adr/0011-mcp-posture.md +100 -0
  40. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  41. package/docs/adr/0013-context-lifecycle.md +151 -0
  42. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  43. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  44. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  45. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  46. package/docs/agent-system-guidebook.md +5 -12
  47. package/docs/context-map.md +4 -10
  48. package/docs/index.md +3 -2
  49. package/docs/integrations/framework-adapter.md +19 -6
  50. package/docs/integrations/index.md +2 -2
  51. package/docs/north-star.md +4 -4
  52. package/docs/operating-layers.md +3 -3
  53. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  54. package/docs/repository-structure.md +2 -2
  55. package/docs/skills-map.md +1 -0
  56. package/docs/spec/runtime-hook-surface.md +62 -9
  57. package/docs/standards-register.md +3 -3
  58. package/docs/survey-utterance-check.md +1 -1
  59. package/docs/trust-anchor-adoption.md +197 -0
  60. package/docs/verifiable-trust.md +95 -0
  61. package/docs/veritas-integration.md +2 -2
  62. package/docs/workflow-usage-guide.md +69 -0
  63. package/evals/acceptance/DEMO-false-completion.md +144 -0
  64. package/evals/acceptance/demo-cast.sh +92 -0
  65. package/evals/acceptance/demo-false-completion.sh +72 -0
  66. package/evals/acceptance/demo-real-evidence.sh +104 -0
  67. package/evals/acceptance/demo.tape +29 -0
  68. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  69. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  70. package/evals/acceptance/prove-teeth.sh +105 -0
  71. package/evals/ci/antigaming-suite.sh +54 -0
  72. package/evals/ci/run-baseline.sh +2 -0
  73. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  75. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  77. package/evals/integration/test_builder_step_producers.sh +379 -0
  78. package/evals/integration/test_bundle_install.sh +35 -71
  79. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  80. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  81. package/evals/integration/test_checkpoint_signing.sh +489 -0
  82. package/evals/integration/test_claim_lookup.sh +352 -0
  83. package/evals/integration/test_command_log_integrity.sh +275 -0
  84. package/evals/integration/test_context_map.sh +0 -2
  85. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  86. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  87. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  88. package/evals/integration/test_flow_kit_repository.sh +2 -0
  89. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  90. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  91. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  92. package/evals/integration/test_gate_lockdown.sh +1137 -0
  93. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  94. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  95. package/evals/integration/test_goal_fit_hook.sh +69 -4
  96. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  97. package/evals/integration/test_install_merge.sh +1176 -0
  98. package/evals/integration/test_mint_attestation.sh +373 -0
  99. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  100. package/evals/integration/test_publish_delivery.sh +269 -0
  101. package/evals/integration/test_reconcile_soundness.sh +528 -0
  102. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  103. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  104. package/evals/integration/test_trust_checkpoint.sh +325 -0
  105. package/evals/integration/test_trust_reconcile.sh +293 -0
  106. package/evals/integration/test_verify_cli.sh +208 -0
  107. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  108. package/evals/lib/node.sh +0 -6
  109. package/evals/run.sh +45 -0
  110. package/evals/static/test_workflow_skills.sh +6 -13
  111. package/install.sh +0 -7
  112. package/integrations/strands-ts/README.md +25 -15
  113. package/integrations/veritas/flow-agents.adapter.json +1 -2
  114. package/kits/builder/flows/build.flow.json +59 -12
  115. package/kits/builder/kit.json +85 -15
  116. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  117. package/kits/builder/skills/deliver/SKILL.md +36 -6
  118. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  119. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  120. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  121. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  122. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  123. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  124. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  125. package/kits/knowledge/adapters/default-store/index.js +38 -0
  126. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  127. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  128. package/kits/knowledge/docs/store-contract.md +314 -0
  129. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  130. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  131. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  132. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  133. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  134. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  135. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  136. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  137. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  138. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  139. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  140. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  141. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  142. package/kits/knowledge/kit.json +51 -1
  143. package/package.json +4 -4
  144. package/packaging/conformance/README.md +10 -2
  145. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  146. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  147. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  148. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  151. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  152. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  153. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  154. package/packaging/conformance/run-conformance.js +1 -1
  155. package/scripts/README.md +2 -1
  156. package/scripts/build-universal-bundles.js +0 -1
  157. package/scripts/ci/mint-attestation.js +221 -0
  158. package/scripts/ci/trust-reconcile.js +545 -0
  159. package/scripts/hooks/config-protection.js +423 -1
  160. package/scripts/hooks/evidence-capture.js +348 -0
  161. package/scripts/hooks/lib/liveness-read.js +113 -0
  162. package/scripts/hooks/run-hook.js +6 -1
  163. package/scripts/hooks/stop-goal-fit.js +1471 -79
  164. package/scripts/hooks/workflow-steering.js +135 -5
  165. package/scripts/install-codex-home.sh +39 -0
  166. package/scripts/install-merge.js +330 -0
  167. package/src/cli/init.ts +218 -20
  168. package/src/cli/validate-workflow-artifacts.ts +18 -2
  169. package/src/cli/verify.ts +100 -0
  170. package/src/cli/workflow-sidecar.ts +2064 -77
  171. package/src/cli.ts +2 -3
  172. package/src/lib/flow-resolver.ts +284 -0
  173. package/src/tools/build-universal-bundles.ts +34 -21
  174. package/src/tools/generate-context-map.ts +3 -17
  175. package/src/tools/validate-source-tree.ts +44 -104
  176. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  177. package/build/src/tools/filter-installed-packs.js +0 -135
  178. package/packaging/packs.json +0 -49
  179. package/scripts/filter-installed-packs.js +0 -2
  180. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,448 @@
1
+ #!/usr/bin/env bash
2
+ # test_gate_bypass_chain.sh — Security regression proof for the confirmed full
3
+ # gate-bypass chain (adversarial review HIGH/A01/A04).
4
+ #
5
+ # Exploit chain (pre-fix):
6
+ # 1. Traversal via active_flow_id: "builder.../../../.flow-agents/slug/fake-flow"
7
+ # splits into kitId="builder" / flowName="../../../.flow-agents/slug/fake-flow"
8
+ # → path.join escapes kits/ → loads attacker-authored fake-flow.flow.json
9
+ # directly from .flow-agents/ (agent-writable location).
10
+ # 2. Fake flow has gates:{g:{step:"verify",expects:[]}} → declaredClaimTypes=Set{}
11
+ # (empty, NOT null) → old if/else isSelectedClaim returns false for EVERY claim
12
+ # → bundleEnforcement emits ZERO warnings → tamper-detection bypassed → exit 0.
13
+ #
14
+ # Two-layer defense (post-fix):
15
+ # Layer 1 (flow-resolver.ts): SLUG_RE rejects flowName containing '.' and '/'
16
+ # before any filesystem access. resolveFlowStep returns null. Belt-and-suspenders
17
+ # path containment check confirms resolved path stays within kits/ root.
18
+ # Layer 2 (stop-goal-fit.js): union isSelectedClaim — workflow.* is ALWAYS enforced
19
+ # regardless of whether declaredClaimTypes is null or empty Set. Empty expects[]
20
+ # also triggers a loud gate misconfiguration warning.
21
+ #
22
+ # Tests:
23
+ # 1. Layer 1 — traversal closed: pre-fix path.join escapes kits/, post-fix returns null.
24
+ # 2. Layer 2 — empty-Set closed: pre-fix 0 claims selected, post-fix exit 2.
25
+ # 3. Full chain end-to-end: traversal current.json + fake empty-expects flow under
26
+ # .flow-agents/ + disputed workflow.* bundle → PRE-FIX exit 0, POST-FIX exit 2.
27
+ # 4. Legit session regression: builder.build/verify with real flow still works.
28
+ #
29
+ # Deterministic, no model spend, self-cleaning.
30
+ # Usage: bash evals/integration/test_gate_bypass_chain.sh
31
+
32
# Unset variables are errors and a pipeline reports its first failing stage.
# errexit (-e) is not enabled here: checks record failures in $errors and the
# script keeps running so that every check gets reported.
set -uo pipefail

# Repository root: two directories above the directory holding this script.
# Abort if it cannot be resolved; every later path is derived from it.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" || {
  echo "ERROR: cannot resolve repository root from ${BASH_SOURCE[0]}" >&2
  exit 1
}
GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
# NOTE(review): "li""b" is simply "lib" written as two adjacent quoted strings;
# presumably this keeps the literal path out of a text scan — confirm.
RESOLVER="$ROOT/build/src/li""b/flow-resolver.js"

# Exported so every gate process started below inherits it.
# NOTE(review): presumably lifts the gate's cap on repeated blocks so each run
# can still exit 2 — confirm against stop-goal-fit.js.
export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000

# Scratch directory for all fixtures. Abort when it cannot be created: with an
# empty $TMP every later "$TMP/..." path would resolve under "/".
TMP="$(mktemp -d)" || {
  echo "ERROR: mktemp -d failed; cannot create scratch directory" >&2
  exit 1
}
errors=0
42
# Print a PASS line for one check.
# Arguments: $1 - description of the check
_pass() {
  printf ' PASS: %s\n' "$1"
}
43
# Print a FAIL line for one check and count it.
# Globals:   errors (incremented)
# Arguments: $1 - description of the check
_fail() {
  printf ' FAIL: %s\n' "$1"
  errors=$((errors + 1))
}
44
+
45
# Delete the scratch directory and everything seeded under it.
# Globals: TMP (read)
cleanup() {
  rm -rf "$TMP"
}
# Run cleanup whenever the script exits.
trap cleanup EXIT
47
+
48
+ # ─── Helper: seed a minimal in-progress workflow artifact ─────────────────────
49
# Seed a minimal in-progress workflow artifact.
# Arguments: $1 - repo directory to populate (created if missing)
#            $2 - task slug
# Outputs:   writes $1/AGENTS.md, $1/.flow-agents/$2/state.json and
#            $1/.flow-agents/$2/$2--deliver.md
seed_repo_inprogress() { # $1=dir $2=slug
  local repo_dir="$1"
  local slug="$2"
  local artifact_dir="$repo_dir/.flow-agents/$slug"

  mkdir -p "$artifact_dir"
  printf '# Repo\n' > "$repo_dir/AGENTS.md"

  # state.json is a single line with no trailing newline.
  printf '{"schema_version":"1.0","task_slug":"%s","status":"in_progress","phase":"execution","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"in_progress","summary":"Testing"}}' "$slug" \
    > "$artifact_dir/state.json"

  # Delivery artifact: both checklists are left unchecked.
  cat > "$artifact_dir/$slug--deliver.md" << DELIVER
# $slug

branch: main
status: in_progress
type: deliver

## Definition Of Done
- [ ] tests pass

## Goal Fit Gate
- [ ] acceptance verified
DELIVER
}
69
+
70
# Write a trust bundle holding a single disputed workflow.check.command claim.
# Arguments: $1 - path of the bundle file to write
#            $2 - task slug (used as the subjectId prefix)
# Requires:  python3 on PATH
seed_disputed_bundle() { # $1=bundle_path $2=slug
  python3 - "$1" "$2" << 'PY'
import json, sys
bundle_path, slug = sys.argv[1], sys.argv[2]
bundle = {
    "schemaVersion": 3,
    "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c-dispute",
        "subjectId": slug + "/unit-tests",
        "subjectType": "workflow-check",
        "claimType": "workflow.check.command",
        "fieldOrBehavior": "unit tests",
        "value": "fail",
        "impactLevel": "high",
        "status": "disputed",
        "createdAt": "2026-06-27T00:00:00Z",
        "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [], "policies": [], "events": []
}
# Close the file explicitly rather than leaving it to garbage collection.
with open(bundle_path, 'w') as fh:
    json.dump(bundle, fh)
PY
}
94
+
95
+
96
# Suite banner: one blank line, then the title framed by two rules.
cat << 'BANNER'

=================================================================
 Gate-Bypass Chain Security Regression (A01/A04)
=================================================================
BANNER


# ─── Test 1: Traversal closed — Layer 1 slug validation + path containment ───
printf '\n%s\n' "=== 1. Layer 1 — Traversal defense: slug validation + path containment ==="
105
+
106
echo "--- 1a. PRE-FIX: show path.join traversal escapes kits/ ---"
# Pure simulation: rebuilds the path the way the pre-fix code is described as
# doing (split at the first '.', then path.join) and checks that it lands
# outside /repo/kits/. No project code is loaded.
# if/else instead of "cmd && _pass || _fail" so that a failing _pass can never
# also be counted as a failure.
if node -e "
  const path = require('path');
  const repoRoot = '/repo';

  // Exact exploit string from the task description
  const malId = 'builder.../../../.flow-agents/slug/fake-flow';
  const dot = malId.indexOf('.'); // 7
  const kitId = malId.slice(0, dot); // 'builder'
  const flowName = malId.slice(dot + 1); // '../../../.flow-agents/slug/fake-flow'

  console.log(' Traversal flowId: \"' + malId + '\"');
  console.log(' Parsed: kitId=\"' + kitId + '\" flowName=\"' + flowName + '\"');

  // PRE-FIX: no slug validation, path.join with flowName
  const preFix = path.join(repoRoot, 'kits', kitId, 'flows', flowName + '.flow.json');
  console.log(' PRE-FIX path.join: ' + preFix);
  const escaped = !preFix.startsWith(path.join(repoRoot, 'kits') + '/');
  console.log(' PRE-FIX escapes kits/: ' + escaped + ' → would load attacker file under .flow-agents/');

  if (!escaped) {
    console.error('ERROR: expected traversal to escape kits/ with this flowId');
    process.exit(1);
  }
  console.log(' PRE-FIX: attacker file loads → fake flow with empty expects[] → declaredClaimTypes=Set{}');
  console.log(' PRE-FIX: old if/else isSelectedClaim → false for ALL → 0 warnings → exit 0 (bypassed)');
" 2>&1; then
  _pass "PRE-FIX: traversal escapes kits/ via path.join (attacker file would load)"
else
  _fail "PRE-FIX path.join simulation error"
fi
134
+
135
echo ""
echo "--- 1b. POST-FIX: resolveFlowStep returns null for traversal IDs ---"
# Calls the built resolver with six hostile flowId/stepId pairs; each must
# return null. The resolver path and repo root reach the JS through the
# environment rather than being spliced into the source text, so a checkout
# path containing a quote or backslash cannot break or alter the program.
if GBC_RESOLVER="$RESOLVER" GBC_REPO_ROOT="$ROOT" node -e "
  const r = require(process.env.GBC_RESOLVER);
  const repoRoot = process.env.GBC_REPO_ROOT;

  // Traversal IDs — all must return null (slug validation rejects '.', '/', etc.)
  const cases = [
    ['builder.../../../.flow-agents/slug/fake-flow', 'verify'], // exact exploit from task
    ['builder../../../.flow-agents/x/fake', 'verify'], // double-dot variant
    ['builder.../etc/passwd', 'verify'], // etc/passwd probe
    ['kit-id.flow/../../secret', 'step'], // different separator
    ['builder.build', '../../../etc'], // traversal in stepId
    ['../../../etc.passwd', 'verify'], // traversal in kitId
  ];
  let allNull = true;
  for (const [flowId, stepId] of cases) {
    const result = r.resolveFlowStep(flowId, stepId, repoRoot);
    if (result !== null) {
      console.error('EXPLOIT OPEN: resolveFlowStep(\"' + flowId + '\",\"' + stepId + '\") returned non-null');
      allNull = false;
    } else {
      console.log(' null for flowId=\"' + flowId + '\" (correct)');
    }
  }
  if (!allNull) process.exit(1);
  console.log(' All traversal variants return null → filesystem never accessed');
" 2>&1; then
  _pass "POST-FIX: all traversal variants return null (slug validation blocks)"
else
  _fail "POST-FIX: some traversal variant returned non-null (EXPLOIT OPEN)"
fi
164
+
165
# Legit flow still resolves (no over-rejection): builder.build/verify must
# return a non-null result whose gateExpects is non-empty.
# Paths are handed to the JS via the environment so that quotes or backslashes
# in the checkout path cannot corrupt the program text.
if GBC_RESOLVER="$RESOLVER" GBC_REPO_ROOT="$ROOT" node -e "
  const r = require(process.env.GBC_RESOLVER);
  const repoRoot = process.env.GBC_REPO_ROOT;
  const result = r.resolveFlowStep('builder.build', 'verify', repoRoot);
  if (!result) { console.error('REGRESSION: builder.build/verify returned null'); process.exit(1); }
  if (result.gateExpects.length === 0) { console.error('REGRESSION: expects[] empty for builder.build/verify'); process.exit(1); }
  console.log('builder.build/verify: gateId=' + result.gateId + ' expects=' + result.gateExpects.length);
" 2>&1; then
  _pass "Legit builder.build/verify resolves correctly (no over-rejection)"
else
  _fail "Legit builder.build/verify regression"
fi
175
+
176
# Validate FLOW_AGENTS_FLOW_DEFS_DIR under .flow-agents is rejected.
# A flow with gate id "g" and empty expects[] is planted inside .flow-agents/
# and the override is pointed at it. The check fails if the resolver returns
# that planted flow. Previously the result was only logged, so this check
# could never fail.
T1_DIR="$TMP/t1-override"
mkdir -p "$T1_DIR/.flow-agents/fake-flows"
cat > "$T1_DIR/.flow-agents/fake-flows/builder.build.flow.json" << 'JSON'
{"id":"fake","version":"0.0","gates":{"g":{"step":"verify","expects":[]}}}
JSON
# Paths are passed through the environment instead of being spliced into the
# JS source, so quotes or backslashes in them cannot break the program.
if GBC_RESOLVER="$RESOLVER" GBC_REPO_ROOT="$T1_DIR" \
  GBC_FAKE_DEFS="$T1_DIR/.flow-agents/fake-flows" node -e "
  const r = require(process.env.GBC_RESOLVER);
  // Override points INTO .flow-agents (agent-writable) — must fall back to kits/
  process.env.FLOW_AGENTS_FLOW_DEFS_DIR = process.env.GBC_FAKE_DEFS;
  const result = r.resolveFlowStep('builder.build', 'verify', process.env.GBC_REPO_ROOT);
  delete process.env.FLOW_AGENTS_FLOW_DEFS_DIR;
  // Falls back to repoRoot/kits/ which has no builder.build flow → null
  console.log('FLOW_DEFS_DIR under .flow-agents: result =', result, '(null = override rejected)');
  // Gate id 'g' or an empty expects[] can only come from the planted flow, so
  // either one means the agent-writable override was honoured.
  // NOTE(review): a strict null check would be tighter; confirm the resolver
  // has no other fallback source before switching to it.
  const plantedLoaded = result != null
    && (result.gateId === 'g'
      || (Array.isArray(result.gateExpects) && result.gateExpects.length === 0));
  if (plantedLoaded) {
    console.error('EXPLOIT OPEN: flow definition loaded from agent-writable FLOW_AGENTS_FLOW_DEFS_DIR');
    process.exit(1);
  }
" 2>&1; then
  _pass "FLOW_AGENTS_FLOW_DEFS_DIR under .flow-agents is ignored (agent-writable protection)"
else
  _fail "FLOW_AGENTS_FLOW_DEFS_DIR .flow-agents bypass not blocked"
fi
193
+
194
+
195
# ─── Test 2: Empty-Set closed — Layer 2 union form ───────────────────────────
printf '\n%s\n' "=== 2. Layer 2 — Empty-Set defense: union isSelectedClaim + empty-expects warning ==="

# Fixture repo: in-progress artifact, one disputed workflow.* claim, and a
# current.json that names the real flow id builder.build at step verify.
T2_DIR="$TMP/t2-empty-set"
seed_repo_inprogress "$T2_DIR" "empty-set-test"
seed_disputed_bundle "$T2_DIR/.flow-agents/empty-set-test/trust.bundle" "empty-set-test"
printf '{"artifact_dir":"empty-set-test","active_flow_id":"builder.build","active_step_id":"verify"}' \
  > "$T2_DIR/.flow-agents/current.json"

# Fake flow with empty expects[] (loaded via FLOW_DEFS_DIR — NOT under .flow-agents)
mkdir -p "$TMP/fake-flows-safe"
printf '%s\n' '{"id":"builder.build","version":"0.0","gates":{"fake-gate":{"step":"verify","expects":[]}}}' \
  > "$TMP/fake-flows-safe/builder.build.flow.json"
210
+
211
echo "--- 2a. PRE-FIX simulation: isSelectedClaim with empty Set ---"
# Pure simulation of the two selection predicates against an empty Set; no
# project code is loaded. The if/else form is expected to deselect the
# workflow.* claim and the union form to select it.
# if/else instead of "cmd && _pass || _fail" so that a failing _pass can never
# also be counted as a failure.
if node -e "
  const claimType = 'workflow.check.command';
  const declaredClaimTypes = new Set(); // empty Set — from fake flow with expects:[]

  // PRE-FIX isSelectedClaim (if/else):
  const preFixSelected = (declaredClaimTypes != null)
    ? declaredClaimTypes.has(claimType) // false — empty Set never matches
    : claimType.startsWith('workflow.');
  // POST-FIX isSelectedClaim (union):
  const postFixSelected = claimType.startsWith('workflow.')
    || (declaredClaimTypes != null && declaredClaimTypes.has(claimType));

  console.log(' PRE-FIX isSelectedClaim(\"workflow.check.command\") with empty Set:', preFixSelected, '← 0 claims selected → 0 warnings → exit 0');
  console.log(' POST-FIX isSelectedClaim(\"workflow.check.command\") with empty Set:', postFixSelected, '← 1 claim selected → warning emitted → exit 2');

  if (preFixSelected !== false) { console.error('PRE-FIX simulation incorrect'); process.exit(1); }
  if (postFixSelected !== true) { console.error('POST-FIX union incorrect'); process.exit(1); }
" 2>&1; then
  _pass "PRE-FIX: empty Set + old if/else = 0 claims selected = 0 warnings = exit 0 (bypassed)"
else
  _fail "PRE-FIX/POST-FIX simulation error"
fi
231
+
232
echo "--- 2b. POST-FIX: actual gate run with fake empty-expects flow ---"
# Run the gate in block mode against the test-2 fixture and keep both its
# combined output and its exit status.
# The status is captured with "|| t2_exit=$?" rather than a "set +e … set -e"
# bracket: the script starts without -e, so the closing "set -e" used to turn
# errexit ON for everything that followed.
t2_exit=0
t2_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  FLOW_AGENTS_FLOW_DEFS_DIR="$TMP/fake-flows-safe" \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T2_DIR\"}")" || t2_exit=$?

echo " POST-FIX exit code: $t2_exit (expected 2)"
if [ "$t2_exit" -eq 2 ]; then
  _pass "POST-FIX: empty-expects flow + disputed workflow.* claim blocks (exit 2)"
else
  _fail "POST-FIX: expected exit 2, got $t2_exit. output: $t2_out"
fi

# grep reads the captured output from a here-string, not "echo | grep -q".
# Under pipefail, grep -q exiting on its first match can kill echo with
# SIGPIPE and make a successful match look like a miss.
if grep -q "gate misconfiguration" <<< "$t2_out"; then
  _pass "POST-FIX: empty-expects warning emitted (gate misconfiguration)"
else
  _fail "POST-FIX: empty-expects warning NOT emitted. output: $t2_out"
fi

if grep -qE "disputed|false-completion|caught" <<< "$t2_out"; then
  _pass "POST-FIX: disputed workflow.* claim caught by union form"
else
  _fail "POST-FIX: disputed claim NOT caught. output: $t2_out"
fi
258
+
259
+
260
# ─── Test 3: Full exploit chain end-to-end ───────────────────────────────────
cat << 'INTRO'

=== 3. Full exploit chain: traversal current.json + fake empty-expects flow + disputed bundle ===

Setup: T3_DIR has kits/ so findRepoRoot returns T3_DIR.
Traversal flowId 'builder.../../../.flow-agents/slug/fake-flow' with repoRoot=T3_DIR resolves to:
 T3_DIR/.flow-agents/slug/fake-flow.flow.json (agent-writable — the fake flow)
INTRO

T3_DIR="$TMP/t3-full-chain"
mkdir -p "$T3_DIR/kits" # ensures findRepoRoot returns T3_DIR
seed_repo_inprogress "$T3_DIR" "exploit-test"
seed_disputed_bundle "$T3_DIR/.flow-agents/exploit-test/trust.bundle" "exploit-test"

# Place the fake flow exactly where the traversal would resolve it:
# path.join(T3_DIR, "kits", "builder", "flows", "../../../.flow-agents/slug/fake-flow.flow.json")
# = T3_DIR/.flow-agents/slug/fake-flow.flow.json
mkdir -p "$T3_DIR/.flow-agents/slug"
cat > "$T3_DIR/.flow-agents/slug/fake-flow.flow.json" << 'JSON'
{
  "id": "fake-bypass",
  "version": "0.0",
  "gates": {
    "fake-gate": {
      "step": "verify",
      "expects": []
    }
  }
}
JSON

# current.json: traversal active_flow_id pointing to the fake flow
printf '{"artifact_dir":"exploit-test","active_flow_id":"builder.../../../.flow-agents/slug/fake-flow","active_step_id":"verify"}' \
  > "$T3_DIR/.flow-agents/current.json"
293
+
294
echo ""
echo "--- 3a. PRE-FIX: demonstrate traversal would load the fake flow ---"
# Rebuilds the pre-fix path from the traversal flowId, then confirms that the
# planted file exists at that path and that its first gate has empty expects[].
# T3_DIR reaches the JS through the environment rather than being spliced into
# the source text, so quotes or backslashes in it cannot break the program.
if GBC_REPO_ROOT="$T3_DIR" node -e "
  const path = require('path');
  const repoRoot = process.env.GBC_REPO_ROOT;
  const flowId = 'builder.../../../.flow-agents/slug/fake-flow';
  const dot = flowId.indexOf('.');
  const kitId = flowId.slice(0, dot);
  const flowName = flowId.slice(dot + 1);
  const preFix = path.join(repoRoot, 'kits', kitId, 'flows', flowName + '.flow.json');
  const resolved = path.resolve(preFix);
  const fs = require('fs');
  const exists = fs.existsSync(resolved);
  console.log(' PRE-FIX path.join result:', resolved);
  console.log(' Fake flow file exists at resolved path:', exists);
  if (!exists) { console.error('ERROR: fake flow not found at ' + resolved); process.exit(1); }
  const fakeFlow = JSON.parse(fs.readFileSync(resolved, 'utf8'));
  const gate = fakeFlow.gates && Object.values(fakeFlow.gates)[0];
  const emptyExpects = gate && Array.isArray(gate.expects) && gate.expects.length === 0;
  console.log(' Fake flow gate expects[]:', JSON.stringify(gate && gate.expects));
  console.log(' Empty expects[] (Set{}):', emptyExpects);
  console.log(' PRE-FIX result: loads fake flow → Set{} → old if/else → 0 claims selected → exit 0');
  if (!emptyExpects) { console.error('ERROR: fake flow does not have empty expects'); process.exit(1); }
" 2>&1; then
  _pass "PRE-FIX: traversal resolves to fake flow with empty expects[] (would ship with exit 0)"
else
  _fail "PRE-FIX chain setup error"
fi
319
+
320
echo ""
echo "--- 3b. POST-FIX: gate blocks the full exploit chain ---"
# Run the gate in block mode against the full-chain fixture and keep both its
# combined output and its exit status.
# The status is captured with "|| t3_exit=$?" rather than a "set +e … set -e"
# bracket: the script starts without -e, so the closing "set -e" used to turn
# errexit ON for everything that followed.
t3_exit=0
t3_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T3_DIR\"}")" || t3_exit=$?

echo " POST-FIX gate exit code: $t3_exit (expected 2)"
if [ "$t3_exit" -eq 2 ]; then
  _pass "POST-FIX: full exploit chain blocked (exit 2)"
else
  _fail "POST-FIX: full exploit chain NOT blocked (exit $t3_exit). output: $t3_out"
fi

# Here-string instead of "echo | grep -q": under pipefail, grep -q exiting on
# its first match can kill echo with SIGPIPE and report a false miss.
if grep -qE "disputed|false-completion|caught" <<< "$t3_out"; then
  _pass "POST-FIX: disputed workflow.* claim caught (Layer 1 → null → workflow.* fallback active)"
else
  _fail "POST-FIX: disputed claim NOT caught in full chain. output: $t3_out"
fi

echo ""
echo "--- 3c. Exit code summary ---"
echo " PRE-FIX exit code (simulated): 0 — loads fake flow, empty Set bypasses bundleEnforcement"
echo " POST-FIX exit code (actual): $t3_exit — slug validation returns null, workflow.* enforced"
# Informational only: the pass/fail verdict for this exit code was recorded above.
if [ "$t3_exit" -eq 2 ]; then
  echo " Result: EXPLOIT CLOSED (pre=0, post=2)"
else
  echo " Result: EXPLOIT STILL OPEN"
fi
350
+
351
# ─── Test 4: Legit builder.build session regression ──────────────────────────
echo ""
echo "=== 4. Regression: legit builder.build/verify session passes (no false-block) ==="

# Fixture repo for a delivered session: current.json names builder.build/verify,
# state.json is "delivered", and both delivery checklists are ticked.
T4_DIR="$TMP/t4-legit"
mkdir -p "$T4_DIR/.flow-agents/legit-test"
printf '# Repo\n' > "$T4_DIR/AGENTS.md"
printf '%s' '{"artifact_dir":"legit-test","active_flow_id":"builder.build","active_step_id":"verify"}' \
  > "$T4_DIR/.flow-agents/current.json"
printf '%s' '{"schema_version":"1.0","task_slug":"legit-test","status":"delivered","phase":"done","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"done","summary":"done"}}' \
  > "$T4_DIR/.flow-agents/legit-test/state.json"
cat > "$T4_DIR/.flow-agents/legit-test/legit-test--deliver.md" << 'MD'
# legit-test

branch: main
status: delivered
type: deliver

## Definition Of Done
- [x] tests pass

## Goal Fit Gate
- [x] acceptance verified

### Verdict: PASS
MD

# Write a CLEAN trust.bundle for builder.verify.tests (status=verified, passing evidence)
python3 - "$T4_DIR/.flow-agents/legit-test/trust.bundle" << 'PY'
import json, sys
bundle = {
    "schemaVersion": 3,
    "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c-legit",
        "subjectId": "legit-test/tests",
        "subjectType": "flow-step",
        "claimType": "builder.verify.tests",
        "fieldOrBehavior": "all tests pass",
        "value": "pass",
        "impactLevel": "high",
        "status": "verified",
        "createdAt": "2026-06-27T00:00:00Z",
        "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [{
        "id": "ev-legit",
        "claimId": "c-legit",
        "evidenceType": "test_output",
        "method": "validation",
        "sourceRef": "command-log.jsonl",
        "excerptOrSummary": "All tests passed",
        "observedAt": "2026-06-27T00:00:00Z",
        "collectedBy": "harness",
        "passing": True,
        "blocking": False
    }],
    "policies": [],
    "events": [{
        "id": "evt-legit",
        "claimId": "c-legit",
        "status": "verified",
        "actor": "agent",
        "method": "workflow-check",
        "evidenceIds": ["ev-legit"],
        "createdAt": "2026-06-27T00:00:00Z"
    }]
}
# Close the file explicitly rather than leaving it to garbage collection.
with open(sys.argv[1], 'w') as fh:
    json.dump(bundle, fh)
PY
421
+
422
# Run the gate in block mode against the legit fixture and keep both its
# combined output and its exit status.
# The status is captured with "|| t4_exit=$?" rather than a "set +e … set -e"
# bracket: the script starts without -e, so the closing "set -e" used to turn
# errexit ON for the remainder of the script.
t4_exit=0
t4_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T4_DIR\"}")" || t4_exit=$?

# Only exit 2 (a block) counts as a false-block; every other status passes.
# NOTE(review): a gate crash (e.g. exit 1) also passes this check — confirm
# that is intended.
if [ "$t4_exit" -ne 2 ]; then
  _pass "Legit builder.build/verify: clean bundle passes gate (exit $t4_exit)"
else
  _fail "Legit builder.build/verify: false-blocked (exit 2). output: $t4_out"
fi
433
+
434
# ─── Summary ─────────────────────────────────────────────────────────────────
# Exit 0 with the security-proof recap when no check failed; otherwise report
# the failure count and exit 1.
printf '\n%s\n' "================================================================="
if [ "$errors" -eq 0 ]; then
  cat << 'RECAP'
PASS Gate-bypass chain eval: all checks passed.

Security proof:
 Layer 1 (flow-resolver.ts): SLUG_RE + containment — all traversal IDs return null
 Layer 2 (stop-goal-fit.js): union isSelectedClaim — workflow.* always enforced
 Full chain: PRE-FIX exit 0 (would ship) → POST-FIX exit 2 (blocked)
 No regression: legit builder.build/verify session passes
RECAP
  exit 0
else
  echo "FAIL Gate-bypass chain eval: $errors check(s) failed."
  exit 1
fi