@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,281 @@
1
+ #!/usr/bin/env bash
2
+ # test_enforcer_expects_driven.sh — Integration eval for ADR 0016 Abstraction A P-c.
3
+ #
4
+ # Proves:
5
+ # 1. A TAMPERED declared-type bundle BLOCKS (exit 2) with the tamper/disputed
6
+ # warning. Session has current.json with active_flow_id=builder.build,
7
+ # active_step_id=verify. trust.bundle has a builder.verify.tests claim with
8
+ # stored status "verified" but evidence passing=false (re-derives to disputed).
9
+ # This exercises the expects[] claim-selection path in bundleEnforcement.
10
+ # 2. A CLEAN declared-type bundle PASSES (exit 0). Same session, same claimType,
11
+ # but passing evidence → re-derives to verified.
12
+ # 3. A NO-ACTIVE-FLOW bundle uses the workflow.* fallback (the workflow.check.*
13
+ # path): a disputed workflow.check.command claim still BLOCKS. current.json
14
+ # has no active_flow_id/active_step_id.
15
+ #
16
+ # Deterministic, no model spend, self-cleaning.
17
+ # Usage: bash evals/integration/test_enforcer_expects_driven.sh
18
+
19
+ set -uo pipefail
20
+
21
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
22
+ GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
23
+
24
+ export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
25
+
26
+ TMP="$(mktemp -d)"
27
+ errors=0
28
+ _pass() { echo " ✓ $1"; }
29
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
30
+
31
+ cleanup() { rm -rf "$TMP"; }
32
+ trap cleanup EXIT
33
+
34
+ # ─── helper: seed a minimal delivered workflow artifact ───────────────────────
35
+ # seed_repo DIR SLUG
+ # Builds a fixture repo under DIR for task SLUG: an AGENTS.md stub,
+ # .flow-agents/SLUG/state.json with status "delivered" / phase "done", and a
+ # SLUG--deliver.md artifact whose checklists are ticked and whose verdict is PASS.
+ # The MD heredoc delimiter is unquoted so $slug expands in the artifact title.
+ seed_repo() { # $1=dir $2=slug
36
+ local p="$1" slug="$2"
37
+ mkdir -p "$p/.flow-agents/$slug"
38
+ printf '# Repo\n' > "$p/AGENTS.md"
39
+ printf '%s' "{\"schema_version\":\"1.0\",\"task_slug\":\"$slug\",\"status\":\"delivered\",\"phase\":\"done\",\"updated_at\":\"2026-06-26T00:00:00Z\",\"next_action\":{\"status\":\"done\",\"summary\":\"done\"}}" \
40
+ > "$p/.flow-agents/$slug/state.json"
41
+ cat > "$p/.flow-agents/$slug/$slug--deliver.md" << MD
42
+ # $slug
43
+
44
+ branch: main
45
+ status: delivered
46
+ type: deliver
47
+
48
+ ## Definition Of Done
49
+ - [x] tests pass
50
+
51
+ ## Goal Fit Gate
52
+ - [x] acceptance verified
53
+
54
+ ### Verdict: PASS
55
+ MD
56
+ }
57
+
58
+ # ─── Test 1: TAMPERED declared-type bundle BLOCKS via expects[] path ─────────
59
+ # current.json has active_flow_id=builder.build, active_step_id=verify.
60
+ # The trust.bundle has builder.verify.tests (declared by verify-gate expects[]),
61
+ # stored status "verified" but evidence passing=false → re-derives to "disputed".
62
+ # The enforcer must use the expects[] path and BLOCK with the tamper warning.
63
+ echo "Test 1: tampered declared-type bundle (builder.verify.tests, stored verified, evidence→disputed) must BLOCK via expects[] path"
64
+
65
+ T1_DIR="$TMP/t1"
66
+ seed_repo "$T1_DIR" "declares-tampered"
67
+
68
+ # current.json: active flow
69
+ printf '%s' '{"artifact_dir":"declares-tampered","active_flow_id":"builder.build","active_step_id":"verify"}' \
70
+ > "$T1_DIR/.flow-agents/current.json"
71
+
72
+ python3 - "$T1_DIR/.flow-agents/declares-tampered/trust.bundle" << 'PY'
73
+ import json, sys
74
+ bundle = {
75
+ "schemaVersion": 3,
76
+ "source": "flow-agents/workflow-sidecar",
77
+ "claims": [{
78
+ "id": "c1",
79
+ "subjectId": "declares-tampered/tests",
80
+ "subjectType": "flow-step",
81
+ "claimType": "builder.verify.tests",
82
+ "fieldOrBehavior": "build/verify tests",
83
+ "value": "pass",
84
+ "impactLevel": "high",
85
+ "status": "verified", # tampered: edited from "disputed" → "verified"
86
+ "createdAt": "2026-06-26T00:00:00Z",
87
+ "updatedAt": "2026-06-26T00:00:00Z"
88
+ }],
89
+ "evidence": [{
90
+ "id": "ev1",
91
+ "claimId": "c1",
92
+ "evidenceType": "test_output",
93
+ "method": "validation",
94
+ "sourceRef": "command-log.jsonl",
95
+ "excerptOrSummary": "npm test failed with exit 1",
96
+ "observedAt": "2026-06-26T00:00:00Z",
97
+ "collectedBy": "harness",
98
+ "passing": False,
99
+ "blocking": True
100
+ }],
101
+ "policies": [],
102
+ "events": [{
103
+ "id": "evt1",
104
+ "claimId": "c1",
105
+ "status": "verified",
106
+ "actor": "agent",
107
+ "method": "workflow-check",
108
+ "evidenceIds": ["ev1"],
109
+ "createdAt": "2026-06-26T00:00:00Z"
110
+ }]
111
+ }
112
+ json.dump(bundle, open(sys.argv[1], 'w'))
113
+ PY
114
+
115
+ # Run the gate once; `|| t1_exit=$?` records a non-zero exit with or without errexit,
116
+ # so no set +e/-e toggling. Checks use here-strings: `echo | grep -q` can SIGPIPE under pipefail.
117
+ t1_exit=0
118
+ t1_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
119
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T1_DIR\"}")" || t1_exit=$?
120
+
121
+ if [ "$t1_exit" -eq 2 ]; then
122
+ _pass "tampered declared-type bundle blocks (exit 2)"
123
+ else
124
+ _fail "tampered declared-type bundle did NOT block: exit=$t1_exit output=$t1_out"
125
+ fi
126
+
127
+ if grep -qE "stored status.*does not match recompute|possible tampered bundle" <<< "$t1_out"; then
128
+ _pass "tampered declared-type bundle emits tamper warning"
129
+ else
130
+ _fail "tampered declared-type bundle missing tamper warning: $t1_out"
131
+ fi
132
+
133
+ if grep -q "caught false-completion" <<< "$t1_out"; then
134
+ _pass "tampered declared-type bundle emits caught false-completion"
135
+ else
136
+ _fail "tampered declared-type bundle missing caught false-completion: $t1_out"
137
+ fi
138
+
139
+ if grep -qF "builder.verify.tests" <<< "$t1_out"; then
140
+ _pass "tampered declared-type bundle warning names the declared claimType"
141
+ else
142
+ _fail "tampered declared-type bundle warning does not mention builder.verify.tests: $t1_out"
143
+ fi
144
+
145
+ # ─── Test 2: CLEAN declared-type bundle PASSES ───────────────────────────────
146
+ # Same session, same claimType, but passing evidence → re-derives to verified.
147
+ # Must NOT block.
148
+ echo ""
149
+ echo "Test 2: clean declared-type bundle (builder.verify.tests, passing evidence→verified) must ALLOW"
150
+
151
+ T2_DIR="$TMP/t2"
152
+ seed_repo "$T2_DIR" "declares-clean"
153
+
154
+ printf '%s' '{"artifact_dir":"declares-clean","active_flow_id":"builder.build","active_step_id":"verify"}' \
155
+ > "$T2_DIR/.flow-agents/current.json"
156
+
157
+ python3 - "$T2_DIR/.flow-agents/declares-clean/trust.bundle" << 'PY'
158
+ import json, sys
159
+ bundle = {
160
+ "schemaVersion": 3,
161
+ "source": "flow-agents/workflow-sidecar",
162
+ "claims": [{
163
+ "id": "c2",
164
+ "subjectId": "declares-clean/tests",
165
+ "subjectType": "flow-step",
166
+ "claimType": "builder.verify.tests",
167
+ "fieldOrBehavior": "build/verify tests",
168
+ "value": "pass",
169
+ "impactLevel": "high",
170
+ "status": "verified",
171
+ "createdAt": "2026-06-26T00:00:00Z",
172
+ "updatedAt": "2026-06-26T00:00:00Z"
173
+ }],
174
+ "evidence": [{
175
+ "id": "ev2",
176
+ "claimId": "c2",
177
+ "evidenceType": "test_output",
178
+ "method": "validation",
179
+ "sourceRef": "command-log.jsonl",
180
+ "excerptOrSummary": "npm test passed",
181
+ "observedAt": "2026-06-26T00:00:00Z",
182
+ "collectedBy": "harness",
183
+ "passing": True,
184
+ "blocking": False
185
+ }],
186
+ "policies": [],
187
+ "events": [{
188
+ "id": "evt2",
189
+ "claimId": "c2",
190
+ "status": "verified",
191
+ "actor": "agent",
192
+ "method": "workflow-check",
193
+ "evidenceIds": ["ev2"],
194
+ "createdAt": "2026-06-26T00:00:00Z"
195
+ }]
196
+ }
197
+ json.dump(bundle, open(sys.argv[1], 'w'))
198
+ PY
199
+
200
+ # Run the gate once; `|| t2_exit=$?` records a non-zero exit with or without errexit,
200
+ # so no set +e/-e toggling. The check uses a here-string: `echo | grep -q` can SIGPIPE under pipefail.
201
+ t2_exit=0
202
+ t2_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
203
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T2_DIR\"}")" || t2_exit=$?
204
+
205
+ if [ "$t2_exit" -ne 2 ]; then
206
+ _pass "clean declared-type bundle not blocked (exit $t2_exit)"
207
+ else
208
+ _fail "clean declared-type bundle false-blocked (exit 2): $t2_out"
209
+ fi
210
+
211
+ if grep -q "caught false-completion" <<< "$t2_out"; then
212
+ _fail "clean declared-type bundle incorrectly emits false-completion: $t2_out"
213
+ else
214
+ _pass "clean declared-type bundle does not emit false-completion"
215
+ fi
217
+
218
+ # ─── Test 3: NO-ACTIVE-FLOW bundle uses workflow.* fallback path ─────────────
219
+ # current.json has NO active_flow_id/active_step_id (or no current.json at all).
220
+ # The trust.bundle has workflow.check.command claims with stored "disputed".
221
+ # Must still BLOCK via the workflow.* path (no regression from #133).
222
+ echo ""
223
+ echo "Test 3: no-active-flow bundle must use workflow.* fallback and still BLOCK"
224
+
225
+ T3_DIR="$TMP/t3"
226
+ seed_repo "$T3_DIR" "no-flow"
227
+
228
+ # current.json carries only artifact_dir: no active_flow_id/active_step_id keys.
229
+ printf '%s' '{"artifact_dir":"no-flow"}' \
230
+ > "$T3_DIR/.flow-agents/current.json"
231
+
232
+ python3 - "$T3_DIR/.flow-agents/no-flow/trust.bundle" << 'PY'
233
+ import json, sys
234
+ bundle = {
235
+ "schemaVersion": 3,
236
+ "source": "flow-agents/workflow-sidecar",
237
+ "claims": [{
238
+ "id": "c3",
239
+ "subjectId": "no-flow/unit-tests",
240
+ "subjectType": "workflow-check",
241
+ "claimType": "workflow.check.command",
242
+ "fieldOrBehavior": "unit tests",
243
+ "value": "fail",
244
+ "impactLevel": "high",
245
+ "status": "disputed", # stored as disputed (not tampered — correctly flagged)
246
+ "createdAt": "2026-06-26T00:00:00Z",
247
+ "updatedAt": "2026-06-26T00:00:00Z"
248
+ }],
249
+ "evidence": [],
250
+ "policies": [],
251
+ "events": []
252
+ }
253
+ json.dump(bundle, open(sys.argv[1], 'w'))
254
+ PY
255
+
256
+ # Run the gate once; `|| t3_exit=$?` records a non-zero exit with or without errexit,
256
+ # so no set +e/-e toggling. The check uses a here-string: `echo | grep -q` can SIGPIPE under pipefail.
257
+ t3_exit=0
258
+ t3_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
259
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T3_DIR\"}")" || t3_exit=$?
260
+
261
+ if [ "$t3_exit" -eq 2 ]; then
262
+ _pass "no-active-flow bundle still blocks via workflow.* fallback (exit 2)"
263
+ else
264
+ _fail "no-active-flow bundle did NOT block (exit $t3_exit): $t3_out"
265
+ fi
266
+
267
+ if grep -q "caught false-completion" <<< "$t3_out"; then
268
+ _pass "no-active-flow bundle emits caught false-completion"
269
+ else
270
+ _fail "no-active-flow bundle missing caught false-completion: $t3_out"
271
+ fi
273
+
274
+ # ─── Summary ─────────────────────────────────────────────────────────────────
275
+ echo ""
276
+ if [ "$errors" -eq 0 ]; then
277
+ echo "P-c enforcer expects-driven tests passed."
278
+ exit 0
279
+ fi
280
+ echo "P-c enforcer expects-driven tests FAILED: $errors issue(s)."
281
+ exit 1
@@ -0,0 +1,185 @@
1
+ #!/usr/bin/env bash
2
+ # test_evidence_capture_hook.sh — Capture-first evidence determinism contracts.
3
+ #
4
+ # Part A: evidence-capture.js deterministically records command executions to
5
+ # .flow-agents/<slug>/command-log.jsonl (machine-recorded, not model-claimed).
6
+ # Part B: stop-goal-fit.js cross-references evidence.json claimed-pass command
7
+ # checks against the capture log, and re-runs a TRUSTED backstop command
8
+ # only when the log has no execution for a claimed-pass command.
9
+ set -uo pipefail
10
+
11
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
12
+ CAPTURE="$ROOT/scripts/hooks/evidence-capture.js"
13
+ GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
14
+
15
+ # Disable the block escape hatch so repeated independent assertions never trip it.
16
+ export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
17
+
18
+ # Scratch root for all fixture repos; the EXIT trap removes it on every exit path.
+ TMP="$(mktemp -d)"; trap 'rm -rf -- "$TMP"' EXIT
19
+ errors=0
20
+ _pass() { echo " ✓ $1"; }
21
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
22
+
23
+ # ---- helpers -------------------------------------------------------------
24
+ # seed_repo DIR SLUG
+ # Builds a fixture repo under DIR for task SLUG: an AGENTS.md stub,
+ # .flow-agents/SLUG/state.json with status "delivered" / phase "done", and a
+ # SLUG--deliver.md artifact whose checklists are ticked and whose verdict is PASS.
+ # The MD heredoc delimiter is unquoted so $slug expands in the artifact title.
+ seed_repo() { # $1 dir, $2 slug
25
+ local p="$1" slug="$2"
26
+ mkdir -p "$p/.flow-agents/$slug"
27
+ printf '# Repo\n' > "$p/AGENTS.md"
28
+ printf '%s' "{\"schema_version\":\"1.0\",\"task_slug\":\"$slug\",\"status\":\"delivered\",\"phase\":\"done\",\"updated_at\":\"2026-06-23T00:00:00Z\",\"next_action\":{\"status\":\"done\",\"summary\":\"done\"}}" > "$p/.flow-agents/$slug/state.json"
29
+ cat > "$p/.flow-agents/$slug/$slug--deliver.md" <<MD
30
+ # $slug
31
+
32
+ branch: main
33
+ status: delivered
34
+ type: deliver
35
+
36
+ ## Definition Of Done
37
+ - [x] tests pass
38
+
39
+ ## Goal Fit Gate
40
+ - [x] acceptance verified
41
+
42
+ ### Verdict: PASS
43
+ MD
44
+ }
45
+
46
+ # Pipe one hook payload (JSON on stdin) into evidence-capture.js and discard its
+ # stdout/stderr; callers check the command-log.jsonl file afterwards instead.
+ capture() { # stdin = payload json
47
+ node "$CAPTURE" >/dev/null 2>&1
48
+ }
49
+
50
+ # ============================================================================
51
+ # Part A — deterministic capture
52
+ # ============================================================================
53
+ A="$TMP/capture"; seed_repo "$A" t1
54
+ echo "Part A: deterministic capture"
55
+
56
+ printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm test"},"tool_response":{"exitCode":0,"stdout":"ok"}}' "$A" | capture
57
+ printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm run lint"},"error":"command failed"}' "$A" | capture
58
+ printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"make build"},"tool_response":{"exit_code":2}}' "$A" | capture
59
+ # A non-command tool (Write) must NOT be captured.
60
+ printf '{"hook_event_name":"PostToolUse","tool_name":"Write","cwd":"%s","tool_input":{"file_path":"/tmp/x"}}' "$A" | capture
61
+
62
+ LOG="$A/.flow-agents/t1/command-log.jsonl"
63
+ if [[ -f "$LOG" ]]; then _pass "capture writes command-log.jsonl"; else _fail "capture did not write command-log.jsonl"; fi
64
+
65
+ lines=$(wc -l < "$LOG" | tr -d ' ')
66
+ if [[ "$lines" == "3" ]]; then _pass "capture records 3 command executions (Write tool excluded)"; else _fail "expected 3 log lines, got $lines"; fi
67
+
68
+ if rg -q '"command":"npm test","observedResult":"pass","exitCode":0' "$LOG"; then
69
+ _pass "clean exit 0 recorded as observedResult:pass exitCode:0"
70
+ else _fail "passing command not recorded correctly: $(cat "$LOG")"; fi
71
+
72
+ if rg -q '"command":"npm run lint","observedResult":"fail","exitCode":null' "$LOG"; then
73
+ _pass "error field with no exit code recorded as fail exitCode:null"
74
+ else _fail "errored command not recorded correctly"; fi
75
+
76
+ if rg -q '"command":"make build","observedResult":"fail","exitCode":2' "$LOG"; then
77
+ _pass "non-zero exit recorded as fail with exitCode"
78
+ else _fail "non-zero-exit command not recorded correctly"; fi
79
+
80
+ if rg -q '"source":"postToolUse-capture"' "$LOG"; then _pass "records source:postToolUse-capture"; else _fail "missing source field"; fi
81
+
82
+ # Capture is non-blocking: it always exits 0 and echoes stdin.
83
+ out=$(printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"echo hi"},"error":"boom"}' "$A" | node "$CAPTURE"; echo "EXIT=$?")
84
+ if rg -q 'EXIT=0' <<<"$out" && rg -q 'echo hi' <<<"$out"; then
85
+ _pass "capture is non-blocking (exit 0, echoes stdin) even on a failing command"
86
+ else _fail "capture should be non-blocking and echo stdin"; fi
87
+
88
+ # ============================================================================
89
+ # Part B1 — gate cross-references log: claimed pass but log shows FAIL → block
90
+ # ============================================================================
91
+ echo "Part B1: log contradicts claimed pass → block"
92
+ B="$TMP/contradict"; seed_repo "$B" t1
93
+ printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"tests passed"}]}' > "$B/.flow-agents/t1/evidence.json"
94
+ printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-23T00:00:00Z","source":"postToolUse-capture"}' > "$B/.flow-agents/t1/command-log.jsonl"
95
+
96
+ if FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip node "$GATE" >/dev/null 2>"$TMP/b1.err" <<JSON
97
+ {"hook_event_name":"Stop","cwd":"$B"}
98
+ JSON
99
+ then _fail "gate should BLOCK when capture log contradicts claimed pass"
100
+ else
101
+ status=$?
102
+ if [[ "$status" -eq 2 ]] && rg -q 'capture log CONTRADICTS claimed pass' "$TMP/b1.err" && rg -q 'caught false-completion' "$TMP/b1.err"; then
103
+ _pass "gate blocks (exit 2) caught false-completion via capture log"
104
+ else _fail "gate returned unexpected result: status=$status output=$(cat "$TMP/b1.err")"; fi
105
+ fi
106
+
107
+ # ============================================================================
108
+ # Part B2 — gate cross-references log: claimed pass and log shows PASS → no re-run
109
+ # ============================================================================
110
+ echo "Part B2: log confirms claimed pass → satisfied, no re-run"
111
+ C="$TMP/confirm"; seed_repo "$C" t1
112
+ printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"tests passed"}]}' > "$C/.flow-agents/t1/evidence.json"
113
+ printf '%s\n' '{"command":"npm test","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-23T00:00:00Z","source":"postToolUse-capture"}' > "$C/.flow-agents/t1/command-log.jsonl"
114
+ # A poisoned npm on PATH proves the gate does NOT re-run when the log confirms.
115
+ POISON="$TMP/poison"; mkdir -p "$POISON"
116
+ printf '#!/usr/bin/env bash\necho "npm should not run" >&2\nexit 99\n' > "$POISON/npm"; chmod +x "$POISON/npm"
117
+ PATH="$POISON:$PATH" FLOW_AGENTS_GOAL_FIT_MODE=block node "$GATE" >/dev/null 2>"$TMP/b2.err" <<JSON
118
+ {"hook_event_name":"Stop","cwd":"$C"}
119
+ JSON
120
+ if rg -q 'CONTRADICTS|backstop|npm should not run' "$TMP/b2.err"; then
121
+ _fail "gate should NOT re-run or warn when the capture log confirms the pass: $(cat "$TMP/b2.err")"
122
+ else _pass "gate trusts the log on a confirmed pass and does not re-run the backstop"; fi
123
+
124
+ # ============================================================================
125
+ # Part B3 — never-captured claimed-pass command → trusted backstop re-run (declared manifest target FAILS) → block
126
+ # ============================================================================
127
+ echo "Part B3: never-captured claim → trusted manifest backstop catches a fail"
128
+ D="$TMP/backstop"; seed_repo "$D" t1
129
+ printf '%s' '{"name":"x","scripts":{"test":"exit 7"}}' > "$D/package.json"
130
+ printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"tests passed"}]}' > "$D/.flow-agents/t1/evidence.json"
131
+ # command-log.jsonl intentionally absent — the command was never actually run.
132
+
133
+ if FLOW_AGENTS_GOAL_FIT_MODE=block node "$GATE" >/dev/null 2>"$TMP/b3.err" <<JSON
134
+ {"hook_event_name":"Stop","cwd":"$D"}
135
+ JSON
136
+ then _fail "gate should BLOCK when trusted backstop re-run of declared manifest target fails"
137
+ else
138
+ status=$?
139
+ if [[ "$status" -eq 2 ]] && rg -q 'trusted backstop \(manifest\)' "$TMP/b3.err" && rg -q 'FAILED with exit 7' "$TMP/b3.err"; then
140
+ _pass "gate runs trusted declared manifest target as backstop and blocks on its failure"
141
+ else _fail "backstop did not catch declared-target failure: status=$status output=$(cat "$TMP/b3.err")"; fi
142
+ fi
143
+
144
+ # ============================================================================
145
+ # Part B4 — never-captured claim, no trusted command resolves → NOT_VERIFIED (never a silent pass)
146
+ # ============================================================================
147
+ echo "Part B4: never-captured claim, nothing trusted resolves → NOT_VERIFIED"
148
+ E="$TMP/notverified"; seed_repo "$E" t1
149
+ printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"custom","kind":"command","status":"pass","command":"./my-thing.sh","summary":"ran custom"}]}' > "$E/.flow-agents/t1/evidence.json"
150
+
151
+ if FLOW_AGENTS_GOAL_FIT_MODE=block node "$GATE" >/dev/null 2>"$TMP/b4.err" <<JSON
152
+ {"hook_event_name":"Stop","cwd":"$E"}
153
+ JSON
154
+ then _fail "gate should not silently pass an un-captured, un-verifiable claimed-pass command"
155
+ else
156
+ status=$?
157
+ if [[ "$status" -eq 2 ]] && rg -q 'NOT_VERIFIED' "$TMP/b4.err" && rg -q 'no trusted command' "$TMP/b4.err"; then
158
+ _pass "gate records NOT_VERIFIED (never a guess) when no trusted command resolves"
159
+ else _fail "NOT_VERIFIED path returned unexpected result: status=$status output=$(cat "$TMP/b4.err")"; fi
160
+ fi
161
+
162
+ # ============================================================================
163
+ # Part B5 — arbitrary model command is opt-in only (FLOW_AGENTS_GOAL_FIT_RECHECK)
164
+ # ============================================================================
165
+ echo "Part B5: free-form model command re-run is opt-in only"
166
+ F="$TMP/recheck"; seed_repo "$F" t1
167
+ printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"custom","kind":"command","status":"pass","command":"exit 5","summary":"ran custom"}]}' > "$F/.flow-agents/t1/evidence.json"
168
+ # Opt-in ON: the model's free-form "exit 5" is re-run and fails → block.
169
+ if FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_RECHECK=true node "$GATE" >/dev/null 2>"$TMP/b5.err" <<JSON
170
+ {"hook_event_name":"Stop","cwd":"$F"}
171
+ JSON
172
+ then _fail "with RECHECK=true the failing model command should block"
173
+ else
174
+ status=$?
175
+ if [[ "$status" -eq 2 ]] && rg -q 'FLOW_AGENTS_GOAL_FIT_RECHECK' "$TMP/b5.err"; then
176
+ _pass "FLOW_AGENTS_GOAL_FIT_RECHECK=true opts into re-running the model's free-form command"
177
+ else _fail "recheck opt-in path returned unexpected result: status=$status output=$(cat "$TMP/b5.err")"; fi
178
+ fi
179
+
180
+ if [[ "$errors" -eq 0 ]]; then
181
+ echo "Evidence capture hook integration passed."
182
+ exit 0
183
+ fi
184
+ echo "Evidence capture hook integration failed: $errors issue(s)."
185
+ exit 1
@@ -53,6 +53,7 @@ expect_fail() {
53
53
 
54
54
  echo "=== Flow Kit Repository Fixture Checks ==="
55
55
  expect_pass "valid-local-kit"
56
+ expect_pass "valid-unknown-extension"
56
57
  expect_fail "invalid-schema-version" '\.schema_version must be "1\.0"'
57
58
  expect_fail "invalid-missing-schema-version" '\.schema_version must be "1\.0"'
58
59
  expect_fail "invalid-id" '\.id must be a kebab-case string'
@@ -63,6 +64,7 @@ expect_fail "invalid-absolute-path" 'flows\[0\]\.path must be relative'
63
64
  expect_fail "invalid-traversal" "flows\\[0\\]\\.path must not contain"
64
65
  expect_fail "invalid-malformed-json" 'invalid JSON'
65
66
  expect_fail "invalid-asset-section" '\.docs must be a list'
67
+ expect_fail "invalid-missing-extension-asset" 'docs\[0\]\.path points at missing asset'
66
68
  expect_fail "invalid-duplicate-flow" "flows\\[1\\]\\.path duplicates"
67
69
 
68
70
  echo ""