@kontourai/flow-agents 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +103 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/console-learning-projection.d.ts +1 -0
  14. package/build/src/cli/effective-backlog-settings.d.ts +1 -0
  15. package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
  16. package/build/src/cli/init.d.ts +17 -0
  17. package/build/src/cli/init.js +242 -20
  18. package/build/src/cli/kit.d.ts +1 -0
  19. package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
  20. package/build/src/cli/publish-change-helper.d.ts +1 -0
  21. package/build/src/cli/pull-work-provider.d.ts +1 -0
  22. package/build/src/cli/runtime-adapter.d.ts +1 -0
  23. package/build/src/cli/telemetry-doctor.d.ts +1 -0
  24. package/build/src/cli/usage-feedback.d.ts +1 -0
  25. package/build/src/cli/utterance-check.d.ts +1 -0
  26. package/build/src/cli/validate-hook-influence.d.ts +1 -0
  27. package/build/src/cli/validate-source-tree.d.ts +1 -0
  28. package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
  29. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  30. package/build/src/cli/verify.d.ts +1 -0
  31. package/build/src/cli/verify.js +90 -0
  32. package/build/src/cli/veritas-governance.d.ts +1 -0
  33. package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
  34. package/build/src/cli/workflow-sidecar.d.ts +324 -0
  35. package/build/src/cli/workflow-sidecar.js +1973 -90
  36. package/build/src/cli.d.ts +2 -0
  37. package/build/src/cli.js +2 -3
  38. package/build/src/flow-kit/validate.d.ts +81 -0
  39. package/build/src/index.d.ts +5 -0
  40. package/build/src/index.js +36 -0
  41. package/build/src/lib/args.d.ts +8 -0
  42. package/build/src/lib/flow-resolver.d.ts +82 -0
  43. package/build/src/lib/flow-resolver.js +237 -0
  44. package/build/src/lib/fs.d.ts +7 -0
  45. package/build/src/lib/workflow-learning-projection.d.ts +132 -0
  46. package/build/src/runtime-adapters.d.ts +18 -0
  47. package/build/src/tools/build-universal-bundles.d.ts +2 -0
  48. package/build/src/tools/build-universal-bundles.js +34 -22
  49. package/build/src/tools/common.d.ts +9 -0
  50. package/build/src/tools/generate-context-map.d.ts +2 -0
  51. package/build/src/tools/generate-context-map.js +3 -16
  52. package/build/src/tools/validate-package.d.ts +2 -0
  53. package/build/src/tools/validate-source-tree.d.ts +2 -0
  54. package/build/src/tools/validate-source-tree.js +42 -162
  55. package/context/contracts/artifact-contract.md +10 -0
  56. package/context/contracts/delivery-contract.md +1 -0
  57. package/context/contracts/review-contract.md +1 -0
  58. package/context/contracts/verification-contract.md +2 -0
  59. package/context/gate-awareness.md +39 -0
  60. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  61. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  62. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  63. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  64. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  65. package/docs/adr/0007-skill-audit.md +1 -1
  66. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  67. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  68. package/docs/adr/0011-mcp-posture.md +100 -0
  69. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  70. package/docs/adr/0013-context-lifecycle.md +151 -0
  71. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  72. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  73. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  74. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  75. package/docs/agent-system-guidebook.md +5 -12
  76. package/docs/context-map.md +4 -10
  77. package/docs/developer-architecture.md +14 -0
  78. package/docs/index.md +3 -2
  79. package/docs/integrations/framework-adapter.md +19 -6
  80. package/docs/integrations/index.md +2 -2
  81. package/docs/north-star.md +4 -4
  82. package/docs/operating-layers.md +3 -3
  83. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  84. package/docs/repository-structure.md +2 -2
  85. package/docs/skills-map.md +1 -0
  86. package/docs/spec/runtime-hook-surface.md +78 -10
  87. package/docs/standards-register.md +3 -3
  88. package/docs/survey-utterance-check.md +1 -1
  89. package/docs/trust-anchor-adoption.md +197 -0
  90. package/docs/verifiable-trust.md +95 -0
  91. package/docs/veritas-integration.md +2 -2
  92. package/docs/workflow-usage-guide.md +69 -0
  93. package/evals/acceptance/DEMO-false-completion.md +144 -0
  94. package/evals/acceptance/demo-cast.sh +92 -0
  95. package/evals/acceptance/demo-false-completion.sh +72 -0
  96. package/evals/acceptance/demo-real-evidence.sh +104 -0
  97. package/evals/acceptance/demo.tape +29 -0
  98. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  99. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  100. package/evals/acceptance/prove-teeth.sh +105 -0
  101. package/evals/ci/antigaming-suite.sh +54 -0
  102. package/evals/ci/run-baseline.sh +2 -0
  103. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  104. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  105. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  106. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  107. package/evals/integration/test_builder_step_producers.sh +379 -0
  108. package/evals/integration/test_bundle_install.sh +35 -71
  109. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  110. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  111. package/evals/integration/test_checkpoint_signing.sh +489 -0
  112. package/evals/integration/test_claim_lookup.sh +352 -0
  113. package/evals/integration/test_command_log_integrity.sh +275 -0
  114. package/evals/integration/test_context_map.sh +0 -2
  115. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  116. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  117. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  118. package/evals/integration/test_flow_kit_repository.sh +2 -0
  119. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  120. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  121. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  122. package/evals/integration/test_gate_lockdown.sh +1137 -0
  123. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  124. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  125. package/evals/integration/test_goal_fit_hook.sh +69 -4
  126. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  127. package/evals/integration/test_hook_category_behaviors.sh +14 -0
  128. package/evals/integration/test_install_merge.sh +1176 -0
  129. package/evals/integration/test_mint_attestation.sh +373 -0
  130. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  131. package/evals/integration/test_publish_delivery.sh +269 -0
  132. package/evals/integration/test_reconcile_soundness.sh +528 -0
  133. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  134. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  135. package/evals/integration/test_trust_checkpoint.sh +325 -0
  136. package/evals/integration/test_trust_reconcile.sh +293 -0
  137. package/evals/integration/test_verify_cli.sh +208 -0
  138. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  139. package/evals/lib/node.sh +0 -6
  140. package/evals/run.sh +47 -0
  141. package/evals/static/test_library_exports.sh +85 -0
  142. package/evals/static/test_universal_bundles.sh +15 -0
  143. package/evals/static/test_workflow_skills.sh +6 -13
  144. package/install.sh +0 -7
  145. package/integrations/strands-ts/README.md +25 -15
  146. package/integrations/veritas/flow-agents.adapter.json +1 -2
  147. package/kits/builder/flows/build.flow.json +59 -12
  148. package/kits/builder/kit.json +85 -15
  149. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  150. package/kits/builder/skills/deliver/SKILL.md +36 -6
  151. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  152. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  153. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  154. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  155. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  156. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  157. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  158. package/kits/knowledge/adapters/default-store/index.js +38 -0
  159. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  160. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  161. package/kits/knowledge/docs/store-contract.md +314 -0
  162. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  163. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  164. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  165. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  166. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  167. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  168. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  169. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  170. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  171. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  172. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  173. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  174. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  175. package/kits/knowledge/kit.json +51 -1
  176. package/package.json +13 -4
  177. package/packaging/conformance/README.md +10 -2
  178. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  179. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  180. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  181. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  182. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  183. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  184. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  185. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  186. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  187. package/packaging/conformance/run-conformance.js +1 -1
  188. package/scripts/README.md +2 -1
  189. package/scripts/build-universal-bundles.js +0 -1
  190. package/scripts/ci/mint-attestation.js +221 -0
  191. package/scripts/ci/trust-reconcile.js +545 -0
  192. package/scripts/hooks/config-protection.js +423 -1
  193. package/scripts/hooks/evidence-capture.js +348 -0
  194. package/scripts/hooks/lib/liveness-read.js +113 -0
  195. package/scripts/hooks/run-hook.js +6 -1
  196. package/scripts/hooks/stop-goal-fit.js +1471 -79
  197. package/scripts/hooks/workflow-steering.js +135 -5
  198. package/scripts/install-codex-home.sh +39 -0
  199. package/scripts/install-merge.js +330 -0
  200. package/src/cli/init.ts +218 -20
  201. package/src/cli/validate-workflow-artifacts.ts +18 -2
  202. package/src/cli/verify.ts +100 -0
  203. package/src/cli/workflow-sidecar.ts +2093 -84
  204. package/src/cli.ts +2 -3
  205. package/src/index.ts +53 -0
  206. package/src/lib/flow-resolver.ts +284 -0
  207. package/src/tools/build-universal-bundles.ts +34 -21
  208. package/src/tools/generate-context-map.ts +3 -17
  209. package/src/tools/validate-source-tree.ts +44 -104
  210. package/tsconfig.json +1 -0
  211. package/build/src/tools/filter-installed-packs.js +0 -135
  212. package/packaging/packs.json +0 -49
  213. package/scripts/filter-installed-packs.js +0 -2
  214. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,335 @@
1
+ #!/usr/bin/env bash
2
+ # prove-capture-teeth-declared.sh — Permanent regression proof that the
3
+ # capture cross-reference gate BLOCKS declared-type false-completions.
4
+ #
5
+ # Bug closed: captureCrossReference previously called bundleClaimedPassCommandChecks
6
+ # WITHOUT declaredClaimTypes, so sessions with a FlowDefinition active (e.g.
7
+ # builder.build / verify step) could emit declared-type claims (builder.verify.tests)
8
+ # that the cross-reference was completely blind to. A command-log recording FAIL for
9
+ # "npm test" would NOT block even though the trust.bundle evidence claimed it passed.
10
+ # ADR 0016 P-c fix: captureCrossReference now accepts activeFlowStep and threads
11
+ # declaredClaimTypes into bundleClaimedPassCommandChecks, mirroring bundleEnforcement
12
+ # and sidecarGuidance.
13
+ #
14
+ # This eval:
15
+ # 1. Proves the fix BLOCKS (exit 2): declared-type evidence claims pass, command-log
16
+ # says FAIL → gate emits "caught false-completion".
17
+ # 2. Proves the control case SHIPS (exit 0): same fixture with a PASS log.
18
+ # 3. Proves the workflow.check.* path still BLOCKS (no regression on original case).
19
+ #
20
+ # Deterministic — no model spend, no bundle install required.
21
+ # Usage: bash evals/acceptance/prove-capture-teeth-declared.sh
22
# Unset variables and failed pipeline stages are errors. errexit is not enabled
# here: assertions below record failures in $errors instead of aborting.
set -uo pipefail

# Repository root, resolved two directories above this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# The Stop hook under test.
GATE="$ROOT/scripts/hooks/stop-goal-fit.js"

# NOTE(review): presumably lifts the hook's block cap so every run in this
# proof can still block — confirm against stop-goal-fit.js.
export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000

# Scratch directory for all fixtures. Abort early when it cannot be created so
# fixtures are never written relative to an empty path.
TMP="$(mktemp -d)" || { echo "mktemp -d failed" >&2; exit 1; }
errors=0

# _pass MSG — print a success line.
_pass() { echo " ✓ $1"; }
# _fail MSG — print a failure line and bump the global failure counter.
_fail() { echo " ✗ $1"; errors=$((errors + 1)); }

# Remove the scratch directory on any exit path. ${TMP:?} refuses to run
# rm -rf with an empty path.
cleanup() { rm -rf -- "${TMP:?}"; }
trap cleanup EXIT
36
+
37
# ─── helper: seed a minimal delivered workflow artifact ───────────────────────
# seed_repo DIR SLUG
#   Creates, under DIR:
#     AGENTS.md
#     .flow-agents/SLUG/state.json        status=delivered, phase=done
#     .flow-agents/SLUG/SLUG--deliver.md  every box ticked, "Verdict: PASS"
seed_repo() {
  local repo_dir="$1" task="$2"
  local artifact_dir="$repo_dir/.flow-agents/$task"

  mkdir -p "$artifact_dir"
  printf '# Repo\n' > "$repo_dir/AGENTS.md"

  # state.json is written without a trailing newline.
  printf '{"schema_version":"1.0","task_slug":"%s","status":"delivered","phase":"done","updated_at":"2026-06-27T00:00:00Z","next_action":{"status":"done","summary":"done"}}' \
    "$task" > "$artifact_dir/state.json"

  # Unquoted heredoc delimiter: $task is expanded into the title line.
  cat > "$artifact_dir/$task--deliver.md" << MD
# $task

branch: main
status: delivered
type: deliver

## Definition Of Done
- [x] tests pass

## Goal Fit Gate
- [x] acceptance verified

### Verdict: PASS
MD
}
60
+
61
# ─── helper: write the declared-type trust.bundle ─────────────────────────────
# Evidence item has execution.label="npm test" linked to a builder.verify.tests claim
# that asserts pass. The cross-reference must catch the command-log contradiction.
#
# write_declared_bundle BUNDLE_PATH
#   Writes the bundle as JSON to BUNDLE_PATH (overwriting it). Requires python3.
write_declared_bundle() { # $1=bundle-path
  python3 - "$1" << 'PY'
import json, sys

bundle = {
    "schemaVersion": 3,
    "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c1",
        "subjectId": "declared-false/tests",
        "subjectType": "flow-step",
        "claimType": "builder.verify.tests",
        "fieldOrBehavior": "npm test",
        "value": "pass",
        "impactLevel": "high",
        "status": "verified",
        "createdAt": "2026-06-27T00:00:00Z",
        "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [{
        "id": "ev1",
        "claimId": "c1",
        "evidenceType": "command_output",
        "method": "capture",
        "sourceRef": "command-log.jsonl",
        "excerptOrSummary": "npm test passed (agent claimed)",
        "observedAt": "2026-06-27T00:00:00Z",
        "collectedBy": "agent",
        "passing": True,
        "execution": {
            "label": "npm test",
            "exitCode": 0
        }
    }],
    "policies": [],
    "events": []
}

# Close the handle explicitly so the bundle is fully flushed before the gate
# reads it, instead of relying on interpreter teardown.
with open(sys.argv[1], "w") as fh:
    json.dump(bundle, fh)
PY
}
103
+
104
# Minimal FlowDefinition: verify-gate expects builder.verify.tests
# Using FLOW_AGENTS_FLOW_DEFS_DIR so the test is self-contained (no kits/ needed).
# The directory lives under $TMP, so it is removed with the other fixtures.
FLOW_DEFS_DIR="$TMP/flows"
mkdir -p "$FLOW_DEFS_DIR"

# Quoted delimiter: the JSON is written literally, with no shell expansion.
cat << 'FLOWJSON' > "$FLOW_DEFS_DIR/builder.build.flow.json"
{
  "id": "builder.build",
  "version": "1.0",
  "gates": {
    "verify-gate": {
      "step": "verify",
      "expects": [
        {
          "id": "tests-evidence",
          "kind": "trust.bundle",
          "required": true,
          "bundle_claim": {
            "claimType": "builder.verify.tests",
            "subjectType": "flow-step",
            "accepted_statuses": ["trusted", "accepted"]
          }
        }
      ]
    }
  }
}
FLOWJSON
131
+
132
# ─── Test 1: declared-type false-completion MUST BLOCK ────────────────────────
echo "Test 1: declared-type evidence claims pass, command-log records FAIL → must BLOCK"
echo " (This is the hole: pre-fix the gate was blind to builder.verify.tests claims)"

T1="$TMP/t1"
seed_repo "$T1" "declared-false"

# current.json: active FlowDefinition
printf '%s' '{"artifact_dir":"declared-false","active_flow_id":"builder.build","active_step_id":"verify"}' \
  > "$T1/.flow-agents/current.json"

write_declared_bundle "$T1/.flow-agents/declared-false/trust.bundle"

# command-log: npm test recorded as FAIL — the independent truth source says FAILED
printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' \
  > "$T1/.flow-agents/declared-false/command-log.jsonl"

# Run the gate and capture its exit status with `|| var=$?`. This works whether
# or not errexit is active and, unlike a `set +e … set -e` bracket, does not
# leave errexit switched on for the remainder of the script.
t1_exit=0
t1_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block \
  FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  FLOW_AGENTS_FLOW_DEFS_DIR="$FLOW_DEFS_DIR" \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T1\"}")" || t1_exit=$?

if [ "$t1_exit" -eq 2 ]; then
  _pass "declared-type false-completion BLOCKED (exit 2)"
else
  _fail "declared-type false-completion NOT blocked: exit=$t1_exit output=$t1_out"
fi

# The output checks feed grep through a here-string rather than `echo … |`:
# under pipefail, grep -q exiting on its first match can make the writer fail
# with SIGPIPE and turn a real match into a reported miss.
if grep -q "caught false-completion" <<< "$t1_out"; then
  _pass "emits 'caught false-completion' message"
else
  _fail "missing 'caught false-completion' in output: $t1_out"
fi

if grep -q "capture log CONTRADICTS claimed pass" <<< "$t1_out"; then
  _pass "emits 'capture log CONTRADICTS claimed pass' message"
else
  _fail "missing contradicts message in output: $t1_out"
fi

if grep -q "npm test" <<< "$t1_out"; then
  _pass "warning names the contradicted command (npm test)"
else
  _fail "warning does not name the command: $t1_out"
fi
180
+
181
# ─── Test 2: control — matching PASS log should SHIP (no false-block) ─────────
echo ""
echo "Test 2: same fixture but command-log records PASS → must SHIP (exit 0)"

T2="$TMP/t2"
seed_repo "$T2" "declared-pass"

printf '%s' '{"artifact_dir":"declared-pass","active_flow_id":"builder.build","active_step_id":"verify"}' \
  > "$T2/.flow-agents/current.json"

# Same bundle shape as Test 1 (the claim asserts pass), written as a separate
# copy with ids scoped to this fixture; here the command-log confirms the pass.
python3 - "$T2/.flow-agents/declared-pass/trust.bundle" << 'PY'
import json, sys

bundle = {
    "schemaVersion": 3,
    "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c2",
        "subjectId": "declared-pass/tests",
        "subjectType": "flow-step",
        "claimType": "builder.verify.tests",
        "fieldOrBehavior": "npm test",
        "value": "pass",
        "impactLevel": "high",
        "status": "verified",
        "createdAt": "2026-06-27T00:00:00Z",
        "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [{
        "id": "ev2",
        "claimId": "c2",
        "evidenceType": "command_output",
        "method": "capture",
        "sourceRef": "command-log.jsonl",
        "excerptOrSummary": "npm test passed",
        "observedAt": "2026-06-27T00:00:00Z",
        "collectedBy": "agent",
        "passing": True,
        "execution": {
            "label": "npm test",
            "exitCode": 0
        }
    }],
    "policies": [],
    "events": []
}

# Close the handle explicitly so the bundle is fully written before the gate runs.
with open(sys.argv[1], "w") as fh:
    json.dump(bundle, fh)
PY

# command-log: npm test recorded as PASS — confirming evidence
printf '%s\n' '{"command":"npm test","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' \
  > "$T2/.flow-agents/declared-pass/command-log.jsonl"

# Capture the exit status with `|| var=$?` so the errexit setting is neither
# relied upon nor changed for the rest of the script.
t2_exit=0
t2_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block \
  FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  FLOW_AGENTS_FLOW_DEFS_DIR="$FLOW_DEFS_DIR" \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T2\"}")" || t2_exit=$?

# Only exit code 2 counts as a false block here; any other status passes.
if [ "$t2_exit" -ne 2 ]; then
  _pass "confirming log clears the cross-reference (no false-block, exit $t2_exit)"
else
  _fail "confirming log incorrectly blocked (exit 2): $t2_out"
fi

# Here-string instead of `echo … | grep -q`: under pipefail an early grep exit
# can surface as a SIGPIPE failure of the writer and invert the result.
if grep -q "caught false-completion" <<< "$t2_out"; then
  _fail "confirming log incorrectly emits false-completion: $t2_out"
else
  _pass "confirming log does not emit false-completion"
fi
253
+
254
# ─── Test 3: workflow.check.* path still BLOCKS (regression guard) ────────────
echo ""
echo "Test 3: workflow.check.* false-completion still BLOCKS (no regression on original case)"

T3="$TMP/t3"
seed_repo "$T3" "wf-false"

# No current.json active flow → loadActiveFlowStep returns null → workflow.* fallback
printf '%s' '{"artifact_dir":"wf-false"}' \
  > "$T3/.flow-agents/current.json"

python3 - "$T3/.flow-agents/wf-false/trust.bundle" << 'PY'
import json, sys

bundle = {
    "schemaVersion": 3,
    "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c3",
        "subjectId": "wf-false/unit-tests",
        "subjectType": "workflow-check",
        "claimType": "workflow.check.command",
        "fieldOrBehavior": "npm test",
        "value": "pass",
        "impactLevel": "high",
        "status": "verified",
        "createdAt": "2026-06-27T00:00:00Z",
        "updatedAt": "2026-06-27T00:00:00Z"
    }],
    "evidence": [{
        "id": "ev3",
        "claimId": "c3",
        "evidenceType": "command_output",
        "method": "capture",
        "sourceRef": "command-log.jsonl",
        "excerptOrSummary": "npm test passed (agent claimed)",
        "observedAt": "2026-06-27T00:00:00Z",
        "collectedBy": "agent",
        "passing": True,
        "execution": {
            "label": "npm test",
            "exitCode": 0
        }
    }],
    "policies": [],
    "events": []
}

# Close the handle explicitly so the bundle is fully written before the gate runs.
with open(sys.argv[1], "w") as fh:
    json.dump(bundle, fh)
PY

# command-log: npm test recorded as FAIL
printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-27T00:00:00Z","source":"postToolUse-capture"}' \
  > "$T3/.flow-agents/wf-false/command-log.jsonl"

# Capture the exit status with `|| var=$?` so the errexit setting is neither
# relied upon nor changed for the rest of the script.
t3_exit=0
t3_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block \
  FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  FLOW_AGENTS_FLOW_DEFS_DIR="$FLOW_DEFS_DIR" \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T3\"}")" || t3_exit=$?

if [ "$t3_exit" -eq 2 ]; then
  _pass "workflow.check.* false-completion still BLOCKS (no regression)"
else
  _fail "workflow.check.* false-completion NOT blocked: exit=$t3_exit output=$t3_out"
fi

# Here-string instead of `echo … | grep -q`: under pipefail an early grep exit
# can surface as a SIGPIPE failure of the writer and invert the result.
if grep -q "caught false-completion" <<< "$t3_out"; then
  _pass "workflow.check.* path still emits 'caught false-completion'"
else
  _fail "workflow.check.* path missing 'caught false-completion': $t3_out"
fi
326
+
327
# ─── Summary ──────────────────────────────────────────────────────────────────
# Exit 1 when any assertion recorded a failure in $errors, otherwise exit 0.
echo ""
if [ "$errors" -ne 0 ]; then
  echo "prove-capture-teeth-declared: FAILED ($errors issue(s))."
  exit 1
fi
echo "prove-capture-teeth-declared: all tests passed."
echo "PROOF: declared-type false-completions are blocked; workflow.check.* path unaffected."
exit 0
@@ -0,0 +1,114 @@
1
+ #!/usr/bin/env bash
2
+ # prove-capture-teeth.sh — Deterministic proof (no model spend) that capture-first
3
+ # evidence determinism has teeth through the SHIPPED bundles: an agent claims a
4
+ # command passed, but the deterministically-captured command-log shows it actually
5
+ # FAILED → Stop is blocked. Also proves the trusted backstop catches a never-run
6
+ # claimed-pass command, and that a matching capture log lets Stop through.
7
+ #
8
+ # Mirrors prove-teeth.sh: installs each bundle and runs the installed hook commands
9
+ # with seeded .flow-agents state, exactly as the runtime would on PostToolUse / Stop.
10
+ #
11
+ # Usage: bash evals/acceptance/prove-capture-teeth.sh
12
# Unset variables and failed pipeline stages are errors. errexit is not
# enabled: checks below record results in $pass / $fail.
set -uo pipefail

# Repository root, resolved two directories above this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
pass=0; fail=0
# _p MSG — print a success line and count it.
_p(){ echo " ✓ $1"; pass=$((pass+1)); }
# _f MSG — print a failure line and count it.
_f(){ echo " ✗ $1"; fail=$((fail+1)); }

echo "Building bundles..."
# The build stays quiet on success. On failure its captured output is replayed
# on stderr so the cause can be diagnosed; stdout and the exit code keep the
# original "build failed" / 1 contract.
build_log="$(cd "$ROOT" && npm run build:bundles 2>&1)" || {
  echo "build failed"
  printf '%s\n' "$build_log" >&2
  exit 1
}
21
+
22
# hook_cmd CONFIG_JSON EVENT NEEDLE
#   Prints the first hook command registered under hooks[EVENT] in CONFIG_JSON
#   whose command string contains NEEDLE. Prints nothing when no hook matches.
#   Entries without a "hooks" list or a string "command" are skipped instead of
#   aborting the lookup. Requires python3.
hook_cmd(){ # $1 settings/hooks json, $2 event, $3 script needle
  python3 - "$1" "$2" "$3" <<'PY'
import json, sys

config_path, event, needle = sys.argv[1:4]
with open(config_path) as fh:
    settings = json.load(fh)

for group in settings.get("hooks", {}).get(event, []):
    for hook in group.get("hooks", []):
        command = hook.get("command")
        if isinstance(command, str) and needle in command:
            print(command)
            sys.exit(0)
sys.exit(0)
PY
}
33
+
34
# Seed: model CLAIMS the command passed (evidence.json) but the deterministic
# capture log recorded it as FAIL — a false-completion the gate must catch.
#
# seed_capture_false_pass PROJECT_DIR
#   Writes the "cap-false" artifact under PROJECT_DIR/.flow-agents/cap-false
#   (state.json, evidence.json, command-log.jsonl, cap-false--deliver.md) and
#   creates PROJECT_DIR/AGENTS.md when it is missing.
seed_capture_false_pass(){ # $1 project dir
  local project="$1"
  local artifact_dir="$project/.flow-agents/cap-false"

  mkdir -p "$artifact_dir"
  if [ ! -f "$project/AGENTS.md" ]; then
    printf '# Repo\n' > "$project/AGENTS.md"
  fi

  # Delivered/done state.
  printf '%s' '{"schema_version":"1.0","task_slug":"cap-false","status":"delivered","phase":"done","updated_at":"2026-06-23T00:00:00Z","next_action":{"status":"done","summary":"done"}}' \
    > "$artifact_dir/state.json"
  # The claim: "npm test" passed.
  printf '%s' '{"schema_version":"1.0","task_slug":"cap-false","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"tests passed"}]}' \
    > "$artifact_dir/evidence.json"
  # The captured record: "npm test" failed with exit code 1.
  printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-23T00:00:00Z","source":"postToolUse-capture"}' \
    > "$artifact_dir/command-log.jsonl"

  cat <<'MD' > "$artifact_dir/cap-false--deliver.md"
# Cap False

branch: main
status: delivered
type: deliver

## Definition Of Done
- [x] all unit tests pass

## Goal Fit Gate
- [x] acceptance criteria verified

### Verdict: PASS
MD
}
58
+
59
# is_block — reads hook output on stdin and succeeds when it contains a
# "decision":"block" pair. Whitespace around the colon is tolerated so that
# pretty-printed JSON output is recognised as well as the compact form.
is_block(){ grep -Eq '"decision"[[:space:]]*:[[:space:]]*"block"'; }
60
+
61
# _run_bundle_checks LABEL INSTALLER CFGREL HOMEVAR HOME_DIR PROJ_DIR
#   Private helper for run_bundle: installs the bundle into HOME_DIR and runs
#   the capture and Stop-gate checks against PROJ_DIR, recording results with
#   _p / _f. Returns early after the first setup failure.
_run_bundle_checks(){
  local label="$1" installer="$2" cfgrel="$3" homevar="$4" home="$5" proj="$6"

  bash "$installer" "$home" >/dev/null 2>&1 || { _f "$label install.sh failed"; return; }
  local cfg="$home/$cfgrel"
  [ -f "$cfg" ] || { _f "$label config not found at $cfgrel after install"; return; }
  [ -f "$home/scripts/hooks/evidence-capture.js" ] || { _f "$label bundle missing evidence-capture.js after install"; return; }

  # --- Capture hook is wired on PostToolUse in the shipped config ---
  local capcmd; capcmd="$(hook_cmd "$cfg" PostToolUse evidence-capture)"
  [ -n "$capcmd" ] || { _f "$label: no PostToolUse evidence-capture hook in shipped config"; return; }
  _p "$label ships evidence-capture on PostToolUse"

  # The capture hook deterministically records a real command result through the
  # installed adapter path.
  mkdir -p "$proj/.flow-agents/live-cap"
  [ -f "$proj/AGENTS.md" ] || printf '# Repo\n' > "$proj/AGENTS.md"
  printf '%s' '{"schema_version":"1.0","task_slug":"live-cap","status":"in_progress","phase":"verification","updated_at":"2026-06-23T00:00:00Z"}' > "$proj/.flow-agents/live-cap/state.json"
  # The hook's own exit status is ignored on purpose; the assertion below
  # inspects what it wrote to command-log.jsonl.
  printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm run lint"},"error":"command failed"}' "$proj" \
    | env "$homevar=$home" CLAUDE_PROJECT_DIR="$home" bash -c "$capcmd" >/dev/null 2>&1 || true
  # grep -F instead of rg: ripgrep is not guaranteed to be installed, and the
  # 2>/dev/null here would turn "rg: command not found" into a bogus failure.
  # 2>/dev/null still covers the case where the log file was never created.
  if grep -qF '"command":"npm run lint","observedResult":"fail"' "$proj/.flow-agents/live-cap/command-log.jsonl" 2>/dev/null; then
    _p "$label capture hook records a real FAIL to command-log.jsonl through the installed adapter"
  else
    _f "$label capture hook did not record the command result: $(cat "$proj/.flow-agents/live-cap/command-log.jsonl" 2>/dev/null)"
  fi

  # --- Teeth: claims-pass-but-log-shows-fail → Stop is BLOCKED ---
  seed_capture_false_pass "$proj"
  local stopcmd; stopcmd="$(hook_cmd "$cfg" Stop stop-goal-fit)"
  [ -n "$stopcmd" ] || { _f "$label: no Stop stop-goal-fit hook in shipped config"; return; }
  local blk; blk="$(printf '{"hook_event_name":"Stop","cwd":"%s"}' "$proj" | env "$homevar=$home" CLAUDE_PROJECT_DIR="$home" FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip bash -c "$stopcmd" 2>/dev/null)"
  # Explicit if/else (not `A && B || C`) and a here-string (not `echo … |`):
  # under pipefail an early grep -q exit must not be misread as "no match".
  if is_block <<< "$blk"; then
    _p "$label BLOCKS a claimed-pass command that the capture log recorded as FAIL"
  else
    _f "$label did NOT block the captured false-completion: $blk"
  fi

  # control: a matching capture log (pass) lets Stop through on the capture axis.
  printf '%s\n' '{"command":"npm test","observedResult":"pass","exitCode":0,"capturedAt":"2026-06-23T00:00:00Z","source":"postToolUse-capture"}' > "$proj/.flow-agents/cap-false/command-log.jsonl"
  local okblk; okblk="$(printf '{"hook_event_name":"Stop","cwd":"%s"}' "$proj" | env "$homevar=$home" CLAUDE_PROJECT_DIR="$home" FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip bash -c "$stopcmd" 2>&1)"
  if grep -q 'caught false-completion' <<< "$okblk"; then
    _f "$label control: a confirming capture log should not raise a false-completion"
  else
    _p "$label control: a confirming capture log clears the false-completion (no re-run)"
  fi
}

# run_bundle LABEL INSTALLER CFGREL HOMEVAR
#   LABEL     human-readable bundle name used in the report lines
#   INSTALLER path to the bundle's install.sh
#   CFGREL    path of the installed hook config, relative to the install home
#   HOMEVAR   name of the environment variable that carries the install home
#   Installs the bundle into a throwaway home, runs the checks against a
#   throwaway project, and removes both directories on every path out.
run_bundle(){ # $1 label, $2 install.sh, $3 settings-json-rel, $4 home-env-name
  local label="$1" installer="$2" cfgrel="$3" homevar="$4"
  echo ""
  echo "── $label: shipped bundle install ──"
  local home proj
  home="$(mktemp -d)" || { _f "$label could not create a temp home directory"; return; }
  proj="$(mktemp -d)" || { rm -rf -- "${home:?}"; _f "$label could not create a temp project directory"; return; }

  _run_bundle_checks "$label" "$installer" "$cfgrel" "$homevar" "$home" "$proj"

  # Previously both directories were left behind after every run.
  rm -rf -- "${home:?}" "${proj:?}"
}
106
+
107
# Exercise both shipped bundles, then report totals. The exit status is 0 only
# when no check failed.
run_bundle "Claude Code" "$ROOT/dist/claude-code/install.sh" ".claude/settings.json" "CLAUDE_PROJECT_DIR"
run_bundle "Codex" "$ROOT/dist/codex/install.sh" ".codex/hooks.json" "CODEX_HOME"

echo ""
echo "──────────────────────────────────"
echo "prove-capture-teeth: $pass passed, $fail failed"
if [ "$fail" -eq 0 ]; then
  echo "PROOF: shipped bundles capture real command results and BLOCK claimed-pass-but-actually-failed completions."
  exit 0
fi
exit 1
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env bash
2
+ # prove-teeth.sh — End-to-end proof that the SHIPPED bundles enforce goal fit
3
+ # (block mode) and re-ground active goals (SessionStart re-injection), through
4
+ # the real install + adapter path, for Claude Code and Codex.
5
+ #
6
+ # This is deterministic (no live model spend): it installs each bundle and runs
7
+ # the installed hook commands with seeded .flow-agents state, exactly as the
8
+ # runtime would on a Stop / SessionStart event.
9
+ #
10
+ # Usage: bash evals/acceptance/prove-teeth.sh
11
# Unset variables are errors and a pipeline fails when any stage fails.
# errexit is not enabled: individual checks record failures instead.
set -u
set -o pipefail

# Repository root: two directories above the directory holding this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Running tallies, incremented by the _p / _f helpers.
pass=0
fail=0
15
# _p MESSAGE — report a passing check and bump the global pass counter.
_p(){
  printf ' ✓ %s\n' "$1"
  pass=$((pass + 1))
}

# _f MESSAGE — report a failing check and bump the global fail counter.
_f(){
  printf ' ✗ %s\n' "$1"
  fail=$((fail + 1))
}
17
+
18
# Build the distributable bundles from the repository root. Build output is
# discarded; the proof cannot run without the bundles, so a failed build aborts.
echo "Building bundles..."
if ! (cd "$ROOT" && npm run build:bundles >/dev/null 2>&1); then
  echo "build failed"
  exit 1
fi
20
+
21
# hook_cmd SETTINGS_JSON EVENT NEEDLE
#
# Print the first installed hook command registered under hooks[EVENT] whose
# command string contains NEEDLE. Prints nothing (and still exits 0) when no
# hook matches, so callers test the captured output for emptiness.
#
#   $1  path to the installed settings/hooks JSON file
#   $2  hook event name (e.g. Stop, SessionStart)
#   $3  substring to look for in the hook command (script-name needle)
#
# Matcher groups without a "hooks" list and hook entries without a string
# "command" are skipped instead of aborting the whole lookup, so one unrelated
# entry in the config cannot hide a hook that is actually installed.
# An unreadable or malformed JSON file still makes python3 exit non-zero.
hook_cmd(){ # $1 settings/hooks json, $2 event, $3 script needle
  python3 - "$1" "$2" "$3" <<'PY'
import json,sys
path,event,needle=sys.argv[1],sys.argv[2],sys.argv[3]
with open(path) as fh:
    s=json.load(fh)
groups=(s.get("hooks") or {}).get(event) or []
for g in groups:
    if not isinstance(g,dict):
        continue
    for h in g.get("hooks") or []:
        cmd=h.get("command") if isinstance(h,dict) else None
        if isinstance(cmd,str) and needle in cmd:
            print(cmd); sys.exit(0)
sys.exit(0)
PY
}
33
+
34
# seed_false_completion PROJECT_DIR
#
# Seed PROJECT_DIR with a "false-done" task whose recorded evidence says FAIL
# while its deliver markdown claims PASS:
#   - AGENTS.md (created only when absent)
#   - .flow-agents/false-done/state.json    (in_progress, verification phase)
#   - .flow-agents/false-done/evidence.json (verdict "fail")
#   - .flow-agents/false-done/false-done--deliver.md (checked boxes, Verdict: PASS)
seed_false_completion(){ # $1 project dir — evidence FAIL but markdown claims PASS
  local proj_dir="$1"
  local task_dir="$proj_dir/.flow-agents/false-done"
  mkdir -p "$task_dir"

  if [ ! -f "$proj_dir/AGENTS.md" ]; then
    printf '# Repo\n' > "$proj_dir/AGENTS.md"
  fi

  local state_json='{"schema_version":"1.0","task_slug":"false-done","status":"in_progress","phase":"verification","updated_at":"2026-06-18T00:00:00Z","next_action":{"status":"continue","summary":"Make the failing tests pass."}}'
  local evidence_json='{"schema_version":"1.0","task_slug":"false-done","verdict":"fail","checks":[{"id":"unit-tests","kind":"test","status":"fail","summary":"3 unit tests still failing"}]}'
  printf '%s' "$state_json" > "$task_dir/state.json"
  printf '%s' "$evidence_json" > "$task_dir/evidence.json"

  # The markdown deliberately contradicts evidence.json.
  printf '%s\n' \
    '# False Done' \
    '' \
    'branch: main' \
    'status: executing' \
    'type: deliver' \
    '' \
    '## Definition Of Done' \
    '- [x] all unit tests pass' \
    '' \
    '## Goal Fit Gate' \
    '- [x] acceptance criteria verified' \
    '' \
    '### Verdict: PASS' \
    > "$task_dir/false-done--deliver.md"
}
55
+
56
# seed_active_resume PROJECT_DIR
#
# Seed PROJECT_DIR with an in-progress "resume-task" whose state.json carries a
# concrete next step (create RESUMED.txt). AGENTS.md is created only when absent.
seed_active_resume(){ # $1 project dir — active in_progress task with a concrete next step
  local proj_dir="$1"
  local task_dir="$proj_dir/.flow-agents/resume-task"
  mkdir -p "$task_dir"

  if [ ! -f "$proj_dir/AGENTS.md" ]; then
    printf '# Repo\n' > "$proj_dir/AGENTS.md"
  fi

  local state_json='{"schema_version":"1.0","task_slug":"resume-task","status":"in_progress","phase":"execution","updated_at":"2026-06-18T00:00:00Z","next_action":{"status":"continue","summary":"Create a file named RESUMED.txt containing the word resumed.","target_phase":"verification"}}'
  printf '%s' "$state_json" > "$task_dir/state.json"
}
61
+
62
# is_block — read hook output on stdin; succeed (0) when it carries a
# "decision":"block" member, fail (1) otherwise.
#
# grep is run without -q on purpose: -q exits at the first match, and with
# `set -o pipefail` a producer still writing a large payload is then killed by
# SIGPIPE, making the pipeline report failure even though a block was found.
# Draining stdin avoids that. Whitespace around the colon is tolerated so
# pretty-printed JSON is recognised as well as compact JSON.
is_block(){
  grep -E '"decision"[[:space:]]*:[[:space:]]*"block"' >/dev/null
}
63
# has_reground — read adapter JSON on stdin; succeed only when
# hookSpecificOutput.additionalContext mentions all three markers:
# "STATE:", "resume-task" and "RESUMED.txt". Any other input, including
# input that is not valid JSON, yields a non-zero status.
has_reground(){ # stdin = adapter json; assert additionalContext re-grounds the goal
  # python3 -c (not a heredoc) so the script does not consume stdin.
  python3 -c '
import json, sys
payload = json.load(sys.stdin)
context = payload.get("hookSpecificOutput", {}).get("additionalContext", "")
markers = ("STATE:", "resume-task", "RESUMED.txt")
sys.exit(0 if all(m in context for m in markers) else 1)
'
}
68
+
69
# _run_bundle_checks LABEL INSTALLER CFGREL HOMEVAR HOME_DIR PROJ_DIR
#
# Private helper for run_bundle: install the bundle into HOME_DIR and run the
# installed hook commands against state seeded in PROJ_DIR. Results are
# recorded through _p / _f; a failed precondition records one failure and
# returns early.
_run_bundle_checks(){
  local label="$1" installer="$2" cfgrel="$3" homevar="$4" home="$5" proj="$6"

  bash "$installer" "$home" >/dev/null 2>&1 || { _f "$label install.sh failed"; return; }
  local cfg="$home/$cfgrel"
  [ -f "$cfg" ] || { _f "$label config not found at $cfgrel after install"; return; }
  [ -f "$home/scripts/hooks/stop-goal-fit.js" ] || { _f "$label bundle missing scripts/hooks after install"; return; }

  # --- Teeth 1: false-completion block ---
  seed_false_completion "$proj"
  local stopcmd; stopcmd="$(hook_cmd "$cfg" Stop stop-goal-fit)"
  [ -n "$stopcmd" ] || { _f "$label: no Stop stop-goal-fit hook in shipped config"; return; }
  local blk; blk="$(printf '{"hook_event_name":"Stop","cwd":"%s"}' "$proj" | env "$homevar=$home" CLAUDE_PROJECT_DIR="$home" bash -c "$stopcmd" 2>/dev/null)"
  # Here-strings (not `echo | …`) so pipefail cannot turn a match into a failure.
  if is_block <<<"$blk"; then
    _p "$label BLOCKS false completion by default (evidence=fail vs markdown PASS)"
  else
    _f "$label did NOT block: $blk"
  fi

  # control: warn mode must pass through
  local wrn; wrn="$(printf '{"hook_event_name":"Stop","cwd":"%s"}' "$proj" | env "$homevar=$home" CLAUDE_PROJECT_DIR="$home" FLOW_AGENTS_GOAL_FIT_MODE=warn bash -c "$stopcmd" 2>/dev/null)"
  if is_block <<<"$wrn"; then
    _f "$label warn-mode override should NOT block"
  else
    _p "$label warn-mode override passes through (control)"
  fi

  # --- Teeth 2: re-ground active goal on SessionStart ---
  local sscmd; sscmd="$(hook_cmd "$cfg" SessionStart workflow-steering)"
  [ -n "$sscmd" ] || { _f "$label: no SessionStart workflow-steering hook in shipped config"; return; }
  seed_active_resume "$proj"
  local rg; rg="$(printf '{"hook_event_name":"SessionStart","cwd":"%s","source":"compact"}' "$proj" | env "$homevar=$home" CLAUDE_PROJECT_DIR="$home" bash -c "$sscmd" 2>/dev/null)"
  if has_reground <<<"$rg"; then
    _p "$label RE-GROUNDS the active goal on SessionStart (goal + next step survive compaction)"
  else
    _f "$label SessionStart did not re-ground: $rg"
  fi
}

# run_bundle LABEL INSTALLER CFGREL HOMEVAR
#
# Install one shipped bundle into a scratch home directory and prove its hooks
# have teeth: the Stop hook must block a false completion (and pass through in
# warn mode), and the SessionStart hook must re-ground the active goal.
#
#   $1  human-readable bundle label used in every result line
#   $2  path to the bundle's install.sh
#   $3  settings/hooks JSON path, relative to the install home
#   $4  name of the environment variable that points the hooks at the home dir
#
# Both scratch directories are removed before returning, including when a
# precondition fails part-way through the checks.
run_bundle(){ # $1 label, $2 install.sh, $3 settings-json-rel, $4 home-env-name
  local label="$1" installer="$2" cfgrel="$3" homevar="$4"
  echo ""
  echo "── $label: shipped bundle install ──"
  local home proj
  home="$(mktemp -d)" || { _f "$label could not create a scratch home directory"; return; }
  proj="$(mktemp -d)" || { rm -rf -- "${home:?}"; _f "$label could not create a scratch project directory"; return; }

  _run_bundle_checks "$label" "$installer" "$cfgrel" "$homevar" "$home" "$proj"

  # :? guards against ever expanding to an empty path in rm -rf.
  rm -rf -- "${home:?}" "${proj:?}"
}
97
+
98
# Run the goal-fit / re-ground proof against both shipped bundles, then print
# the tally. The script exits 0 only when no check recorded a failure.
run_bundle "Claude Code" "$ROOT/dist/claude-code/install.sh" ".claude/settings.json" "CLAUDE_PROJECT_DIR"
run_bundle "Codex" "$ROOT/dist/codex/install.sh" ".codex/hooks.json" "CODEX_HOME"

printf '\n'
printf '%s\n' "──────────────────────────────────"
printf '%s\n' "prove-teeth: $pass passed, $fail failed"
if [ "$fail" -eq 0 ]; then
  printf '%s\n' "PROOF: shipped Claude Code + Codex bundles enforce goal-fit and re-ground on compaction."
  exit 0
fi
exit 1
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env bash
2
+ # Anti-gaming + trust regression suite — the tests that prove the gate, the CI
3
+ # anchor, and their protections actually work. These MUST run in a required CI
4
+ # lane so that weakening the gate code (stop-goal-fit.js, trust-reconcile.js,
5
+ # config-protection.js, evidence-capture.js, flow-resolver.ts, the producer) is
6
+ # caught: if any of these fail, the lane goes red and the change cannot merge.
7
+ #
8
+ # Convergence red-team (Round 5/7) found these ran in NO required check, so an
9
+ # agent could edit the gate and the proof-tests never ran. This closes that.
10
set -uo pipefail

# Resolve the repository root (two levels above this script) and enter it.
# The test paths below are relative, so running from any other directory
# would resolve them against whatever tree the caller happened to be in.
# Refuse to continue if the root cannot be resolved or entered.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" || ROOT_DIR=""
if [[ -z "$ROOT_DIR" ]] || ! cd "$ROOT_DIR"; then
  echo "ANTI-GAMING SUITE FAILED — cannot enter repository root (resolved: '${ROOT_DIR}')."
  exit 1
fi

# Every script listed here must exist and exit 0 for the suite to pass.
TESTS=(
  "evals/acceptance/prove-capture-teeth.sh"
  "evals/acceptance/prove-capture-teeth-declared.sh"
  "evals/integration/test_gate_lockdown.sh"
  "evals/integration/test_gate_bypass_chain.sh"
  "evals/integration/test_reconcile_soundness.sh"
  "evals/integration/test_captured_fail_reconciliation.sh"
  "evals/integration/test_command_log_integrity.sh"
  "evals/integration/test_resolvefirststep_security.sh"
  "evals/integration/test_enforcer_expects_driven.sh"
  "evals/integration/test_goal_fit_rederive.sh"
  "evals/integration/test_flowdef_session_activation.sh"
  "evals/integration/test_trust_reconcile.sh"
  "evals/integration/test_trust_checkpoint.sh"
  "evals/integration/test_checkpoint_signing.sh"
  "evals/integration/test_mint_attestation.sh"
  "evals/integration/test_publish_delivery.sh"
  "evals/integration/test_phase_map_and_gate_claim.sh"
)

# Run every test even after a failure so the log shows the full picture;
# a missing test file counts as a failure rather than being skipped.
fail=0
for t in "${TESTS[@]}"; do
  if [[ ! -f "$t" ]]; then
    echo "MISSING anti-gaming test: $t — refusing to pass (a removed regression test is a red flag)"
    fail=1
    continue
  fi
  echo "=== anti-gaming: $t ==="
  if bash "$t"; then
    echo " PASS: $t"
  else
    echo " FAIL: $t"
    fail=1
  fi
done

if [[ "$fail" -ne 0 ]]; then
  echo "ANTI-GAMING SUITE FAILED — the gate / CI anchor / protections regressed or a regression test was removed."
  exit 1
fi
echo "ANTI-GAMING SUITE PASSED (${#TESTS[@]} tests)."
@@ -39,6 +39,7 @@ CHECKS=(
39
39
  "Telemetry doctor integration|bash evals/integration/test_telemetry_doctor.sh"
40
40
  "Utterance check integration|bash evals/integration/test_utterance_check.sh"
41
41
  "Pull work provider integration|bash evals/integration/test_pull_work_provider.sh"
42
+ "Anti-gaming and trust suite|bash evals/ci/antigaming-suite.sh"
42
43
  "Usage feedback import integration|bash evals/integration/test_usage_feedback_import.sh"
43
44
  "Usage feedback outcomes integration|bash evals/integration/test_usage_feedback_outcomes.sh"
44
45
  "Usage feedback report integration|bash evals/integration/test_usage_feedback_report.sh"
@@ -82,6 +83,7 @@ LANE_RUNTIME_AND_KIT=(
82
83
  "Telemetry doctor integration"
83
84
  "Utterance check integration"
84
85
  "Pull work provider integration"
86
+ "Anti-gaming and trust suite"
85
87
  )
86
88
 
87
89
  LANE_USAGE_FEEDBACK=(
@@ -0,0 +1,26 @@
1
+ {
2
+ "id": "missing.extension.asset.review",
3
+ "version": "1.0",
4
+ "steps": [
5
+ { "id": "review", "next": "done" },
6
+ { "id": "done", "next": null }
7
+ ],
8
+ "gates": {
9
+ "review-gate": {
10
+ "step": "review",
11
+ "expects": [
12
+ {
13
+ "id": "review-evidence",
14
+ "kind": "trust.bundle",
15
+ "required": true,
16
+ "description": "Review evidence has been recorded.",
17
+ "bundle_claim": {
18
+ "claimType": "example.review.evidence",
19
+ "subjectType": "artifact",
20
+ "accepted_statuses": ["trusted", "accepted"]
21
+ }
22
+ }
23
+ ]
24
+ }
25
+ }
26
+ }