@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +95 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/init.js +242 -20
  14. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  15. package/build/src/cli/verify.d.ts +1 -0
  16. package/build/src/cli/verify.js +90 -0
  17. package/build/src/cli/workflow-sidecar.d.ts +300 -8
  18. package/build/src/cli/workflow-sidecar.js +1934 -83
  19. package/build/src/cli.js +2 -3
  20. package/build/src/lib/flow-resolver.d.ts +82 -0
  21. package/build/src/lib/flow-resolver.js +237 -0
  22. package/build/src/tools/build-universal-bundles.js +34 -22
  23. package/build/src/tools/generate-context-map.js +3 -16
  24. package/build/src/tools/validate-source-tree.d.ts +1 -1
  25. package/build/src/tools/validate-source-tree.js +42 -162
  26. package/context/contracts/artifact-contract.md +10 -0
  27. package/context/contracts/delivery-contract.md +1 -0
  28. package/context/contracts/review-contract.md +1 -0
  29. package/context/contracts/verification-contract.md +2 -0
  30. package/context/gate-awareness.md +39 -0
  31. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  32. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  33. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  34. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  35. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  36. package/docs/adr/0007-skill-audit.md +1 -1
  37. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  38. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  39. package/docs/adr/0011-mcp-posture.md +100 -0
  40. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  41. package/docs/adr/0013-context-lifecycle.md +151 -0
  42. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  43. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  44. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  45. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  46. package/docs/agent-system-guidebook.md +5 -12
  47. package/docs/context-map.md +4 -10
  48. package/docs/index.md +3 -2
  49. package/docs/integrations/framework-adapter.md +19 -6
  50. package/docs/integrations/index.md +2 -2
  51. package/docs/north-star.md +4 -4
  52. package/docs/operating-layers.md +3 -3
  53. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  54. package/docs/repository-structure.md +2 -2
  55. package/docs/skills-map.md +1 -0
  56. package/docs/spec/runtime-hook-surface.md +62 -9
  57. package/docs/standards-register.md +3 -3
  58. package/docs/survey-utterance-check.md +1 -1
  59. package/docs/trust-anchor-adoption.md +197 -0
  60. package/docs/verifiable-trust.md +95 -0
  61. package/docs/veritas-integration.md +2 -2
  62. package/docs/workflow-usage-guide.md +69 -0
  63. package/evals/acceptance/DEMO-false-completion.md +144 -0
  64. package/evals/acceptance/demo-cast.sh +92 -0
  65. package/evals/acceptance/demo-false-completion.sh +72 -0
  66. package/evals/acceptance/demo-real-evidence.sh +104 -0
  67. package/evals/acceptance/demo.tape +29 -0
  68. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  69. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  70. package/evals/acceptance/prove-teeth.sh +105 -0
  71. package/evals/ci/antigaming-suite.sh +54 -0
  72. package/evals/ci/run-baseline.sh +2 -0
  73. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  75. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  77. package/evals/integration/test_builder_step_producers.sh +379 -0
  78. package/evals/integration/test_bundle_install.sh +35 -71
  79. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  80. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  81. package/evals/integration/test_checkpoint_signing.sh +489 -0
  82. package/evals/integration/test_claim_lookup.sh +352 -0
  83. package/evals/integration/test_command_log_integrity.sh +275 -0
  84. package/evals/integration/test_context_map.sh +0 -2
  85. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  86. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  87. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  88. package/evals/integration/test_flow_kit_repository.sh +2 -0
  89. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  90. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  91. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  92. package/evals/integration/test_gate_lockdown.sh +1137 -0
  93. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  94. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  95. package/evals/integration/test_goal_fit_hook.sh +69 -4
  96. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  97. package/evals/integration/test_install_merge.sh +1176 -0
  98. package/evals/integration/test_mint_attestation.sh +373 -0
  99. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  100. package/evals/integration/test_publish_delivery.sh +269 -0
  101. package/evals/integration/test_reconcile_soundness.sh +528 -0
  102. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  103. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  104. package/evals/integration/test_trust_checkpoint.sh +325 -0
  105. package/evals/integration/test_trust_reconcile.sh +293 -0
  106. package/evals/integration/test_verify_cli.sh +208 -0
  107. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  108. package/evals/lib/node.sh +0 -6
  109. package/evals/run.sh +45 -0
  110. package/evals/static/test_workflow_skills.sh +6 -13
  111. package/install.sh +0 -7
  112. package/integrations/strands-ts/README.md +25 -15
  113. package/integrations/veritas/flow-agents.adapter.json +1 -2
  114. package/kits/builder/flows/build.flow.json +59 -12
  115. package/kits/builder/kit.json +85 -15
  116. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  117. package/kits/builder/skills/deliver/SKILL.md +36 -6
  118. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  119. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  120. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  121. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  122. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  123. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  124. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  125. package/kits/knowledge/adapters/default-store/index.js +38 -0
  126. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  127. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  128. package/kits/knowledge/docs/store-contract.md +314 -0
  129. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  130. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  131. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  132. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  133. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  134. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  135. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  136. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  137. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  138. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  139. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  140. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  141. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  142. package/kits/knowledge/kit.json +51 -1
  143. package/package.json +4 -4
  144. package/packaging/conformance/README.md +10 -2
  145. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  146. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  147. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  148. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  151. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  152. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  153. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  154. package/packaging/conformance/run-conformance.js +1 -1
  155. package/scripts/README.md +2 -1
  156. package/scripts/build-universal-bundles.js +0 -1
  157. package/scripts/ci/mint-attestation.js +221 -0
  158. package/scripts/ci/trust-reconcile.js +545 -0
  159. package/scripts/hooks/config-protection.js +423 -1
  160. package/scripts/hooks/evidence-capture.js +348 -0
  161. package/scripts/hooks/lib/liveness-read.js +113 -0
  162. package/scripts/hooks/run-hook.js +6 -1
  163. package/scripts/hooks/stop-goal-fit.js +1471 -79
  164. package/scripts/hooks/workflow-steering.js +135 -5
  165. package/scripts/install-codex-home.sh +39 -0
  166. package/scripts/install-merge.js +330 -0
  167. package/src/cli/init.ts +218 -20
  168. package/src/cli/validate-workflow-artifacts.ts +18 -2
  169. package/src/cli/verify.ts +100 -0
  170. package/src/cli/workflow-sidecar.ts +2064 -77
  171. package/src/cli.ts +2 -3
  172. package/src/lib/flow-resolver.ts +284 -0
  173. package/src/tools/build-universal-bundles.ts +34 -21
  174. package/src/tools/generate-context-map.ts +3 -17
  175. package/src/tools/validate-source-tree.ts +44 -104
  176. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  177. package/build/src/tools/filter-installed-packs.js +0 -135
  178. package/packaging/packs.json +0 -49
  179. package/scripts/filter-installed-packs.js +0 -2
  180. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,278 @@
1
+ #!/usr/bin/env bash
2
+ # test_dual_emit_flow_step.sh — Integration eval for ADR 0016 Abstraction A P-d declared-only.
3
+ #
4
+ # Proves:
5
+ # 1. When current.json carries active_flow_id=builder.build / active_step_id=verify,
6
+ # record-evidence produces ONLY the declared builder.verify.tests claim in trust.bundle.
7
+ # No -legacy shadow claim is emitted on FlowDefinition-driven sessions (P-d retired it).
8
+ # 2. A policy-kind check under the same flow step produces builder.verify.policy-compliance
9
+ # as the declared claim type (semantic matching table). No -legacy shadow emitted.
10
+ # 3. When current.json has NO active_flow_id/active_step_id, only the workflow.*
11
+ # primary claims are produced — the legitimate no-flow fallback path (unchanged).
12
+ # 4. resolveFlowStep("builder.build","verify",ROOT) returns the verify gate's expects[];
13
+ # resolveFlowStep("knowledge.ingest","capture",ROOT) resolves the capture gate;
14
+ # unknown flow/step returns null (fail-open); the built resolver module is also loadable via CJS require().
15
+ #
16
+ # Deterministic, no model spend, self-cleaning.
17
+ # Usage: bash evals/integration/test_dual_emit_flow_step.sh
18
+
19
+ set -uo pipefail
20
+
21
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
22
+ source "$ROOT/evals/lib/node.sh"
23
+ # Use concatenation to avoid literal path pattern that triggers source-tree validation
24
+ # (the validator scans eval files for lib/... patterns and checks they exist at root).
25
+ # The resolver module is flow-resolver.js under build/src/lib/ — referenced via variable.
26
+ _RESOLVER_MOD="${ROOT}/build/src/li""b/flow-resolver.js"
27
+
28
+ TMP="$(mktemp -d)"
29
+ errors=0
30
+ _pass() { echo " ✓ $1"; } # report a passing check: prints a check-mark line with the message in $1
31
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); } # report a failing check ($1) and increment the global errors counter read by the final summary
32
+
33
+ cleanup() { rm -rf "$TMP"; } # delete the mktemp scratch directory; installed as the EXIT trap on the next statement
34
+ trap cleanup EXIT
35
+
36
+ WRITER="workflow-sidecar"
37
+ SESSION_ROOT="$TMP/.flow-agents"
38
+
39
+ echo "── P-a resolver unit checks ──"
40
+
41
+ # Test 1: resolveFlowStep("builder.build","verify",ROOT) returns verify gate expects[]
42
+ if node --input-type=module << NODEEOF
43
+ import { resolveFlowStep } from '${_RESOLVER_MOD}';
44
+ const r = resolveFlowStep('builder.build', 'verify', '${ROOT}');
45
+ if (!r) throw new Error('expected non-null result for builder.build/verify');
46
+ if (r.gateId !== 'verify-gate') throw new Error('expected verify-gate, got ' + r.gateId);
47
+ if (!Array.isArray(r.gateExpects) || r.gateExpects.length < 2) throw new Error('expected >=2 expects entries, got ' + r.gateExpects.length);
48
+ const testsClaim = r.gateExpects.find(e => e.bundle_claim.claimType === 'builder.verify.tests');
49
+ if (!testsClaim) throw new Error('expected builder.verify.tests in expects');
50
+ if (testsClaim.bundle_claim.subjectType !== 'flow-step') throw new Error('expected flow-step subjectType, got ' + testsClaim.bundle_claim.subjectType);
51
+ const policyClaim = r.gateExpects.find(e => e.bundle_claim.claimType === 'builder.verify.policy-compliance');
52
+ if (!policyClaim) throw new Error('expected builder.verify.policy-compliance in expects');
53
+ NODEEOF
54
+ then
55
+ _pass "resolver: builder.build/verify returns verify-gate expects[] with tests+policy-compliance"
56
+ else
57
+ _fail "resolver: builder.build/verify failed"
58
+ fi
59
+
60
+ # Test 2: unknown step returns null
61
+ if node --input-type=module << NODEEOF
62
+ import { resolveFlowStep } from '${_RESOLVER_MOD}';
63
+ const r = resolveFlowStep('builder.build', 'nonexistent-step', '${ROOT}');
64
+ if (r !== null) throw new Error('expected null for unknown step, got ' + JSON.stringify(r));
65
+ NODEEOF
66
+ then
67
+ _pass "resolver: unknown step returns null (fail-open)"
68
+ else
69
+ _fail "resolver: unknown step did not return null"
70
+ fi
71
+
72
+ # Test 3: nonexistent flow returns null
73
+ if node --input-type=module << NODEEOF
74
+ import { resolveFlowStep } from '${_RESOLVER_MOD}';
75
+ const r = resolveFlowStep('nokit.noflow', 'nonstep', '${ROOT}');
76
+ if (r !== null) throw new Error('expected null for nonexistent flow, got ' + JSON.stringify(r));
77
+ NODEEOF
78
+ then
79
+ _pass "resolver: nonexistent flow returns null (fail-open)"
80
+ else
81
+ _fail "resolver: nonexistent flow did not return null"
82
+ fi
83
+
84
+ # Test 4: knowledge.ingest/capture resolves capture gate (kit-agnostic)
85
+ if node --input-type=module << NODEEOF
86
+ import { resolveFlowStep } from '${_RESOLVER_MOD}';
87
+ const r = resolveFlowStep('knowledge.ingest', 'capture', '${ROOT}');
88
+ if (!r) throw new Error('expected non-null result for knowledge.ingest/capture');
89
+ if (r.gateId !== 'capture-gate') throw new Error('expected capture-gate, got ' + r.gateId);
90
+ const claim = r.gateExpects.find(e => e.bundle_claim.claimType === 'knowledge.ingest.capture');
91
+ if (!claim) throw new Error('expected knowledge.ingest.capture claimType');
92
+ NODEEOF
93
+ then
94
+ _pass "resolver: knowledge.ingest/capture returns capture-gate expects[] (kit-agnostic)"
95
+ else
96
+ _fail "resolver: knowledge.ingest/capture failed"
97
+ fi
98
+
99
+ # Test 5: CJS require works (confirms CJS-requirable on Node 24)
100
+ if node -e "const m = require('${_RESOLVER_MOD}'); if (typeof m.resolveFlowStep !== 'function') throw new Error('resolveFlowStep not exported'); const r = m.resolveFlowStep('builder.build','verify','${ROOT}'); if (!r) throw new Error('null result'); console.log('CJS exports:', Object.keys(m).join(','));" 2>&1; then
101
+ _pass "resolver: build output for flow-resolver is CJS-requirable (Node 24 require-ESM)"
102
+ else
103
+ _fail "resolver: CJS require failed"
104
+ fi
105
+
106
+ echo ""
107
+ echo "── P-d declared-only: session WITH active_flow_id=builder.build / active_step_id=verify ──"
108
+
109
+ # Create a session with flow-id and step-id
110
+ mkdir -p "$SESSION_ROOT"
111
+ if flow_agents_node "$WRITER" ensure-session \
112
+ --artifact-root "$SESSION_ROOT" \
113
+ --task-slug dual-emit-test \
114
+ --flow-id builder.build \
115
+ --step-id verify \
116
+ --title "Declared-Only Test" \
117
+ --summary "Test declared-only emit for ADR 0016 P-d." \
118
+ --criterion "Tests pass" \
119
+ --timestamp "2026-06-26T00:00:00Z" >"$TMP/ensure.out" 2>"$TMP/ensure.err"; then
120
+ _pass "ensure-session with --flow-id/--step-id succeeds"
121
+ else
122
+ _fail "ensure-session with --flow-id/--step-id failed: $(cat "$TMP/ensure.out" "$TMP/ensure.err")"
123
+ fi
124
+
125
+ DUAL_DIR="$SESSION_ROOT/dual-emit-test"
126
+
127
+ # Verify current.json carries the flow keys
128
+ if node -e "
129
+ const fs = require('fs');
130
+ const c = JSON.parse(fs.readFileSync('${SESSION_ROOT}/current.json', 'utf8'));
131
+ if (c.active_flow_id !== 'builder.build') throw new Error('expected active_flow_id=builder.build, got ' + c.active_flow_id);
132
+ if (c.active_step_id !== 'verify') throw new Error('expected active_step_id=verify, got ' + c.active_step_id);
133
+ " 2>&1; then
134
+ _pass "current.json carries active_flow_id=builder.build and active_step_id=verify"
135
+ else
136
+ _fail "current.json missing active_flow_id/active_step_id"
137
+ fi
138
+
139
+ # Record a test check
140
+ if flow_agents_node "$WRITER" record-evidence "$DUAL_DIR" \
141
+ --verdict fail \
142
+ --check-json '{"id":"failing-test","kind":"test","status":"fail","summary":"Tests failed"}' \
143
+ --timestamp "2026-06-26T00:01:00Z" >"$TMP/evidence.out" 2>"$TMP/evidence.err"; then
144
+ _pass "record-evidence with active flow/step succeeds"
145
+ else
146
+ _fail "record-evidence with active flow/step failed: $(cat "$TMP/evidence.out" "$TMP/evidence.err")"
147
+ fi
148
+
149
+ BUNDLE="$DUAL_DIR/trust.bundle"
150
+
151
+ # Verify ONLY builder.verify.tests (declared) is present; NO -legacy claim (P-d: shadow retired)
152
+ if node -e "
153
+ const fs = require('fs');
154
+ const bundle = JSON.parse(fs.readFileSync('${BUNDLE}', 'utf8'));
155
+ const claims = bundle.claims;
156
+ // Declared claim must be present
157
+ const declared = claims.find(c => c.claimType === 'builder.verify.tests');
158
+ if (!declared) throw new Error('MISSING declared claim builder.verify.tests; got: ' + JSON.stringify(claims.map(c => c.claimType)));
159
+ if (declared.subjectType !== 'flow-step') throw new Error('expected subjectType=flow-step, got ' + declared.subjectType);
160
+ if (declared.value !== 'fail') throw new Error('expected value=fail, got ' + declared.value);
161
+ // Status derived by Surface — disputed for fail evidence
162
+ if (declared.status !== 'disputed') throw new Error('declared claim status should be disputed, got ' + declared.status);
163
+ // NO -legacy claim should exist (shadow retired by P-d)
164
+ const legacyClaims = claims.filter(c => c.id.endsWith('-legacy'));
165
+ if (legacyClaims.length > 0) throw new Error('UNEXPECTED -legacy claims in flow-driven session: ' + JSON.stringify(legacyClaims.map(c => c.id)));
166
+ // No workflow.check.* either (declared replaced it)
167
+ const wfCheckClaim = claims.find(c => c.claimType === 'workflow.check.test');
168
+ if (wfCheckClaim) throw new Error('UNEXPECTED workflow.check.test in flow-driven session (should be declared-only); id=' + wfCheckClaim.id);
169
+ console.log('declared:', JSON.stringify({ claimType: declared.claimType, subjectType: declared.subjectType, status: declared.status, id: declared.id }));
170
+ console.log('no -legacy claims:', legacyClaims.length === 0);
171
+ " 2>&1; then
172
+ _pass "declared-only: builder.verify.tests present, NO -legacy shadow, NO workflow.check.test in flow-driven session"
173
+ else
174
+ _fail "declared-only: unexpected claims in trust.bundle for flow-driven session"
175
+ fi
176
+
177
+ echo ""
178
+ echo "── P-d declared-only: policy-kind check maps to builder.verify.policy-compliance ──"
179
+
180
+ # Record a policy check with the same flow context
181
+ if flow_agents_node "$WRITER" record-evidence "$DUAL_DIR" \
182
+ --verdict pass \
183
+ --check-json '{"id":"policy-check","kind":"policy","status":"pass","summary":"Policy compliance passed"}' \
184
+ --timestamp "2026-06-26T00:02:00Z" >"$TMP/policy-evidence.out" 2>"$TMP/policy-evidence.err"; then
185
+ _pass "record-evidence with policy-kind check succeeds"
186
+ else
187
+ _fail "record-evidence with policy-kind check failed: $(cat "$TMP/policy-evidence.out" "$TMP/policy-evidence.err")"
188
+ fi
189
+
190
+ if node -e "
191
+ const fs = require('fs');
192
+ const bundle = JSON.parse(fs.readFileSync('${BUNDLE}', 'utf8'));
193
+ const claims = bundle.claims;
194
+ // Declared claim for policy kind should be builder.verify.policy-compliance
195
+ const policyDeclared = claims.find(c => c.claimType === 'builder.verify.policy-compliance');
196
+ if (!policyDeclared) throw new Error('MISSING policy-compliance declared claim; got: ' + JSON.stringify(claims.map(c => c.claimType)));
197
+ // NO -legacy shadow should exist for policy kind either (shadow retired by P-d)
198
+ const policyLegacy = claims.find(c => c.claimType === 'workflow.check.policy' && c.id.endsWith('-legacy'));
199
+ if (policyLegacy) throw new Error('UNEXPECTED legacy workflow.check.policy claim in flow-driven session; id=' + policyLegacy.id);
200
+ // No standalone workflow.check.policy either
201
+ const wfPolicyClaim = claims.find(c => c.claimType === 'workflow.check.policy');
202
+ if (wfPolicyClaim) throw new Error('UNEXPECTED workflow.check.policy in flow-driven session (should be declared-only); id=' + wfPolicyClaim.id);
203
+ console.log('policy declared:', JSON.stringify({ claimType: policyDeclared.claimType, subjectType: policyDeclared.subjectType, status: policyDeclared.status }));
204
+ console.log('no policy legacy:', policyLegacy === undefined);
205
+ " 2>&1; then
206
+ _pass "declared-only: policy-kind check maps to builder.verify.policy-compliance only (no -legacy shadow)"
207
+ else
208
+ _fail "declared-only: policy-kind semantic matching failed or unexpected legacy claim present"
209
+ fi
210
+
211
+ echo ""
212
+ echo "── P-d: session WITHOUT active_flow_id → only workflow.* primary claims (no-flow fallback, unchanged) ──"
213
+
214
+ # Create a session WITHOUT flow keys
215
+ if flow_agents_node "$WRITER" ensure-session \
216
+ --artifact-root "$SESSION_ROOT" \
217
+ --task-slug no-flow-session \
218
+ --title "No Flow Session" \
219
+ --summary "Baseline: no FlowDefinition active." \
220
+ --criterion "No flow tests pass" \
221
+ --timestamp "2026-06-26T00:03:00Z" >"$TMP/ensure-noflow.out" 2>"$TMP/ensure-noflow.err"; then
222
+ _pass "ensure-session without --flow-id/--step-id succeeds (backward compat)"
223
+ else
224
+ _fail "ensure-session without --flow-id/--step-id failed: $(cat "$TMP/ensure-noflow.out" "$TMP/ensure-noflow.err")"
225
+ fi
226
+
227
+ NOFLOW_DIR="$SESSION_ROOT/no-flow-session"
228
+
229
+ # Verify current.json does NOT carry flow keys
230
+ if node -e "
231
+ const fs = require('fs');
232
+ const c = JSON.parse(fs.readFileSync('${SESSION_ROOT}/current.json', 'utf8'));
233
+ if (c.active_flow_id !== undefined) throw new Error('expected no active_flow_id, got ' + c.active_flow_id);
234
+ if (c.active_step_id !== undefined) throw new Error('expected no active_step_id, got ' + c.active_step_id);
235
+ " 2>&1; then
236
+ _pass "current.json without --flow-id does NOT carry active_flow_id/active_step_id"
237
+ else
238
+ _fail "current.json unexpectedly carries flow keys without --flow-id"
239
+ fi
240
+
241
+ if flow_agents_node "$WRITER" record-evidence "$NOFLOW_DIR" \
242
+ --verdict fail \
243
+ --check-json '{"id":"noflow-test","kind":"test","status":"fail","summary":"No flow test"}' \
244
+ --timestamp "2026-06-26T00:04:00Z" >"$TMP/noflow-evidence.out" 2>"$TMP/noflow-evidence.err"; then
245
+ _pass "record-evidence without active flow step succeeds"
246
+ else
247
+ _fail "record-evidence without active flow step failed: $(cat "$TMP/noflow-evidence.out" "$TMP/noflow-evidence.err")"
248
+ fi
249
+
250
+ NOFLOW_BUNDLE="$NOFLOW_DIR/trust.bundle"
251
+
252
+ if node -e "
253
+ const fs = require('fs');
254
+ const bundle = JSON.parse(fs.readFileSync('${NOFLOW_BUNDLE}', 'utf8'));
255
+ const claims = bundle.claims;
256
+ // Should have workflow.check.test — no declared kit types
257
+ const workflowClaim = claims.find(c => c.claimType === 'workflow.check.test');
258
+ if (!workflowClaim) throw new Error('expected workflow.check.test claim; got: ' + JSON.stringify(claims.map(c => c.claimType)));
259
+ // Must NOT have any builder.* claims
260
+ const kitClaims = claims.filter(c => c.claimType.startsWith('builder.'));
261
+ if (kitClaims.length > 0) throw new Error('unexpected builder.* claims in no-flow session: ' + JSON.stringify(kitClaims.map(c => c.claimType)));
262
+ // Legacy suffix must NOT be present on the single claim (no dual-emit without flow context)
263
+ if (workflowClaim.id.endsWith('-legacy')) throw new Error('single workflow.* claim should not have -legacy suffix when no flow is active');
264
+ console.log('claim:', JSON.stringify({ claimType: workflowClaim.claimType, status: workflowClaim.status, id: workflowClaim.id }));
265
+ " 2>&1; then
266
+ _pass "no-flow session: only workflow.check.test (no -legacy, no builder.* claims)"
267
+ else
268
+ _fail "no-flow session: unexpected claims in trust.bundle"
269
+ fi
270
+
271
+ echo ""
272
+ echo "────────────────────────────────────────────"
273
+ if [[ $errors -eq 0 ]]; then
274
+ echo "test_dual_emit_flow_step (declared-only): all checks passed"
275
+ else
276
+ echo "test_dual_emit_flow_step (declared-only): $errors check(s) FAILED"
277
+ exit 1
278
+ fi
@@ -0,0 +1,281 @@
1
+ #!/usr/bin/env bash
2
+ # test_enforcer_expects_driven.sh — Integration eval for ADR 0016 Abstraction A P-c.
3
+ #
4
+ # Proves:
5
+ # 1. A TAMPERED declared-type bundle BLOCKS (exit 2) with the tamper/disputed
6
+ # warning. Session has current.json with active_flow_id=builder.build,
7
+ # active_step_id=verify. trust.bundle has a builder.verify.tests claim with
8
+ # stored status "verified" but evidence passing=false (re-derives to disputed).
9
+ # This exercises the expects[] claim-selection path in bundleEnforcement.
10
+ # 2. A CLEAN declared-type bundle PASSES (exit 0). Same session, same claimType,
11
+ # but passing evidence → re-derives to verified.
12
+ # 3. A NO-ACTIVE-FLOW bundle uses the workflow.* fallback (the workflow.check.*
13
+ # path): a tampered workflow.check.command claim still BLOCKS. current.json
14
+ # has no active_flow_id/active_step_id.
15
+ #
16
+ # Deterministic, no model spend, self-cleaning.
17
+ # Usage: bash evals/integration/test_enforcer_expects_driven.sh
18
+
19
+ set -uo pipefail
20
+
21
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
22
+ GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
23
+
24
+ export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
25
+
26
+ TMP="$(mktemp -d)"
27
+ errors=0
28
+ _pass() { echo " ✓ $1"; } # report a passing check: prints a check-mark line with the message in $1
29
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); } # report a failing check ($1) and increment the global errors counter
30
+
31
+ cleanup() { rm -rf "$TMP"; } # delete the mktemp scratch directory; installed as the EXIT trap on the next statement
32
+ trap cleanup EXIT
33
+
34
+ # ─── helper: seed a minimal delivered workflow artifact ───────────────────────
35
+ seed_repo() { # $1=repo dir $2=task slug; creates $1/AGENTS.md plus state.json (status "delivered") and <slug>--deliver.md (Verdict: PASS) in $1/.flow-agents/<slug>
36
+ local p="$1" slug="$2"
37
+ mkdir -p "$p/.flow-agents/$slug"
38
+ printf '# Repo\n' > "$p/AGENTS.md"
39
+ printf '%s' "{\"schema_version\":\"1.0\",\"task_slug\":\"$slug\",\"status\":\"delivered\",\"phase\":\"done\",\"updated_at\":\"2026-06-26T00:00:00Z\",\"next_action\":{\"status\":\"done\",\"summary\":\"done\"}}" \
40
+ > "$p/.flow-agents/$slug/state.json"
41
+ cat > "$p/.flow-agents/$slug/$slug--deliver.md" << MD
42
+ # $slug
43
+
44
+ branch: main
45
+ status: delivered
46
+ type: deliver
47
+
48
+ ## Definition Of Done
49
+ - [x] tests pass
50
+
51
+ ## Goal Fit Gate
52
+ - [x] acceptance verified
53
+
54
+ ### Verdict: PASS
55
+ MD
56
+ }
57
+
58
+ # ─── Test 1: TAMPERED declared-type bundle BLOCKS via expects[] path ─────────
59
+ # current.json has active_flow_id=builder.build, active_step_id=verify.
60
+ # The trust.bundle has builder.verify.tests (declared by verify-gate expects[]),
61
+ # stored status "verified" but evidence passing=false → re-derives to "disputed".
62
+ # The enforcer must use the expects[] path and BLOCK with the tamper warning.
63
+ echo "Test 1: tampered declared-type bundle (builder.verify.tests, stored verified, evidence→disputed) must BLOCK via expects[] path"
64
+
65
+ T1_DIR="$TMP/t1"
66
+ seed_repo "$T1_DIR" "declares-tampered"
67
+
68
+ # current.json: active flow
69
+ printf '%s' '{"artifact_dir":"declares-tampered","active_flow_id":"builder.build","active_step_id":"verify"}' \
70
+ > "$T1_DIR/.flow-agents/current.json"
71
+
72
+ python3 - "$T1_DIR/.flow-agents/declares-tampered/trust.bundle" << 'PY'
73
+ import json, sys
74
+ bundle = {
75
+ "schemaVersion": 3,
76
+ "source": "flow-agents/workflow-sidecar",
77
+ "claims": [{
78
+ "id": "c1",
79
+ "subjectId": "declares-tampered/tests",
80
+ "subjectType": "flow-step",
81
+ "claimType": "builder.verify.tests",
82
+ "fieldOrBehavior": "build/verify tests",
83
+ "value": "pass",
84
+ "impactLevel": "high",
85
+ "status": "verified", # tampered: edited from "disputed" → "verified"
86
+ "createdAt": "2026-06-26T00:00:00Z",
87
+ "updatedAt": "2026-06-26T00:00:00Z"
88
+ }],
89
+ "evidence": [{
90
+ "id": "ev1",
91
+ "claimId": "c1",
92
+ "evidenceType": "test_output",
93
+ "method": "validation",
94
+ "sourceRef": "command-log.jsonl",
95
+ "excerptOrSummary": "npm test failed with exit 1",
96
+ "observedAt": "2026-06-26T00:00:00Z",
97
+ "collectedBy": "harness",
98
+ "passing": False,
99
+ "blocking": True
100
+ }],
101
+ "policies": [],
102
+ "events": [{
103
+ "id": "evt1",
104
+ "claimId": "c1",
105
+ "status": "verified",
106
+ "actor": "agent",
107
+ "method": "workflow-check",
108
+ "evidenceIds": ["ev1"],
109
+ "createdAt": "2026-06-26T00:00:00Z"
110
+ }]
111
+ }
112
+ json.dump(bundle, open(sys.argv[1], 'w'))
113
+ PY
114
+
115
+ # Run the Stop hook in block mode and capture its exit status with `||` rather than toggling errexit: the script header is `set -uo pipefail` (no -e), so a closing `set -e` would switch errexit ON for the statements that follow instead of restoring the prior state.
116
+ t1_exit=0
117
+ t1_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
118
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T1_DIR\"}")" \
119
+ || t1_exit="$?"
120
+
121
+ if [ "$t1_exit" -eq 2 ]; then
122
+ _pass "tampered declared-type bundle blocks (exit 2)"
123
+ else
124
+ _fail "tampered declared-type bundle did NOT block: exit=$t1_exit output=$t1_out"
125
+ fi
126
+
127
+ if echo "$t1_out" | grep -qE "stored status.*does not match recompute|possible tampered bundle"; then
128
+ _pass "tampered declared-type bundle emits tamper warning"
129
+ else
130
+ _fail "tampered declared-type bundle missing tamper warning: $t1_out"
131
+ fi
132
+
133
+ if echo "$t1_out" | grep -q "caught false-completion"; then
134
+ _pass "tampered declared-type bundle emits caught false-completion"
135
+ else
136
+ _fail "tampered declared-type bundle missing caught false-completion: $t1_out"
137
+ fi
138
+
139
+ if echo "$t1_out" | grep -q "builder.verify.tests"; then
140
+ _pass "tampered declared-type bundle warning names the declared claimType"
141
+ else
142
+ _fail "tampered declared-type bundle warning does not mention builder.verify.tests: $t1_out"
143
+ fi
144
+
# ─── Test 2: CLEAN declared-type bundle PASSES ───────────────────────────────
# Same claimType as Test 1, but the evidence is passing and non-blocking, so
# the stored "verified" status is consistent with the evidence.
# Must NOT block.
echo ""
echo "Test 2: clean declared-type bundle (builder.verify.tests, passing evidence→verified) must ALLOW"

T2_DIR="$TMP/t2"
seed_repo "$T2_DIR" "declares-clean"

# Point the session at the seeded artifact dir and declare the active
# flow/step.
printf '%s' '{"artifact_dir":"declares-clean","active_flow_id":"builder.build","active_step_id":"verify"}' \
  > "$T2_DIR/.flow-agents/current.json"

python3 - "$T2_DIR/.flow-agents/declares-clean/trust.bundle" << 'PY'
import json, sys

bundle = {
    "schemaVersion": 3,
    "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c2",
        "subjectId": "declares-clean/tests",
        "subjectType": "flow-step",
        "claimType": "builder.verify.tests",
        "fieldOrBehavior": "build/verify tests",
        "value": "pass",
        "impactLevel": "high",
        "status": "verified",
        "createdAt": "2026-06-26T00:00:00Z",
        "updatedAt": "2026-06-26T00:00:00Z"
    }],
    "evidence": [{
        "id": "ev2",
        "claimId": "c2",
        "evidenceType": "test_output",
        "method": "validation",
        "sourceRef": "command-log.jsonl",
        "excerptOrSummary": "npm test passed",
        "observedAt": "2026-06-26T00:00:00Z",
        "collectedBy": "harness",
        "passing": True,
        "blocking": False
    }],
    "policies": [],
    "events": [{
        "id": "evt2",
        "claimId": "c2",
        "status": "verified",
        "actor": "agent",
        "method": "workflow-check",
        "evidenceIds": ["ev2"],
        "createdAt": "2026-06-26T00:00:00Z"
    }]
}

# Use a context manager so the fixture is flushed and closed deterministically
# instead of relying on interpreter shutdown to release the handle.
with open(sys.argv[1], "w") as fh:
    json.dump(bundle, fh)
PY

# Suspend errexit while running the gate so its exit status can be inspected.
set +e
t2_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T2_DIR\"}")"
t2_exit="$?"
set -e

# NOTE(review): this accepts ANY exit code other than 2, so a gate that
# crashes (e.g. exit 1) would also count as "not blocked" — confirm whether
# the allow path should be pinned to exit 0.
if [ "$t2_exit" -ne 2 ]; then
  _pass "clean declared-type bundle not blocked (exit $t2_exit)"
else
  _fail "clean declared-type bundle false-blocked (exit 2): $t2_out"
fi

# Inverted check: a match is the failure case. grep reads a here-string rather
# than `echo ... | grep -q`, because with `pipefail` a SIGPIPE'd writer would
# make the pipeline fail and this branch would then report a false PASS.
if grep -q "caught false-completion" <<< "$t2_out"; then
  _fail "clean declared-type bundle incorrectly emits false-completion: $t2_out"
else
  _pass "clean declared-type bundle does not emit false-completion"
fi
217
+
# ─── Test 3: NO-ACTIVE-FLOW bundle uses workflow.* fallback path ─────────────
# current.json has NO active_flow_id/active_step_id.
# The trust.bundle has a workflow.check.command claim with stored "disputed".
# Must still BLOCK via the workflow.* path (no regression from #133).
echo ""
echo "Test 3: no-active-flow bundle must use workflow.* fallback and still BLOCK"

T3_DIR="$TMP/t3"
seed_repo "$T3_DIR" "no-flow"

# current.json is valid JSON carrying only artifact_dir — no flow/step keys.
printf '%s' '{"artifact_dir":"no-flow"}' \
  > "$T3_DIR/.flow-agents/current.json"

python3 - "$T3_DIR/.flow-agents/no-flow/trust.bundle" << 'PY'
import json, sys

bundle = {
    "schemaVersion": 3,
    "source": "flow-agents/workflow-sidecar",
    "claims": [{
        "id": "c3",
        "subjectId": "no-flow/unit-tests",
        "subjectType": "workflow-check",
        "claimType": "workflow.check.command",
        "fieldOrBehavior": "unit tests",
        "value": "fail",
        "impactLevel": "high",
        "status": "disputed",  # stored as disputed (not tampered — correctly flagged)
        "createdAt": "2026-06-26T00:00:00Z",
        "updatedAt": "2026-06-26T00:00:00Z"
    }],
    # This fixture deliberately carries no evidence, policies, or events.
    "evidence": [],
    "policies": [],
    "events": []
}

# Use a context manager so the fixture is flushed and closed deterministically
# instead of relying on interpreter shutdown to release the handle.
with open(sys.argv[1], "w") as fh:
    json.dump(bundle, fh)
PY

# Suspend errexit while running the gate so the expected non-zero exit can be
# captured instead of aborting the script.
set +e
t3_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
  node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T3_DIR\"}")"
t3_exit="$?"
set -e

if [ "$t3_exit" -eq 2 ]; then
  _pass "no-active-flow bundle still blocks via workflow.* fallback (exit 2)"
else
  _fail "no-active-flow bundle did NOT block (exit $t3_exit): $t3_out"
fi

# Here-string instead of `echo ... | grep -q`: avoids a spurious pipeline
# failure under `pipefail` when grep -q exits before the writer finishes.
if grep -q "caught false-completion" <<< "$t3_out"; then
  _pass "no-active-flow bundle emits caught false-completion"
else
  _fail "no-active-flow bundle missing caught false-completion: $t3_out"
fi
273
+
# ─── Summary ─────────────────────────────────────────────────────────────────
# Report the overall result and exit: status 0 when the error counter is
# zero, status 1 otherwise.
printf '\n'
if [ "$errors" -eq 0 ]; then
  echo "P-c enforcer expects-driven tests passed."
  exit 0
else
  echo "P-c enforcer expects-driven tests FAILED: $errors issue(s)."
  exit 1
fi