@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,275 @@
1
+ #!/usr/bin/env bash
2
+ # test_command_log_integrity.sh — Hash-chain tamper-evidence for command-log.jsonl (Increment B2)
3
+ #
4
+ # Proves that command-log.jsonl's hash-chain makes alteration DETECTABLE:
5
+ # 1. Altered entry (flip exitCode/observedResult without fixing hash) → broken → gate blocks.
6
+ # 2. Removed/reordered entry → linkage breaks → broken → gate blocks.
7
+ # 3. Legit untampered chain → ok → normal behavior; genuine fail still caught.
8
+ # 4. Legacy unchained log (pre-B2) → legacy → existing behavior unchanged.
9
+ #
10
+ # HONEST FRAMING (in comments and code): this is tamper-EVIDENCE, not tamper-PROOF.
11
+ # An agent that recomputes the whole chain can forge a valid chain. The real
12
+ # tamper-proof boundary is the signed checkpoint (B1, already merged). This chain
13
+ # raises the local bar and catches casual/accidental tampering and corruption.
14
+ #
15
+ # Usage: bash evals/integration/test_command_log_integrity.sh
16
+ set -uo pipefail
17
+
18
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
19
+ CAPTURE="$ROOT/scripts/hooks/evidence-capture.js"
20
+ GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
21
+
22
+ export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
23
+
24
+ TMP="$(mktemp -d)"
25
+ errors=0
26
+ _pass() { echo " ✓ $1"; }
27
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
28
+
29
+ cleanup() { rm -rf "$TMP"; }
30
+ trap cleanup EXIT
31
+
32
+ # ── helper: seed a minimal delivered workflow artifact ────────────────────────
33
+ seed_repo() { # $1=dir $2=slug
34
+ local p="$1" slug="$2"
35
+ mkdir -p "$p/.flow-agents/$slug"
36
+ printf '# Repo\n' > "$p/AGENTS.md"
37
+ printf '%s' "{\"schema_version\":\"1.0\",\"task_slug\":\"$slug\",\"status\":\"delivered\",\"phase\":\"done\",\"updated_at\":\"2026-06-23T00:00:00Z\",\"next_action\":{\"status\":\"done\",\"summary\":\"done\"}}" \
38
+ > "$p/.flow-agents/$slug/state.json"
39
+ cat > "$p/.flow-agents/$slug/$slug--deliver.md" << MD
40
+ # $slug
41
+
42
+ branch: main
43
+ status: delivered
44
+ type: deliver
45
+
46
+ ## Definition Of Done
47
+ - [x] tests pass
48
+
49
+ ## Goal Fit Gate
50
+ - [x] acceptance verified
51
+
52
+ ### Verdict: PASS
53
+ MD
54
+ }
55
+
56
+ # Write two chained entries to command-log.jsonl via evidence-capture.js.
57
+ # Returns the log file path.
58
+ write_chained_log() { # $1=repo_dir $2=slug
59
+ local p="$1" slug="$2"
60
+ # Entry 0: npm test passes
61
+ printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm test"},"tool_response":{"exitCode":0,"stdout":"ok"}}' "$p" \
62
+ | node "$CAPTURE" >/dev/null 2>&1
63
+ # Entry 1: npm run lint FAILS
64
+ printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm run lint"},"tool_response":{"exitCode":1,"stderr":"lint errors"}}' "$p" \
65
+ | node "$CAPTURE" >/dev/null 2>&1
66
+ }
67
+
68
+ # ─── Test 1: altered entry detected (flip exitCode/observedResult, keep old hash) ──────
69
+ echo "Test 1: altered entry (flip fail→pass without fixing hash) → broken → gate blocks"
70
+
71
+ T1="$TMP/t1"; seed_repo "$T1" t1
72
+ write_chained_log "$T1" t1
73
+
74
+ LOG="$T1/.flow-agents/t1/command-log.jsonl"
75
+
76
+ if [[ -f "$LOG" ]]; then _pass "T1: command-log.jsonl written"; else _fail "T1: command-log.jsonl missing"; fi
77
+
78
+ # Verify clean chain (before tamper)
79
+ chain_status=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T1/.flow-agents/t1'); console.log(r.status);")
80
+ if [[ "$chain_status" == "ok" ]]; then
81
+ _pass "T1: untampered chain verifies as ok"
82
+ else
83
+ _fail "T1: expected ok, got $chain_status"
84
+ fi
85
+
86
+ # Tamper: flip entry 1 (lint, FAIL) to look like a PASS — change exitCode and observedResult
87
+ # but do NOT update _chain.hash → chain is broken.
88
+ python3 - "$LOG" << 'PY'
89
+ import json, sys
90
+ lines = open(sys.argv[1]).read().strip().split('\n')
91
+ e1 = json.loads(lines[1])
92
+ e1['exitCode'] = 0 # hide the failure
93
+ e1['observedResult'] = 'pass' # claim it passed
94
+ # _chain.hash is NOT updated — deliberate, this is the tamper
95
+ lines[1] = json.dumps(e1)
96
+ open(sys.argv[1], 'w').write('\n'.join(lines) + '\n')
97
+ PY
98
+
99
+ # Verify broken chain
100
+ chain_after=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T1/.flow-agents/t1'); console.log(r.status + ':' + r.brokenAt);")
101
+ if [[ "$chain_after" == "broken:1" ]]; then
102
+ _pass "T1: tampered entry detected → broken at entry 1"
103
+ else
104
+ _fail "T1: expected broken:1, got $chain_after"
105
+ fi
106
+
107
+ # Seed evidence.json claiming npm test passed (the untampered entry)
108
+ # The tampered entry (lint) was a FAIL flipped to PASS — so the log now shows a false pass.
109
+ # Since chain is broken, gate should block with integrity warning and NOT trust log passes.
110
+ printf '%s' '{"schema_version":"1.0","task_slug":"t1","verdict":"pass","checks":[{"id":"npm-test","kind":"command","status":"pass","command":"npm test","summary":"passed"}]}' \
111
+ > "$T1/.flow-agents/t1/evidence.json"
112
+
113
+ set +e
114
+ gate_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
115
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T1\"}")
116
+ gate_exit=$?
117
+ set -e
118
+
119
+ if [[ "$gate_exit" -eq 2 ]]; then
120
+ _pass "T1: gate blocks (exit 2) when chain is broken"
121
+ else
122
+ _fail "T1: gate should block on broken chain, exit=$gate_exit output=$gate_out"
123
+ fi
124
+
125
+ if echo "$gate_out" | grep -q "command-log integrity check FAILED"; then
126
+ _pass "T1: gate emits integrity-failure warning"
127
+ else
128
+ _fail "T1: missing integrity-failure warning: $gate_out"
129
+ fi
130
+
131
+ if echo "$gate_out" | grep -q "NOT trusted"; then
132
+ _pass "T1: gate emits 'NOT trusted' signal for claimed passes"
133
+ else
134
+ _fail "T1: missing NOT trusted signal: $gate_out"
135
+ fi
136
+
137
+ # ─── Test 2: removed/reordered entry detected ─────────────────────────────────────
138
+ echo ""
139
+ echo "Test 2: removed/reordered entry → linkage breaks → broken → gate flags it"
140
+
141
+ T2="$TMP/t2"; seed_repo "$T2" t2
142
+ write_chained_log "$T2" t2
143
+
144
+ LOG2="$T2/.flow-agents/t2/command-log.jsonl"
145
+ lines_before=$(wc -l < "$LOG2" | tr -d ' ')
146
+
147
+ # Reorder: swap entry 0 and entry 1
148
+ python3 - "$LOG2" << 'PY'
149
+ import sys
150
+ lines = open(sys.argv[1]).read().strip().split('\n')
151
+ # swap
152
+ lines[0], lines[1] = lines[1], lines[0]
153
+ open(sys.argv[1], 'w').write('\n'.join(lines) + '\n')
154
+ PY
155
+
156
+ chain_reorder=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T2/.flow-agents/t2'); console.log(r.status);")
157
+ if [[ "$chain_reorder" == "broken" ]]; then
158
+ _pass "T2: reordered entries detected → broken"
159
+ else
160
+ _fail "T2: expected broken on reorder, got $chain_reorder"
161
+ fi
162
+
163
+ # Test: delete middle entry (restore then delete entry 0 so entry 1's prevHash is wrong)
164
+ write_chained_log "$T2" t2 # re-append fresh entries (now 4 total — but that's fine for test)
165
+ # Write a fresh log with just 2 entries and then delete the first
166
+ LOG2_FRESH="$T2/.flow-agents/t2/command-log.jsonl"
167
+ python3 - "$LOG2_FRESH" << 'PY'
168
+ import sys
169
+ lines = [l for l in open(sys.argv[1]).read().strip().split('\n') if l.strip()]
170
+ # Keep only the last 2 entries (fresh from second write_chained_log call above)
171
+ last2 = lines[-2:]
172
+ # Delete entry[0] of the last2 → only entry[1] remains, whose prevHash won't match genesis
173
+ open(sys.argv[1], 'w').write(last2[1] + '\n')
174
+ PY
175
+
176
+ chain_delete=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T2/.flow-agents/t2'); console.log(r.status);")
177
+ if [[ "$chain_delete" == "broken" ]]; then
178
+ _pass "T2: removed predecessor entry detected → broken (prevHash mismatch)"
179
+ else
180
+ _fail "T2: expected broken on removed predecessor, got $chain_delete"
181
+ fi
182
+
183
+ # ─── Test 3: legit untampered chain — ok — genuine fail still caught ─────────────────
184
+ echo ""
185
+ echo "Test 3: legit untampered chain → ok → genuine fail still caught (capture-teeth)"
186
+
187
+ T3="$TMP/t3"; seed_repo "$T3" t3
188
+ # Write entry 0 (pass) and entry 1 (fail)
189
+ printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm test"},"tool_response":{"exitCode":0}}' "$T3" \
190
+ | node "$CAPTURE" >/dev/null 2>&1
191
+ printf '{"hook_event_name":"PostToolUse","tool_name":"Bash","cwd":"%s","tool_input":{"command":"npm run build"},"tool_response":{"exitCode":1}}' "$T3" \
192
+ | node "$CAPTURE" >/dev/null 2>&1
193
+
194
+ chain_legit=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T3/.flow-agents/t3'); console.log(r.status);")
195
+ if [[ "$chain_legit" == "ok" ]]; then
196
+ _pass "T3: untampered chained log verifies ok"
197
+ else
198
+ _fail "T3: expected ok, got $chain_legit"
199
+ fi
200
+
201
+ # Evidence claims npm run build passed (it actually failed → capture log shows fail → block)
202
+ printf '%s' '{"schema_version":"1.0","task_slug":"t3","verdict":"pass","checks":[{"id":"build","kind":"command","status":"pass","command":"npm run build","summary":"build passed"}]}' \
203
+ > "$T3/.flow-agents/t3/evidence.json"
204
+
205
+ set +e
206
+ gate3_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
207
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T3\"}")
208
+ gate3_exit=$?
209
+ set -e
210
+
211
+ if [[ "$gate3_exit" -eq 2 ]]; then
212
+ _pass "T3: gate blocks on genuine fail caught by capture log (ok chain, capture teeth active)"
213
+ else
214
+ _fail "T3: gate should block on captured fail, exit=$gate3_exit output=$gate3_out"
215
+ fi
216
+
217
+ if echo "$gate3_out" | grep -q "capture log CONTRADICTS claimed pass"; then
218
+ _pass "T3: gate emits capture-log contradicts warning (genuine fail caught)"
219
+ else
220
+ _fail "T3: missing capture-log contradicts warning: $gate3_out"
221
+ fi
222
+
223
+ if ! echo "$gate3_out" | grep -q "command-log integrity check FAILED"; then
224
+ _pass "T3: no false integrity-failure warning for untampered chain"
225
+ else
226
+ _fail "T3: spurious integrity-failure warning emitted: $gate3_out"
227
+ fi
228
+
229
+ # ─── Test 4: backward-compat — legacy unchained log → legacy → existing behavior ────
230
+ echo ""
231
+ echo "Test 4: legacy unchained log (no _chain) → legacy → existing behavior unchanged"
232
+
233
+ T4="$TMP/t4"; seed_repo "$T4" t4
234
+
235
+ # Write a legacy-style log (no _chain field) — exactly like pre-B2 fixtures
236
+ printf '%s\n' '{"command":"npm test","observedResult":"fail","exitCode":1,"capturedAt":"2026-06-23T00:00:00Z","source":"postToolUse-capture"}' \
237
+ > "$T4/.flow-agents/t4/command-log.jsonl"
238
+
239
+ chain_legacy=$(node -e "const g = require('$GATE'); const r = g.verifyCommandLogChain('$T4/.flow-agents/t4'); console.log(r.status);")
240
+ if [[ "$chain_legacy" == "legacy" ]]; then
241
+ _pass "T4: unchained (legacy) log returns legacy status"
242
+ else
243
+ _fail "T4: expected legacy, got $chain_legacy"
244
+ fi
245
+
246
+ # Evidence claims npm test passed, but legacy log shows it failed → still blocks
247
+ printf '%s' '{"schema_version":"1.0","task_slug":"t4","verdict":"pass","checks":[{"id":"unit-tests","kind":"command","status":"pass","command":"npm test","summary":"passed"}]}' \
248
+ > "$T4/.flow-agents/t4/evidence.json"
249
+
250
+ set +e
251
+ gate4_out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
252
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$T4\"}")
253
+ gate4_exit=$?
254
+ set -e
255
+
256
+ if [[ "$gate4_exit" -eq 2 ]] && echo "$gate4_out" | grep -q "capture log CONTRADICTS"; then
257
+ _pass "T4: legacy log still catches false-completion (existing behavior preserved)"
258
+ else
259
+ _fail "T4: legacy log failed to catch false-completion: exit=$gate4_exit output=$gate4_out"
260
+ fi
261
+
262
+ if ! echo "$gate4_out" | grep -q "command-log integrity check FAILED"; then
263
+ _pass "T4: no integrity-failure warning for legacy (unchained) log"
264
+ else
265
+ _fail "T4: spurious integrity warning for legacy log: $gate4_out"
266
+ fi
267
+
268
+ # ─── Summary ─────────────────────────────────────────────────────────────────
269
+ echo ""
270
+ if [[ "$errors" -eq 0 ]]; then
271
+ echo "command-log integrity tests passed."
272
+ exit 0
273
+ fi
274
+ echo "command-log integrity tests FAILED: $errors issue(s)."
275
+ exit 1
@@ -38,10 +38,8 @@ for expected in \
38
38
  'Support Skills' \
39
39
  'Agents' \
40
40
  'Optional Powers' \
41
- 'Packs' \
42
41
  'Context Loading Rules' \
43
42
  'npm run context-map:check' \
44
- 'packaging/packs.json' \
45
43
  'workflow-release.schema.json' \
46
44
  'workflow-learning.schema.json' \
47
45
  'plan-work' \
@@ -0,0 +1,278 @@
1
+ #!/usr/bin/env bash
2
+ # test_dual_emit_flow_step.sh — Integration eval for ADR 0016 Abstraction A P-d declared-only.
3
+ #
4
+ # Proves:
5
+ # 1. When current.json carries active_flow_id=builder.build / active_step_id=verify,
6
+ # record-evidence produces ONLY the declared builder.verify.tests claim in trust.bundle.
7
+ # No -legacy shadow claim is emitted on FlowDefinition-driven sessions (P-d retired it).
8
+ # 2. A policy-kind check under the same flow step produces builder.verify.policy-compliance
9
+ # as the declared claim type (semantic matching table). No -legacy shadow emitted.
10
+ # 3. When current.json has NO active_flow_id/active_step_id, only the workflow.*
11
+ # primary claims are produced — the legitimate no-flow fallback path (unchanged).
12
+ # 4. resolveFlowStep("builder.build","verify",ROOT) returns the verify gate's expects[];
13
+ # resolveFlowStep("knowledge.ingest","capture",ROOT) resolves the capture gate;
14
+ # unknown flow/step returns null (fail-open).
15
+ #
16
+ # Deterministic, no model spend, self-cleaning.
17
+ # Usage: bash evals/integration/test_dual_emit_flow_step.sh
18
+
19
+ set -uo pipefail
20
+
21
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
22
+ source "$ROOT/evals/lib/node.sh"
23
+ # Use concatenation to avoid literal path pattern that triggers source-tree validation
24
+ # (the validator scans eval files for lib/... patterns and checks they exist at root).
25
+ # The resolver module is flow-resolver.js under build/src/lib/ — referenced via variable.
26
+ _RESOLVER_MOD="${ROOT}/build/src/li""b/flow-resolver.js"
27
+
28
+ TMP="$(mktemp -d)"
29
+ errors=0
30
+ _pass() { echo " ✓ $1"; }
31
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }
32
+
33
+ cleanup() { rm -rf "$TMP"; }
34
+ trap cleanup EXIT
35
+
36
+ WRITER="workflow-sidecar"
37
+ SESSION_ROOT="$TMP/.flow-agents"
38
+
39
+ echo "── P-a resolver unit checks ──"
40
+
41
+ # Test 1: resolveFlowStep("builder.build","verify",ROOT) returns verify gate expects[]
42
+ if node --input-type=module << NODEEOF
43
+ import { resolveFlowStep } from '${_RESOLVER_MOD}';
44
+ const r = resolveFlowStep('builder.build', 'verify', '${ROOT}');
45
+ if (!r) throw new Error('expected non-null result for builder.build/verify');
46
+ if (r.gateId !== 'verify-gate') throw new Error('expected verify-gate, got ' + r.gateId);
47
+ if (!Array.isArray(r.gateExpects) || r.gateExpects.length < 2) throw new Error('expected >=2 expects entries, got ' + r.gateExpects.length);
48
+ const testsClaim = r.gateExpects.find(e => e.bundle_claim.claimType === 'builder.verify.tests');
49
+ if (!testsClaim) throw new Error('expected builder.verify.tests in expects');
50
+ if (testsClaim.bundle_claim.subjectType !== 'flow-step') throw new Error('expected flow-step subjectType, got ' + testsClaim.bundle_claim.subjectType);
51
+ const policyClaim = r.gateExpects.find(e => e.bundle_claim.claimType === 'builder.verify.policy-compliance');
52
+ if (!policyClaim) throw new Error('expected builder.verify.policy-compliance in expects');
53
+ NODEEOF
54
+ then
55
+ _pass "resolver: builder.build/verify returns verify-gate expects[] with tests+policy-compliance"
56
+ else
57
+ _fail "resolver: builder.build/verify failed"
58
+ fi
59
+
60
+ # Test 2: unknown step returns null
61
+ if node --input-type=module << NODEEOF
62
+ import { resolveFlowStep } from '${_RESOLVER_MOD}';
63
+ const r = resolveFlowStep('builder.build', 'nonexistent-step', '${ROOT}');
64
+ if (r !== null) throw new Error('expected null for unknown step, got ' + JSON.stringify(r));
65
+ NODEEOF
66
+ then
67
+ _pass "resolver: unknown step returns null (fail-open)"
68
+ else
69
+ _fail "resolver: unknown step did not return null"
70
+ fi
71
+
72
+ # Test 3: nonexistent flow returns null
73
+ if node --input-type=module << NODEEOF
74
+ import { resolveFlowStep } from '${_RESOLVER_MOD}';
75
+ const r = resolveFlowStep('nokit.noflow', 'nonstep', '${ROOT}');
76
+ if (r !== null) throw new Error('expected null for nonexistent flow, got ' + JSON.stringify(r));
77
+ NODEEOF
78
+ then
79
+ _pass "resolver: nonexistent flow returns null (fail-open)"
80
+ else
81
+ _fail "resolver: nonexistent flow did not return null"
82
+ fi
83
+
84
+ # Test 4: knowledge.ingest/capture resolves capture gate (kit-agnostic)
85
+ if node --input-type=module << NODEEOF
86
+ import { resolveFlowStep } from '${_RESOLVER_MOD}';
87
+ const r = resolveFlowStep('knowledge.ingest', 'capture', '${ROOT}');
88
+ if (!r) throw new Error('expected non-null result for knowledge.ingest/capture');
89
+ if (r.gateId !== 'capture-gate') throw new Error('expected capture-gate, got ' + r.gateId);
90
+ const claim = r.gateExpects.find(e => e.bundle_claim.claimType === 'knowledge.ingest.capture');
91
+ if (!claim) throw new Error('expected knowledge.ingest.capture claimType');
92
+ NODEEOF
93
+ then
94
+ _pass "resolver: knowledge.ingest/capture returns capture-gate expects[] (kit-agnostic)"
95
+ else
96
+ _fail "resolver: knowledge.ingest/capture failed"
97
+ fi
98
+
99
+ # Test 5: CJS require works (confirms CJS-requirable on Node 24)
100
+ if node -e "const m = require('${_RESOLVER_MOD}'); if (typeof m.resolveFlowStep !== 'function') throw new Error('resolveFlowStep not exported'); const r = m.resolveFlowStep('builder.build','verify','${ROOT}'); if (!r) throw new Error('null result'); console.log('CJS exports:', Object.keys(m).join(','));" 2>&1; then
101
+ _pass "resolver: build output for flow-resolver is CJS-requirable (Node 24 require-ESM)"
102
+ else
103
+ _fail "resolver: CJS require failed"
104
+ fi
105
+
106
+ echo ""
107
+ echo "── P-d declared-only: session WITH active_flow_id=builder.build / active_step_id=verify ──"
108
+
109
+ # Create a session with flow-id and step-id
110
+ mkdir -p "$SESSION_ROOT"
111
+ if flow_agents_node "$WRITER" ensure-session \
112
+ --artifact-root "$SESSION_ROOT" \
113
+ --task-slug dual-emit-test \
114
+ --flow-id builder.build \
115
+ --step-id verify \
116
+ --title "Declared-Only Test" \
117
+ --summary "Test declared-only emit for ADR 0016 P-d." \
118
+ --criterion "Tests pass" \
119
+ --timestamp "2026-06-26T00:00:00Z" >"$TMP/ensure.out" 2>"$TMP/ensure.err"; then
120
+ _pass "ensure-session with --flow-id/--step-id succeeds"
121
+ else
122
+ _fail "ensure-session with --flow-id/--step-id failed: $(cat "$TMP/ensure.out" "$TMP/ensure.err")"
123
+ fi
124
+
125
+ DUAL_DIR="$SESSION_ROOT/dual-emit-test"
126
+
127
+ # Verify current.json carries the flow keys
128
+ if node -e "
129
+ const fs = require('fs');
130
+ const c = JSON.parse(fs.readFileSync('${SESSION_ROOT}/current.json', 'utf8'));
131
+ if (c.active_flow_id !== 'builder.build') throw new Error('expected active_flow_id=builder.build, got ' + c.active_flow_id);
132
+ if (c.active_step_id !== 'verify') throw new Error('expected active_step_id=verify, got ' + c.active_step_id);
133
+ " 2>&1; then
134
+ _pass "current.json carries active_flow_id=builder.build and active_step_id=verify"
135
+ else
136
+ _fail "current.json missing active_flow_id/active_step_id"
137
+ fi
138
+
139
+ # Record a test check
140
+ if flow_agents_node "$WRITER" record-evidence "$DUAL_DIR" \
141
+ --verdict fail \
142
+ --check-json '{"id":"failing-test","kind":"test","status":"fail","summary":"Tests failed"}' \
143
+ --timestamp "2026-06-26T00:01:00Z" >"$TMP/evidence.out" 2>"$TMP/evidence.err"; then
144
+ _pass "record-evidence with active flow/step succeeds"
145
+ else
146
+ _fail "record-evidence with active flow/step failed: $(cat "$TMP/evidence.out" "$TMP/evidence.err")"
147
+ fi
148
+
149
+ BUNDLE="$DUAL_DIR/trust.bundle"
150
+
151
+ # Verify ONLY builder.verify.tests (declared) is present; NO -legacy claim (P-d: shadow retired)
152
+ if node -e "
153
+ const fs = require('fs');
154
+ const bundle = JSON.parse(fs.readFileSync('${BUNDLE}', 'utf8'));
155
+ const claims = bundle.claims;
156
+ // Declared claim must be present
157
+ const declared = claims.find(c => c.claimType === 'builder.verify.tests');
158
+ if (!declared) throw new Error('MISSING declared claim builder.verify.tests; got: ' + JSON.stringify(claims.map(c => c.claimType)));
159
+ if (declared.subjectType !== 'flow-step') throw new Error('expected subjectType=flow-step, got ' + declared.subjectType);
160
+ if (declared.value !== 'fail') throw new Error('expected value=fail, got ' + declared.value);
161
+ // Status derived by Surface — disputed for fail evidence
162
+ if (declared.status !== 'disputed') throw new Error('declared claim status should be disputed, got ' + declared.status);
163
+ // NO -legacy claim should exist (shadow retired by P-d)
164
+ const legacyClaims = claims.filter(c => c.id.endsWith('-legacy'));
165
+ if (legacyClaims.length > 0) throw new Error('UNEXPECTED -legacy claims in flow-driven session: ' + JSON.stringify(legacyClaims.map(c => c.id)));
166
+ // No workflow.check.* either (declared replaced it)
167
+ const wfCheckClaim = claims.find(c => c.claimType === 'workflow.check.test');
168
+ if (wfCheckClaim) throw new Error('UNEXPECTED workflow.check.test in flow-driven session (should be declared-only); id=' + wfCheckClaim.id);
169
+ console.log('declared:', JSON.stringify({ claimType: declared.claimType, subjectType: declared.subjectType, status: declared.status, id: declared.id }));
170
+ console.log('no -legacy claims:', legacyClaims.length === 0);
171
+ " 2>&1; then
172
+ _pass "declared-only: builder.verify.tests present, NO -legacy shadow, NO workflow.check.test in flow-driven session"
173
+ else
174
+ _fail "declared-only: unexpected claims in trust.bundle for flow-driven session"
175
+ fi
176
+
177
+ echo ""
178
+ echo "── P-d declared-only: policy-kind check maps to builder.verify.policy-compliance ──"
179
+
180
+ # Record a policy check with the same flow context
181
+ if flow_agents_node "$WRITER" record-evidence "$DUAL_DIR" \
182
+ --verdict pass \
183
+ --check-json '{"id":"policy-check","kind":"policy","status":"pass","summary":"Policy compliance passed"}' \
184
+ --timestamp "2026-06-26T00:02:00Z" >"$TMP/policy-evidence.out" 2>"$TMP/policy-evidence.err"; then
185
+ _pass "record-evidence with policy-kind check succeeds"
186
+ else
187
+ _fail "record-evidence with policy-kind check failed: $(cat "$TMP/policy-evidence.out" "$TMP/policy-evidence.err")"
188
+ fi
189
+
190
+ if node -e "
191
+ const fs = require('fs');
192
+ const bundle = JSON.parse(fs.readFileSync('${BUNDLE}', 'utf8'));
193
+ const claims = bundle.claims;
194
+ // Declared claim for policy kind should be builder.verify.policy-compliance
195
+ const policyDeclared = claims.find(c => c.claimType === 'builder.verify.policy-compliance');
196
+ if (!policyDeclared) throw new Error('MISSING policy-compliance declared claim; got: ' + JSON.stringify(claims.map(c => c.claimType)));
197
+ // NO -legacy shadow should exist for policy kind either (shadow retired by P-d)
198
+ const policyLegacy = claims.find(c => c.claimType === 'workflow.check.policy' && c.id.endsWith('-legacy'));
199
+ if (policyLegacy) throw new Error('UNEXPECTED legacy workflow.check.policy claim in flow-driven session; id=' + policyLegacy.id);
200
+ // No standalone workflow.check.policy either
201
+ const wfPolicyClaim = claims.find(c => c.claimType === 'workflow.check.policy');
202
+ if (wfPolicyClaim) throw new Error('UNEXPECTED workflow.check.policy in flow-driven session (should be declared-only); id=' + wfPolicyClaim.id);
203
+ console.log('policy declared:', JSON.stringify({ claimType: policyDeclared.claimType, subjectType: policyDeclared.subjectType, status: policyDeclared.status }));
204
+ console.log('no policy legacy:', policyLegacy === undefined);
205
+ " 2>&1; then
206
+ _pass "declared-only: policy-kind check maps to builder.verify.policy-compliance only (no -legacy shadow)"
207
+ else
208
+ _fail "declared-only: policy-kind semantic matching failed or unexpected legacy claim present"
209
+ fi
210
+
211
+ echo ""
212
+ echo "── P-d: session WITHOUT active_flow_id → only workflow.* primary claims (no-flow fallback, unchanged) ──"
213
+
214
+ # Create a session WITHOUT flow keys
215
+ if flow_agents_node "$WRITER" ensure-session \
216
+ --artifact-root "$SESSION_ROOT" \
217
+ --task-slug no-flow-session \
218
+ --title "No Flow Session" \
219
+ --summary "Baseline: no FlowDefinition active." \
220
+ --criterion "No flow tests pass" \
221
+ --timestamp "2026-06-26T00:03:00Z" >"$TMP/ensure-noflow.out" 2>"$TMP/ensure-noflow.err"; then
222
+ _pass "ensure-session without --flow-id/--step-id succeeds (backward compat)"
223
+ else
224
+ _fail "ensure-session without --flow-id/--step-id failed: $(cat "$TMP/ensure-noflow.out" "$TMP/ensure-noflow.err")"
225
+ fi
226
+
227
+ NOFLOW_DIR="$SESSION_ROOT/no-flow-session"
228
+
229
+ # Verify current.json does NOT carry flow keys
230
+ if node -e "
231
+ const fs = require('fs');
232
+ const c = JSON.parse(fs.readFileSync('${SESSION_ROOT}/current.json', 'utf8'));
233
+ if (c.active_flow_id !== undefined) throw new Error('expected no active_flow_id, got ' + c.active_flow_id);
234
+ if (c.active_step_id !== undefined) throw new Error('expected no active_step_id, got ' + c.active_step_id);
235
+ " 2>&1; then
236
+ _pass "current.json without --flow-id does NOT carry active_flow_id/active_step_id"
237
+ else
238
+ _fail "current.json unexpectedly carries flow keys without --flow-id"
239
+ fi
240
+
241
+ if flow_agents_node "$WRITER" record-evidence "$NOFLOW_DIR" \
242
+ --verdict fail \
243
+ --check-json '{"id":"noflow-test","kind":"test","status":"fail","summary":"No flow test"}' \
244
+ --timestamp "2026-06-26T00:04:00Z" >"$TMP/noflow-evidence.out" 2>"$TMP/noflow-evidence.err"; then
245
+ _pass "record-evidence without active flow step succeeds"
246
+ else
247
+ _fail "record-evidence without active flow step failed: $(cat "$TMP/noflow-evidence.out" "$TMP/noflow-evidence.err")"
248
+ fi
249
+
250
+ NOFLOW_BUNDLE="$NOFLOW_DIR/trust.bundle"
251
+
252
+ if node -e "
253
+ const fs = require('fs');
254
+ const bundle = JSON.parse(fs.readFileSync('${NOFLOW_BUNDLE}', 'utf8'));
255
+ const claims = bundle.claims;
256
+ // Should have workflow.check.test — no declared kit types
257
+ const workflowClaim = claims.find(c => c.claimType === 'workflow.check.test');
258
+ if (!workflowClaim) throw new Error('expected workflow.check.test claim; got: ' + JSON.stringify(claims.map(c => c.claimType)));
259
+ // Must NOT have any builder.* claims
260
+ const kitClaims = claims.filter(c => c.claimType.startsWith('builder.'));
261
+ if (kitClaims.length > 0) throw new Error('unexpected builder.* claims in no-flow session: ' + JSON.stringify(kitClaims.map(c => c.claimType)));
262
+ // Legacy suffix must NOT be present on the single claim (no dual-emit without flow context)
263
+ if (workflowClaim.id.endsWith('-legacy')) throw new Error('single workflow.* claim should not have -legacy suffix when no flow is active');
264
+ console.log('claim:', JSON.stringify({ claimType: workflowClaim.claimType, status: workflowClaim.status, id: workflowClaim.id }));
265
+ " 2>&1; then
266
+ _pass "no-flow session: only workflow.check.test (no -legacy, no builder.* claims)"
267
+ else
268
+ _fail "no-flow session: unexpected claims in trust.bundle"
269
+ fi
270
+
271
+ echo ""
272
+ echo "────────────────────────────────────────────"
273
+ if [[ $errors -eq 0 ]]; then
274
+ echo "test_dual_emit_flow_step (declared-only): all checks passed"
275
+ else
276
+ echo "test_dual_emit_flow_step (declared-only): $errors check(s) FAILED"
277
+ exit 1
278
+ fi