@kontourai/flow-agents 1.4.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/runtime-compat.yml +1 -1
  8. package/.github/workflows/trust-reconcile.yml +113 -0
  9. package/AGENTS.md +13 -0
  10. package/CHANGELOG.md +103 -0
  11. package/CONTRIBUTING.md +4 -4
  12. package/README.md +1 -0
  13. package/agents/tool-planner.json +1 -1
  14. package/build/src/cli/init.js +242 -20
  15. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  16. package/build/src/cli/verify.d.ts +1 -0
  17. package/build/src/cli/verify.js +90 -0
  18. package/build/src/cli/workflow-sidecar.d.ts +316 -8
  19. package/build/src/cli/workflow-sidecar.js +1996 -91
  20. package/build/src/cli.js +2 -3
  21. package/build/src/lib/flow-resolver.d.ts +111 -0
  22. package/build/src/lib/flow-resolver.js +308 -0
  23. package/build/src/tools/build-universal-bundles.js +34 -22
  24. package/build/src/tools/generate-context-map.js +3 -16
  25. package/build/src/tools/validate-source-tree.d.ts +1 -1
  26. package/build/src/tools/validate-source-tree.js +42 -162
  27. package/context/contracts/artifact-contract.md +10 -0
  28. package/context/contracts/delivery-contract.md +1 -0
  29. package/context/contracts/review-contract.md +1 -0
  30. package/context/contracts/verification-contract.md +2 -0
  31. package/context/gate-awareness.md +39 -0
  32. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  33. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  34. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  35. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  36. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  37. package/docs/adr/0007-skill-audit.md +1 -1
  38. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  39. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  40. package/docs/adr/0011-mcp-posture.md +100 -0
  41. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  42. package/docs/adr/0013-context-lifecycle.md +151 -0
  43. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  44. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  45. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  46. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  47. package/docs/agent-system-guidebook.md +5 -12
  48. package/docs/context-map.md +4 -10
  49. package/docs/index.md +3 -2
  50. package/docs/integrations/framework-adapter.md +19 -6
  51. package/docs/integrations/index.md +2 -2
  52. package/docs/north-star.md +4 -4
  53. package/docs/operating-layers.md +3 -3
  54. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  55. package/docs/repository-structure.md +2 -2
  56. package/docs/skills-map.md +1 -0
  57. package/docs/spec/runtime-hook-surface.md +62 -9
  58. package/docs/standards-register.md +3 -3
  59. package/docs/survey-utterance-check.md +1 -1
  60. package/docs/trust-anchor-adoption.md +197 -0
  61. package/docs/verifiable-trust.md +95 -0
  62. package/docs/veritas-integration.md +2 -2
  63. package/docs/workflow-usage-guide.md +69 -0
  64. package/evals/acceptance/DEMO-false-completion.md +144 -0
  65. package/evals/acceptance/demo-cast.sh +92 -0
  66. package/evals/acceptance/demo-false-completion.sh +72 -0
  67. package/evals/acceptance/demo-real-evidence.sh +104 -0
  68. package/evals/acceptance/demo.tape +29 -0
  69. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  70. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  71. package/evals/acceptance/prove-teeth.sh +105 -0
  72. package/evals/ci/antigaming-suite.sh +55 -0
  73. package/evals/ci/run-baseline.sh +2 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  75. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  77. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  78. package/evals/integration/test_builder_step_producers.sh +379 -0
  79. package/evals/integration/test_bundle_install.sh +35 -71
  80. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  81. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  82. package/evals/integration/test_checkpoint_signing.sh +489 -0
  83. package/evals/integration/test_claim_lookup.sh +352 -0
  84. package/evals/integration/test_command_log_fork_classification.sh +134 -0
  85. package/evals/integration/test_command_log_integrity.sh +275 -0
  86. package/evals/integration/test_context_map.sh +0 -2
  87. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  88. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  89. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  90. package/evals/integration/test_flow_kit_repository.sh +2 -0
  91. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  92. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  93. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  94. package/evals/integration/test_gate_lockdown.sh +1137 -0
  95. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  96. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  97. package/evals/integration/test_goal_fit_hook.sh +69 -4
  98. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  99. package/evals/integration/test_install_merge.sh +1176 -0
  100. package/evals/integration/test_kit_identity_trust.sh +393 -0
  101. package/evals/integration/test_mint_attestation.sh +373 -0
  102. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  103. package/evals/integration/test_publish_delivery.sh +269 -0
  104. package/evals/integration/test_reconcile_soundness.sh +528 -0
  105. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  106. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  107. package/evals/integration/test_trust_checkpoint.sh +325 -0
  108. package/evals/integration/test_trust_reconcile.sh +293 -0
  109. package/evals/integration/test_verify_cli.sh +208 -0
  110. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  111. package/evals/lib/node.sh +0 -6
  112. package/evals/run.sh +47 -0
  113. package/evals/static/test_workflow_skills.sh +6 -13
  114. package/install.sh +0 -7
  115. package/integrations/strands-ts/README.md +25 -15
  116. package/integrations/veritas/flow-agents.adapter.json +1 -2
  117. package/kits/builder/flows/build.flow.json +59 -12
  118. package/kits/builder/kit.json +85 -15
  119. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  120. package/kits/builder/skills/deliver/SKILL.md +36 -6
  121. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  122. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  123. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  124. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  125. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  126. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  127. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  128. package/kits/knowledge/adapters/default-store/index.js +38 -0
  129. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  130. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  131. package/kits/knowledge/docs/store-contract.md +314 -0
  132. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  133. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  134. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  135. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  136. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  137. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  138. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  139. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  140. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  141. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  142. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  143. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  144. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  145. package/kits/knowledge/kit.json +51 -1
  146. package/package.json +6 -6
  147. package/packaging/conformance/README.md +10 -2
  148. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  151. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  152. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  153. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  154. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  155. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  156. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  157. package/packaging/conformance/run-conformance.js +1 -1
  158. package/scripts/README.md +2 -1
  159. package/scripts/build-universal-bundles.js +0 -1
  160. package/scripts/ci/mint-attestation.js +221 -0
  161. package/scripts/ci/trust-reconcile.js +545 -0
  162. package/scripts/hooks/config-protection.js +423 -1
  163. package/scripts/hooks/evidence-capture.js +348 -0
  164. package/scripts/hooks/lib/liveness-read.js +113 -0
  165. package/scripts/hooks/run-hook.js +6 -1
  166. package/scripts/hooks/stop-goal-fit.js +1524 -79
  167. package/scripts/hooks/workflow-steering.js +135 -5
  168. package/scripts/install-codex-home.sh +39 -0
  169. package/scripts/install-merge.js +330 -0
  170. package/scripts/repair-command-log.js +115 -0
  171. package/src/cli/init.ts +218 -20
  172. package/src/cli/validate-workflow-artifacts.ts +18 -2
  173. package/src/cli/verify.ts +100 -0
  174. package/src/cli/workflow-sidecar.ts +2127 -84
  175. package/src/cli.ts +2 -3
  176. package/src/lib/flow-resolver.ts +369 -0
  177. package/src/tools/build-universal-bundles.ts +34 -21
  178. package/src/tools/generate-context-map.ts +3 -17
  179. package/src/tools/validate-source-tree.ts +44 -104
  180. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  181. package/build/src/tools/filter-installed-packs.js +0 -135
  182. package/packaging/packs.json +0 -49
  183. package/scripts/filter-installed-packs.js +0 -2
  184. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env bash
2
+ # test_flowdef_session_activation.sh — Integration eval for ADR 0016 Step 1.
3
+ #
4
+ # Proves that ensure-session --flow-id builder.build activates the FlowDefinition-
5
+ # driven path so producers fire, gates enforce on builder.* claims, and advance-state
6
+ # correctly sets active_step_id via the phase_map at each phase.
7
+ #
8
+ # Tests:
9
+ # 1. ensure-session --flow-id builder.build writes active_flow_id + default
10
+ # active_step_id (pull-work) to current.json.
11
+ # 2. advance-state through phases (planning→execution→verification) sets correct
12
+ # active_step_id via phase_map at each transition.
13
+ # 3. At the verify step, record-gate-claim for tests-evidence produces
14
+ # builder.verify.tests (status=verified) in the bundle — producer fires.
15
+ # 4. A TAMPERED builder.verify.tests bundle at the verify step BLOCKS (exit 2)
16
+ # with the tamper warning naming the declared claimType.
17
+ # 5. Fallback: session without --flow-id produces only workflow.* claims (the
18
+ # retained safety net for non-flow sessions).
19
+ #
20
+ # Deterministic, no model spend, self-cleaning.
21
+ # Usage: bash evals/integration/test_flowdef_session_activation.sh
22
+
23
+ set -uo pipefail  # errexit is not requested here; failed checks are tallied in $errors by _fail
24
+
25
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
26
+ source "$ROOT/evals/lib/node.sh"
27
+ GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
28
+
29
+ export FLOW_AGENTS_GOAL_FIT_MAX_BLOCKS=100000
30
+
31
+ TMP="$(mktemp -d)" || { echo "mktemp -d failed; cannot create scratch directory" >&2; exit 1; }  # an empty TMP would make later paths resolve under /
32
+ errors=0
33
+
34
+ _pass() { echo " ✓ $1"; }  # $1 = description of the check that passed
35
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }  # $1 = description; also bumps the failure tally
36
+
37
+ cleanup() { rm -rf -- "${TMP:?}"; }  # :? refuses to run rm -rf with an empty path
38
+ trap cleanup EXIT
39
+
40
+ WRITER="workflow-sidecar"
41
+
42
+ # ─── TEST 1: ensure-session --flow-id activates the flow ─────────────────────
43
+ echo ""
44
+ echo "=== 1. ensure-session --flow-id builder.build activates FlowDefinition-driven path ==="
45
+
46
+ MAIN_AROOT="$TMP/main-aroot"
47
+ SLUG="activation-test"
48
+ SESSION_DIR="$MAIN_AROOT/$SLUG"
49
+ mkdir -p "$MAIN_AROOT"
50
+
51
+ flow_agents_node "$WRITER" ensure-session \
52
+ --artifact-root "$MAIN_AROOT" \
53
+ --task-slug "$SLUG" \
54
+ --title "Step 1 activation test" \
55
+ --summary "Test that --flow-id builder.build activates the FlowDefinition-driven path." \
56
+ --criterion "All gates produce declared claims" \
57
+ --flow-id builder.build \
58
+ --timestamp "2026-06-01T00:00:00Z" >/dev/null 2>&1
59
+
60
+ node -e "
61
+ const fs = require('fs');
62
+ const c = JSON.parse(fs.readFileSync(process.argv[1], 'utf8')); // path comes in via argv so quotes or backslashes in it cannot alter this script
63
+ if (c.active_flow_id !== 'builder.build') throw new Error('expected active_flow_id=builder.build, got ' + c.active_flow_id);
64
+ if (!c.active_step_id) throw new Error('expected active_step_id to be set (first step default), got ' + c.active_step_id);
65
+ console.log('current.json: active_flow_id=' + c.active_flow_id + ' active_step_id=' + c.active_step_id);
66
+ " "$MAIN_AROOT/current.json" 2>&1 \
67
+ && _pass "ensure-session --flow-id builder.build writes active_flow_id + default active_step_id to current.json" \
68
+ || _fail "ensure-session --flow-id builder.build did NOT write active_flow_id to current.json"
69
+
70
+ # ─── TEST 2: advance-state sets active_step_id via phase_map ─────────────────
71
+ echo ""
72
+ echo "=== 2. advance-state through phases sets active_step_id via phase_map ==="
73
+
74
+ flow_agents_node "$WRITER" init-plan "$SESSION_DIR/$SLUG--deliver.md" \
75
+ --source-request "Test" --summary "Testing" \
76
+ --timestamp "2026-06-01T00:00:30Z" >/dev/null 2>&1
77
+
78
+ test_phase_step() {  # $1 = phase passed to advance-state; $2 = active_step_id expected in current.json afterwards
79
+ local phase="$1" expected_step="$2"
80
+ flow_agents_node "$WRITER" advance-state "$SESSION_DIR" \
81
+ --status in_progress --phase "$phase" \
82
+ --summary "Testing phase $phase." \
83
+ --next-action "Continue." \
84
+ --flow-definition builder.build \
85
+ --timestamp "2026-06-01T00:01:00Z" >/dev/null 2>&1
86
+ local actual  # declared separately so the assignment below is not masked by the exit status of local
87
+ actual=$(node -e "
88
+ const fs = require('fs');
89
+ const c = JSON.parse(fs.readFileSync(process.argv[1], 'utf8')); // path comes in via argv, not spliced into the JS source
90
+ console.log(c.active_step_id || '');
91
+ " "$MAIN_AROOT/current.json" 2>/dev/null)
92
+ if [ "$actual" = "$expected_step" ]; then
93
+ _pass "advance-state phase=$phase → active_step_id=$expected_step"
94
+ else
95
+ _fail "advance-state phase=$phase → got active_step_id=$actual (expected $expected_step)"
96
+ fi
97
+ }
98
+
99
+ test_phase_step "planning" "plan"
100
+ test_phase_step "execution" "execute"
101
+ test_phase_step "verification" "verify"
102
+
103
+ # ─── TEST 3: at verify step, record-gate-claim produces builder.verify.tests ──
104
+ echo ""
105
+ echo "=== 3. verify step: producer fires — record-gate-claim produces builder.verify.tests ==="
106
+
107
+ if flow_agents_node "$WRITER" record-gate-claim "$SESSION_DIR" \
108
+ --status pass \
109
+ --summary "All tests pass." \
110
+ --expectation "tests-evidence" \
111
+ --timestamp "2026-06-01T00:02:00Z" >/dev/null 2>&1; then
112
+ _pass "record-gate-claim at verify step succeeds (expectation=tests-evidence)"
113
+ else
114
+ _fail "record-gate-claim at verify step FAILED"
115
+ fi
116
+
117
+ node -e "
118
+ const fs = require('fs');
119
+ const bundlePath = process.argv[1]; // path comes in via argv so quotes or backslashes in it cannot alter this script
120
+ if (!fs.existsSync(bundlePath)) throw new Error('trust.bundle not found');
121
+ const bundle = JSON.parse(fs.readFileSync(bundlePath, 'utf8'));
122
+ const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
123
+ if (!declared) throw new Error('MISSING builder.verify.tests; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
124
+ if (declared.status !== 'verified') throw new Error('expected status=verified, got ' + declared.status);
125
+ console.log('builder.verify.tests: subjectType=' + declared.subjectType + ' status=' + declared.status + ' value=' + declared.value);
126
+ " "$SESSION_DIR/trust.bundle" 2>&1 \
127
+ && _pass "bundle contains builder.verify.tests (subjectType=flow-step, status=verified, value=pass)" \
128
+ || _fail "bundle missing or incorrect builder.verify.tests claim"
129
+
130
+ # ─── TEST 4: tampered bundle at verify step BLOCKS ────────────────────────────
131
+ echo ""
132
+ echo "=== 4. tamper-blocks: builder.verify.tests — tampered bundle triggers gate exit 2 ==="
133
+
134
+ TAMPER_DIR="$TMP/tamper-verify"
135
+ TAMPER_SLUG="tamper-verify-test"
136
+ mkdir -p "$TAMPER_DIR"
137
+ printf '# Test repo\n' > "$TAMPER_DIR/AGENTS.md"
138
+ mkdir -p "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG"
139
+
140
+ flow_agents_node "$WRITER" ensure-session \
141
+ --artifact-root "$TAMPER_DIR/.flow-agents" \
142
+ --task-slug "$TAMPER_SLUG" \
143
+ --title "Tamper verify test" \
144
+ --summary "Testing tamper detection at verify step." \
145
+ --flow-id builder.build \
146
+ --step-id verify \
147
+ --timestamp "2026-06-01T02:00:00Z" >/dev/null 2>&1
148
+
149
+ flow_agents_node "$WRITER" init-plan "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG/$TAMPER_SLUG--deliver.md" \
150
+ --source-request "Test" --summary "Tamper test" \
151
+ --timestamp "2026-06-01T02:00:00Z" >/dev/null 2>&1
152
+
153
+ flow_agents_node "$WRITER" advance-state "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG" \
154
+ --status in_progress --phase verification \
155
+ --summary "At verify." --next-action "Continue." \
156
+ --flow-definition builder.build \
157
+ --timestamp "2026-06-01T02:00:30Z" >/dev/null 2>&1
158
+
159
+ # Write TAMPERED trust.bundle: stored verified, evidence passing=false
160
+ python3 - "$TAMPER_DIR/.flow-agents/$TAMPER_SLUG/trust.bundle" << 'PY'
161
+ import json, sys
162
+ bundle = {
163
+ "schemaVersion": 3,
164
+ "source": "flow-agents/workflow-sidecar",
165
+ "claims": [{
166
+ "id": "c1",
167
+ "subjectId": "tamper-verify-test/verify-tests",
168
+ "subjectType": "flow-step",
169
+ "claimType": "builder.verify.tests",
170
+ "fieldOrBehavior": "Tests pass",
171
+ "value": "pass",
172
+ "impactLevel": "high",
173
+ "status": "verified",
174
+ "createdAt": "2026-06-01T02:00:00Z",
175
+ "updatedAt": "2026-06-01T02:00:00Z"
176
+ }],
177
+ "evidence": [{
178
+ "id": "ev1",
179
+ "claimId": "c1",
180
+ "evidenceType": "test_output",
181
+ "method": "validation",
182
+ "sourceRef": "command-log.jsonl",
183
+ "excerptOrSummary": "tests FAILED",
184
+ "observedAt": "2026-06-01T02:00:00Z",
185
+ "collectedBy": "harness",
186
+ "passing": False,
187
+ "blocking": True
188
+ }],
189
+ "policies": [],
190
+ "events": [{
191
+ "id": "evt1",
192
+ "claimId": "c1",
193
+ "status": "verified",
194
+ "actor": "agent",
195
+ "method": "workflow-check",
196
+ "evidenceIds": ["ev1"],
197
+ "createdAt": "2026-06-01T02:00:00Z"
198
+ }]
199
+ }
200
+ json.dump(bundle, open(sys.argv[1], 'w'))
201
+ PY
202
+
203
+ set +e
204
+ tamper_out="$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip \
205
+ node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$TAMPER_DIR\"}")"
206
+ tamper_exit="$?"
207
+ set -e
208
+
209
+ if [ "$tamper_exit" -eq 2 ]; then
210
+ _pass "gate BLOCKS tampered builder.verify.tests bundle (exit 2)"
211
+ else
212
+ _fail "gate did NOT block tampered bundle: exit=$tamper_exit"
213
+ fi
214
+
215
+ if echo "$tamper_out" | grep -qE "stored status.*does not match recompute|possible tampered bundle|caught false-completion"; then
216
+ _pass "gate emits tamper warning for builder.verify.tests"
217
+ else
218
+ _fail "gate tamper warning missing from output: $tamper_out"
219
+ fi
220
+
221
+ if echo "$tamper_out" | grep -q "builder.verify.tests"; then
222
+ _pass "gate tamper warning names declared claimType builder.verify.tests"
223
+ else
224
+ _fail "gate tamper warning does not name builder.verify.tests: $tamper_out"
225
+ fi
226
+
227
+ # ─── TEST 5: Fallback — session without --flow-id (workflow.* only, safety net) ─
228
+ echo ""
229
+ echo "=== 5. Fallback: session without --flow-id produces only workflow.* claims (safety net intact) ==="
230
+
231
+ FALLBACK_AROOT="$TMP/fallback-aroot"
232
+ FALLBACK_SLUG="fallback-test"
233
+ FALLBACK_DIR="$FALLBACK_AROOT/$FALLBACK_SLUG"
234
+ mkdir -p "$FALLBACK_AROOT"
235
+
236
+ flow_agents_node "$WRITER" ensure-session \
237
+ --artifact-root "$FALLBACK_AROOT" \
238
+ --task-slug "$FALLBACK_SLUG" \
239
+ --title "Fallback no-flow test" \
240
+ --summary "No --flow-id: workflow.* fallback is the safety net for non-flow sessions." \
241
+ --timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
242
+
243
+ flow_agents_node "$WRITER" init-plan "$FALLBACK_DIR/$FALLBACK_SLUG--deliver.md" \
244
+ --source-request "Test" --summary "Testing fallback." \
245
+ --timestamp "2026-06-01T10:00:00Z" >/dev/null 2>&1
246
+
247
+ flow_agents_node "$WRITER" record-evidence "$FALLBACK_DIR" \
248
+ --verdict pass \
249
+ --check-json '{"id":"fallback-check","kind":"test","status":"pass","summary":"Fallback test passes"}' \
250
+ --timestamp "2026-06-01T10:01:00Z" >/dev/null 2>&1
251
+
252
+ node -e "
253
+ const fs = require('fs');
254
+ const bundle = JSON.parse(fs.readFileSync(process.argv[1], 'utf8')); // path comes in via argv so quotes or backslashes in it cannot alter this script
255
+ const claims = bundle.claims || [];
256
+ const wfClaim = claims.find(c => c.claimType === 'workflow.check.test');
257
+ const builderClaims = claims.filter(c => c.claimType.startsWith('builder.'));
258
+ if (!wfClaim) throw new Error('MISSING workflow.check.test in fallback session');
259
+ if (builderClaims.length > 0) throw new Error('UNEXPECTED builder.* claims in fallback session: ' + builderClaims.map(c=>c.claimType).join(', '));
260
+ if (wfClaim.id.endsWith('-legacy')) throw new Error('workflow.check.test should not have -legacy suffix when no flow active');
261
+ console.log('fallback: only workflow.check.test present (no builder.* claims, no -legacy suffix)');
262
+ " "$FALLBACK_DIR/trust.bundle" 2>&1 \
263
+ && _pass "fallback (no --flow-id): only workflow.check.test produced, builder.* absent (producers dormant)" \
264
+ || _fail "fallback (no --flow-id): unexpected claims in trust.bundle"
265
+
266
+ # ─── Summary ──────────────────────────────────────────────────────────────────
267
+ echo ""
268
+ if [ "$errors" -ne 0 ]; then  # any tallied failure makes the whole eval exit 1
269
+ echo "test_flowdef_session_activation: $errors check(s) FAILED."
270
+ exit 1
271
+ fi
272
+ echo "test_flowdef_session_activation: all checks passed."
273
+ exit 0
@@ -0,0 +1,250 @@
1
+ #!/usr/bin/env bash
2
+ # test_flowdef_session_history_preservation.sh — Integration eval for ADR 0016 Step 0.
3
+ #
4
+ # Proves:
5
+ # 1. A FlowDefinition-driven session (ensure-session --flow-id builder.build, step=verify)
6
+ # records a check via the declared builder.verify.tests path, then record-critique and
7
+ # record-learning PRESERVE the prior declared check + critique claims in the rebuilt
8
+ # bundle (no history loss).
9
+ # 2. A workflow.* session (no --flow-id) record-critique/record-learning round-trip is
10
+ # UNCHANGED — only workflow.check.* and workflow.critique.review claims survive.
11
+ # 3. evidenceClean/critiqueClean return correct results for a builder.* bundle:
12
+ # checked by running dogfood-pass --verdict pass on a clean builder.build session.
13
+ #
14
+ # Deterministic, no model spend, self-cleaning.
15
+ # Usage: bash evals/integration/test_flowdef_session_history_preservation.sh
16
+
17
+ set -uo pipefail  # errexit is not requested here; failed checks are tallied in $errors by _fail
18
+
19
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
20
+ source "$ROOT/evals/lib/node.sh"
21
+
22
+ TMP="$(mktemp -d)" || { echo "mktemp -d failed; cannot create scratch directory" >&2; exit 1; }  # an empty TMP would make later paths resolve under /
23
+ errors=0
24
+
25
+ _pass() { echo " ✓ $1"; }  # $1 = description of the check that passed
26
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); }  # $1 = description; also bumps the failure tally
27
+
28
+ cleanup() { rm -rf -- "${TMP:?}"; }  # :? refuses to run rm -rf with an empty path
29
+ trap cleanup EXIT
30
+
31
+ WRITER="workflow-sidecar"
32
+
33
+ # ─── TEST 1: FlowDefinition-driven session round-trip (no history loss) ────────
34
+ echo ""
35
+ echo "=== 1. FlowDefinition-driven session: record-critique/record-learning preserve declared claims ==="
36
+
37
+ FLOW_AROOT="$TMP/flow-aroot"
38
+ SLUG="history-flow-test"
39
+ SESSION_DIR="$FLOW_AROOT/$SLUG"
40
+ mkdir -p "$FLOW_AROOT"
41
+
42
+ # Create a FlowDefinition-driven session at the verify step (builder.verify.tests is declared)
43
+ flow_agents_node "$WRITER" ensure-session \
44
+ --artifact-root "$FLOW_AROOT" \
45
+ --task-slug "$SLUG" \
46
+ --title "History preservation test" \
47
+ --summary "Test that declared builder.* claims survive round-trips." \
48
+ --flow-id builder.build \
49
+ --step-id verify \
50
+ --timestamp "2026-06-01T00:00:00Z" >/dev/null 2>&1
51
+
52
+ flow_agents_node "$WRITER" init-plan "$SESSION_DIR/$SLUG--deliver.md" \
53
+ --source-request "Test" --summary "Testing" \
54
+ --timestamp "2026-06-01T00:00:00Z" >/dev/null 2>&1
55
+
56
+ # Record a passing check (produces ONLY builder.verify.tests declared claim — no legacy shadow, P-d)
57
+ flow_agents_node "$WRITER" record-evidence "$SESSION_DIR" \
58
+ --verdict pass \
59
+ --check-json '{"id":"unit-tests","kind":"test","status":"pass","summary":"Unit tests pass"}' \
60
+ --timestamp "2026-06-01T00:01:00Z" >/dev/null 2>&1
61
+
62
+ # Verify declared claim is in bundle before round-trip
63
+ node -e "
64
+ const fs = require('fs');
65
+ const bundle = JSON.parse(fs.readFileSync(process.argv[1], 'utf8')); // path comes in via argv so quotes or backslashes in it cannot alter this script
66
+ const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
67
+ if (!declared) throw new Error('MISSING builder.verify.tests before round-trip; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
68
+ console.log('before round-trip: builder.verify.tests status=' + declared.status);
69
+ " "$SESSION_DIR/trust.bundle" 2>&1 \
70
+ && _pass "builder.verify.tests declared claim present before round-trip" \
71
+ || _fail "builder.verify.tests declared claim MISSING before round-trip"
72
+
73
+ # Now do record-critique (the round-trip: checksFromBundle + critiquesFromBundle rebuild)
74
+ flow_agents_node "$WRITER" record-critique "$SESSION_DIR" \
75
+ --id "code-review" \
76
+ --verdict pass \
77
+ --summary "Code review passed." \
78
+ --timestamp "2026-06-01T00:02:00Z" >/dev/null 2>&1
79
+
80
+ # Assert builder.verify.tests survived the record-critique round-trip
81
+ node -e "
82
+ const fs = require('fs');
83
+ const bundle = JSON.parse(fs.readFileSync(process.argv[1], 'utf8')); // path comes in via argv so quotes or backslashes in it cannot alter this script
84
+ const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
85
+ if (!declared) throw new Error('HISTORY LOSS: builder.verify.tests MISSING after record-critique; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
86
+ console.log('after record-critique: builder.verify.tests status=' + declared.status);
87
+ " "$SESSION_DIR/trust.bundle" 2>&1 \
88
+ && _pass "builder.verify.tests declared claim preserved after record-critique (no history loss)" \
89
+ || _fail "builder.verify.tests declared claim LOST after record-critique (history loss)"
90
+
91
+ # Also verify the critique claim itself is present.
92
+ # In a flow-driven session (verify step), critique maps to the declared builder.verify.policy-compliance
93
+ # (the critique heuristic matches: subjectType=artifact + claimType contains "compliance").
94
+ # workflow.critique.review is emitted in no-flow sessions only (P-d: shadow retired).
95
+ node -e "
96
+ const fs = require('fs');
97
+ const bundle = JSON.parse(fs.readFileSync(process.argv[1], 'utf8')); // path comes in via argv so quotes or backslashes in it cannot alter this script
98
+ const claims = bundle.claims || [];
99
+ // Declared critique claim for verify-step: builder.verify.policy-compliance
100
+ const crit = claims.find(c => c.claimType === 'builder.verify.policy-compliance');
101
+ if (!crit) throw new Error('MISSING builder.verify.policy-compliance critique claim after record-critique; claims: ' + claims.map(c=>c.claimType).join(', '));
102
+ // Must NOT have workflow.critique.review in a flow-driven session (no shadow, P-d)
103
+ const legacy = claims.find(c => c.claimType === 'workflow.critique.review');
104
+ if (legacy) throw new Error('UNEXPECTED workflow.critique.review in flow-driven session (P-d retired shadow); id=' + legacy.id);
105
+ console.log('declared critique claim: claimType=' + crit.claimType + ' value=' + crit.value);
106
+ " "$SESSION_DIR/trust.bundle" 2>&1 \
107
+ && _pass "builder.verify.policy-compliance declared critique claim present (no workflow.critique.review shadow, P-d)" \
108
+ || _fail "declared critique claim MISSING or unexpected workflow.critique.review found after record-critique"
109
+
110
+ # Now do record-learning (second round-trip)
111
+ flow_agents_node "$WRITER" record-learning "$SESSION_DIR" \
112
+ --status learned \
113
+ --record-json '{
114
+ "outcome": "success",
115
+ "source_refs": [],
116
+ "facts": ["Tests passed clean."],
117
+ "routing": [{"target":"none","status":"completed","summary":"No routing needed."}],
118
+ "correction": {"needed": false, "evidence": "All checks passed cleanly."}
119
+ }' \
120
+ --summary "Learning recorded." \
121
+ --timestamp "2026-06-01T00:03:00Z" >/dev/null 2>&1
122
+
123
+ # Assert builder.verify.tests survived the record-learning round-trip
124
+ node -e "
125
+ const fs = require('fs');
126
+ const bundle = JSON.parse(fs.readFileSync(process.argv[1], 'utf8')); // path comes in via argv so quotes or backslashes in it cannot alter this script
127
+ const declared = (bundle.claims || []).find(c => c.claimType === 'builder.verify.tests');
128
+ if (!declared) throw new Error('HISTORY LOSS: builder.verify.tests MISSING after record-learning; claims: ' + (bundle.claims||[]).map(c=>c.claimType).join(', '));
129
+ console.log('after record-learning: builder.verify.tests status=' + declared.status);
130
+ " "$SESSION_DIR/trust.bundle" 2>&1 \
131
+ && _pass "builder.verify.tests declared claim preserved after record-learning (no history loss)" \
132
+ || _fail "builder.verify.tests declared claim LOST after record-learning (history loss)"
133
+
134
# ─── TEST 2: workflow.* session round-trip is UNCHANGED ────────────────────────
printf '\n%s\n' "=== 2. workflow.* session (no --flow-id): round-trip unchanged ==="

# Artifact root and session directory for a session created without --flow-id.
NOFLOW_AROOT="${TMP}/noflow-aroot"
NOFLOW_SLUG="history-noflow-test"
NOFLOW_DIR="${NOFLOW_AROOT}/${NOFLOW_SLUG}"
mkdir -p "${NOFLOW_AROOT}"

# Drive the writer through session creation, plan init, one evidence record and
# one critique record. All writer output is discarded; the bundle written to
# disk is what gets inspected afterwards.
{
  flow_agents_node "${WRITER}" ensure-session \
    --artifact-root "${NOFLOW_AROOT}" \
    --task-slug "${NOFLOW_SLUG}" \
    --title "No-flow session history test" \
    --summary "Baseline: no FlowDefinition. Round-trip must be unchanged." \
    --timestamp "2026-06-01T10:00:00Z"

  flow_agents_node "${WRITER}" init-plan "${NOFLOW_DIR}/${NOFLOW_SLUG}--deliver.md" \
    --source-request "Test" --summary "Testing" \
    --timestamp "2026-06-01T10:00:00Z"

  # Record a check (produces only workflow.check.test — no declared claims)
  flow_agents_node "${WRITER}" record-evidence "${NOFLOW_DIR}" \
    --verdict pass \
    --check-json '{"id":"noflow-unit-tests","kind":"test","status":"pass","summary":"No-flow tests pass"}' \
    --timestamp "2026-06-01T10:01:00Z"

  # record-critique round-trip
  flow_agents_node "${WRITER}" record-critique "${NOFLOW_DIR}" \
    --id "noflow-review" \
    --verdict pass \
    --summary "Review passed." \
    --timestamp "2026-06-01T10:02:00Z"
} >/dev/null 2>&1
166
+
167
# Assert only workflow.* claims survived (no builder.* contamination).
#
# The bundle path travels via the environment rather than being interpolated
# into the JavaScript source, so unusual characters in the path cannot break
# the script; the script is single-quoted so the shell expands nothing in it.
#
# if/else (rather than `cmd && _pass || _fail`) ensures _fail is reported only
# when the Node check itself fails, never as a side effect of _pass's status.
if HISTORY_TEST_BUNDLE="$NOFLOW_DIR/trust.bundle" node -e '
  const fs = require("fs");
  const bundle = JSON.parse(fs.readFileSync(process.env.HISTORY_TEST_BUNDLE, "utf8"));
  const claims = bundle.claims || [];
  const wfCheck = claims.find(c => c.claimType === "workflow.check.test");
  const wfCritique = claims.find(c => c.claimType === "workflow.critique.review");
  const builderClaims = claims.filter(c => c.claimType.startsWith("builder."));
  if (!wfCheck) throw new Error("MISSING workflow.check.test after record-critique");
  if (!wfCritique) throw new Error("MISSING workflow.critique.review after record-critique");
  if (builderClaims.length > 0) throw new Error("UNEXPECTED builder.* claims in no-flow session after round-trip: " + builderClaims.map(c=>c.claimType).join(", "));
  console.log("after record-critique: workflow.check.test + workflow.critique.review, no builder.*");
' 2>&1; then
  _pass "no-flow session: workflow.* only after record-critique round-trip (unchanged)"
else
  _fail "no-flow session: unexpected claims after record-critique round-trip"
fi
182
+
183
# ─── TEST 3: evidenceClean/critiqueClean correct for builder.* bundle ──────────
printf '\n%s\n' "=== 3. evidenceClean/critiqueClean correct for builder.* bundle ==="

# Create a fresh builder.build session at verify step for dogfood-pass test
DOGFOOD_AROOT="${TMP}/dogfood-aroot"
DOGFOOD_SLUG="dogfood-clean-test"
DOGFOOD_DIR="${DOGFOOD_AROOT}/${DOGFOOD_SLUG}"
mkdir -p "${DOGFOOD_AROOT}"

# Session creation, plan init, then one passing evidence record and one passing
# critique record. Writer output is discarded for the whole sequence.
{
  flow_agents_node "${WRITER}" ensure-session \
    --artifact-root "${DOGFOOD_AROOT}" \
    --task-slug "${DOGFOOD_SLUG}" \
    --title "Dogfood clean test" \
    --summary "Test evidenceClean/critiqueClean on builder.build session." \
    --flow-id builder.build \
    --step-id verify \
    --timestamp "2026-06-01T20:00:00Z"

  flow_agents_node "${WRITER}" init-plan "${DOGFOOD_DIR}/${DOGFOOD_SLUG}--deliver.md" \
    --source-request "Test" --summary "Testing" \
    --timestamp "2026-06-01T20:00:00Z"

  # Record pass evidence (produces builder.verify.tests declared claim, status=verified)
  flow_agents_node "${WRITER}" record-evidence "${DOGFOOD_DIR}" \
    --verdict pass \
    --check-json '{"id":"ev-check","kind":"test","status":"pass","summary":"Evidence check passes"}' \
    --timestamp "2026-06-01T20:01:00Z"

  # Record pass critique
  flow_agents_node "${WRITER}" record-critique "${DOGFOOD_DIR}" \
    --id "ev-critique" \
    --verdict pass \
    --summary "Critique passed." \
    --timestamp "2026-06-01T20:02:00Z"
} >/dev/null 2>&1
218
+
219
# dogfood-pass --verdict pass should succeed: evidenceClean=true (builder.verify.tests passes)
# and critiqueClean=true (builder.verify.policy-compliance passes — declared critique for verify step).
#
# The command's exit status alone decides the verdict. if/else is used instead
# of `cmd && _pass || _fail` so a non-zero return from _pass can never cause a
# spurious _fail to be recorded for a run that actually succeeded.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$DOGFOOD_AROOT" \
  --artifact-dir "$DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-ev-check","kind":"test","status":"pass","summary":"Dogfood evidence check"}' \
  --summary "Dogfood pass for builder.build session." \
  --timestamp "2026-06-01T20:03:00Z" >/dev/null 2>&1; then
  _pass "dogfood-pass succeeds: evidenceClean returns true for builder.verify.tests declared claim"
else
  _fail "dogfood-pass FAILED: evidenceClean did not recognize builder.verify.tests as passing evidence"
fi
230
+
231
# Verify directly that the bundle has builder.verify.tests as the evidence claim.
#
# The bundle path is supplied through the environment so it is never parsed as
# JavaScript source, and the script is single-quoted so the shell leaves it
# untouched. if/else replaces `cmd && _pass || _fail` so exactly one outcome
# is recorded regardless of what _pass returns.
if HISTORY_TEST_BUNDLE="$DOGFOOD_DIR/trust.bundle" node -e '
  const fs = require("fs");
  const bundle = JSON.parse(fs.readFileSync(process.env.HISTORY_TEST_BUNDLE, "utf8"));
  const claims = bundle.claims || [];
  const builderCheck = claims.find(c => c.claimType === "builder.verify.tests" && c.value === "pass");
  if (!builderCheck) throw new Error("MISSING builder.verify.tests (pass) in bundle; claims: " + claims.map(c=>c.claimType+"="+c.value).join(", "));
  console.log("builder.verify.tests evidence claim present with value=pass, status=" + builderCheck.status);
' 2>&1; then
  _pass "bundle contains builder.verify.tests with value=pass (declared claim recognized by evidenceClean)"
else
  _fail "bundle missing builder.verify.tests with value=pass"
fi
242
+
243
# ─── Summary ──────────────────────────────────────────────────────────────────
# Report the overall result and exit 0 only when $errors is zero.
# NOTE(review): $errors is presumably incremented by _fail — defined outside
# this section; confirm against the helper definitions.
printf '\n'
if [ "$errors" -eq 0 ]; then
  printf '%s\n' "test_flowdef_session_history_preservation: all checks passed."
  exit 0
else
  printf '%s\n' "test_flowdef_session_history_preservation: $errors check(s) FAILED."
  exit 1
fi