@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +95 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/init.js +242 -20
  14. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  15. package/build/src/cli/verify.d.ts +1 -0
  16. package/build/src/cli/verify.js +90 -0
  17. package/build/src/cli/workflow-sidecar.d.ts +300 -8
  18. package/build/src/cli/workflow-sidecar.js +1934 -83
  19. package/build/src/cli.js +2 -3
  20. package/build/src/lib/flow-resolver.d.ts +82 -0
  21. package/build/src/lib/flow-resolver.js +237 -0
  22. package/build/src/tools/build-universal-bundles.js +34 -22
  23. package/build/src/tools/generate-context-map.js +3 -16
  24. package/build/src/tools/validate-source-tree.d.ts +1 -1
  25. package/build/src/tools/validate-source-tree.js +42 -162
  26. package/context/contracts/artifact-contract.md +10 -0
  27. package/context/contracts/delivery-contract.md +1 -0
  28. package/context/contracts/review-contract.md +1 -0
  29. package/context/contracts/verification-contract.md +2 -0
  30. package/context/gate-awareness.md +39 -0
  31. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  32. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  33. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  34. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  35. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  36. package/docs/adr/0007-skill-audit.md +1 -1
  37. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  38. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  39. package/docs/adr/0011-mcp-posture.md +100 -0
  40. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  41. package/docs/adr/0013-context-lifecycle.md +151 -0
  42. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  43. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  44. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  45. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  46. package/docs/agent-system-guidebook.md +5 -12
  47. package/docs/context-map.md +4 -10
  48. package/docs/index.md +3 -2
  49. package/docs/integrations/framework-adapter.md +19 -6
  50. package/docs/integrations/index.md +2 -2
  51. package/docs/north-star.md +4 -4
  52. package/docs/operating-layers.md +3 -3
  53. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  54. package/docs/repository-structure.md +2 -2
  55. package/docs/skills-map.md +1 -0
  56. package/docs/spec/runtime-hook-surface.md +62 -9
  57. package/docs/standards-register.md +3 -3
  58. package/docs/survey-utterance-check.md +1 -1
  59. package/docs/trust-anchor-adoption.md +197 -0
  60. package/docs/verifiable-trust.md +95 -0
  61. package/docs/veritas-integration.md +2 -2
  62. package/docs/workflow-usage-guide.md +69 -0
  63. package/evals/acceptance/DEMO-false-completion.md +144 -0
  64. package/evals/acceptance/demo-cast.sh +92 -0
  65. package/evals/acceptance/demo-false-completion.sh +72 -0
  66. package/evals/acceptance/demo-real-evidence.sh +104 -0
  67. package/evals/acceptance/demo.tape +29 -0
  68. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  69. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  70. package/evals/acceptance/prove-teeth.sh +105 -0
  71. package/evals/ci/antigaming-suite.sh +54 -0
  72. package/evals/ci/run-baseline.sh +2 -0
  73. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  75. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  77. package/evals/integration/test_builder_step_producers.sh +379 -0
  78. package/evals/integration/test_bundle_install.sh +35 -71
  79. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  80. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  81. package/evals/integration/test_checkpoint_signing.sh +489 -0
  82. package/evals/integration/test_claim_lookup.sh +352 -0
  83. package/evals/integration/test_command_log_integrity.sh +275 -0
  84. package/evals/integration/test_context_map.sh +0 -2
  85. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  86. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  87. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  88. package/evals/integration/test_flow_kit_repository.sh +2 -0
  89. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  90. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  91. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  92. package/evals/integration/test_gate_lockdown.sh +1137 -0
  93. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  94. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  95. package/evals/integration/test_goal_fit_hook.sh +69 -4
  96. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  97. package/evals/integration/test_install_merge.sh +1176 -0
  98. package/evals/integration/test_mint_attestation.sh +373 -0
  99. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  100. package/evals/integration/test_publish_delivery.sh +269 -0
  101. package/evals/integration/test_reconcile_soundness.sh +528 -0
  102. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  103. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  104. package/evals/integration/test_trust_checkpoint.sh +325 -0
  105. package/evals/integration/test_trust_reconcile.sh +293 -0
  106. package/evals/integration/test_verify_cli.sh +208 -0
  107. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  108. package/evals/lib/node.sh +0 -6
  109. package/evals/run.sh +45 -0
  110. package/evals/static/test_workflow_skills.sh +6 -13
  111. package/install.sh +0 -7
  112. package/integrations/strands-ts/README.md +25 -15
  113. package/integrations/veritas/flow-agents.adapter.json +1 -2
  114. package/kits/builder/flows/build.flow.json +59 -12
  115. package/kits/builder/kit.json +85 -15
  116. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  117. package/kits/builder/skills/deliver/SKILL.md +36 -6
  118. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  119. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  120. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  121. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  122. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  123. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  124. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  125. package/kits/knowledge/adapters/default-store/index.js +38 -0
  126. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  127. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  128. package/kits/knowledge/docs/store-contract.md +314 -0
  129. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  130. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  131. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  132. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  133. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  134. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  135. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  136. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  137. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  138. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  139. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  140. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  141. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  142. package/kits/knowledge/kit.json +51 -1
  143. package/package.json +4 -4
  144. package/packaging/conformance/README.md +10 -2
  145. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  146. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  147. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  148. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  151. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  152. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  153. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  154. package/packaging/conformance/run-conformance.js +1 -1
  155. package/scripts/README.md +2 -1
  156. package/scripts/build-universal-bundles.js +0 -1
  157. package/scripts/ci/mint-attestation.js +221 -0
  158. package/scripts/ci/trust-reconcile.js +545 -0
  159. package/scripts/hooks/config-protection.js +423 -1
  160. package/scripts/hooks/evidence-capture.js +348 -0
  161. package/scripts/hooks/lib/liveness-read.js +113 -0
  162. package/scripts/hooks/run-hook.js +6 -1
  163. package/scripts/hooks/stop-goal-fit.js +1471 -79
  164. package/scripts/hooks/workflow-steering.js +135 -5
  165. package/scripts/install-codex-home.sh +39 -0
  166. package/scripts/install-merge.js +330 -0
  167. package/src/cli/init.ts +218 -20
  168. package/src/cli/validate-workflow-artifacts.ts +18 -2
  169. package/src/cli/verify.ts +100 -0
  170. package/src/cli/workflow-sidecar.ts +2064 -77
  171. package/src/cli.ts +2 -3
  172. package/src/lib/flow-resolver.ts +284 -0
  173. package/src/tools/build-universal-bundles.ts +34 -21
  174. package/src/tools/generate-context-map.ts +3 -17
  175. package/src/tools/validate-source-tree.ts +44 -104
  176. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  177. package/build/src/tools/filter-installed-packs.js +0 -135
  178. package/packaging/packs.json +0 -49
  179. package/scripts/filter-installed-packs.js +0 -2
  180. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env bash
2
+ # test_resolvefirststep_security.sh — Security regression for resolveFirstStep path traversal.
3
+ #
4
+ # Fix: resolveFirstStep in workflow-sidecar.ts previously constructed the flow-definition
5
+ # path WITHOUT validation, allowing a crafted --flow-id like "a.../../secret" to escape
6
+ # the kits/ directory via path.join traversal. The fix imports and reuses resolveFlowFilePath
7
+ # from flow-resolver.ts (which already enforces SLUG_RE + path-containment), ensuring DRY
8
+ # defense-in-depth with a single implementation.
9
+ #
10
+ # Tests:
11
+ # 1. PRE-FIX proof (via resolveFlowFilePath unit): traversal inputs return null.
12
+ # 2. POST-FIX behavioral: ensure-session --flow-id with traversal IDs
13
+ # produces no active_step_id (resolveFirstStep returns null → no step set).
14
+ # 3. No agent-writable override: FLOW_AGENTS_FLOW_DEFS_DIR pointing into .flow-agents is rejected (no active_step_id from the injected flow file).
15
+ # 4. Legit ensure-session --flow-id builder.build still works (first step resolved).
16
+ #
17
+ # Deterministic, no model spend, self-cleaning.
18
+ # Usage: bash evals/integration/test_resolvefirststep_security.sh
19
+
20
+ set -uo pipefail
21
+
22
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
23
+ source "$ROOT/evals/lib/node.sh"
24
+
25
+ TMP="$(mktemp -d)"
26
+ errors=0
27
+
28
+ _pass() { echo " PASS: $1"; } # Report a passing check; $1 is the human-readable description printed after " PASS: ".
29
+ _fail() { echo " FAIL: $1"; errors=$((errors + 1)); } # Report a failing check ($1) and increment the global errors counter that the final summary inspects.
30
+
31
+ cleanup() { rm -rf "$TMP"; } # Remove the mktemp -d scratch directory; registered as the EXIT trap so it runs on every exit path.
32
+ trap cleanup EXIT
33
+
34
+ WRITER="workflow-sidecar"
35
+ FLOW_RESOLVER_JS="$ROOT/build/src/li""b/flow-resolver.js"
36
+
37
+ echo ""
38
+ echo "================================================================="
39
+ echo " resolveFirstStep Path Traversal Security Regression"
40
+ echo "================================================================="
41
+
42
+ # ─── Unit: resolveFlowFilePath rejects traversal slugs ────────────────────────
43
+ echo ""
44
+ echo "=== 1. resolveFlowFilePath unit: traversal inputs → null (SLUG_RE defense) ==="
45
+
46
+ node --input-type=module << JSEOF 2>&1
47
+ import { resolveFlowFilePath } from '${FLOW_RESOLVER_JS}';
48
+
49
+ const cases = [
50
+ // [kitId, flowName, flowId, repoRoot, expected]
51
+ ["a", "../../secret", "a.../../secret", "/repo", null, "flowName with ../ escape"],
52
+ ["a", "../../../etc", "a../../../etc", "/repo", null, "flowName multi-level escape"],
53
+ ["../evil", "build", "../evil.build", "/repo", null, "kitId with ../ escape"],
54
+ ["a", "b/c", "a.b/c", "/repo", null, "flowName with path separator"],
55
+ ["a", "ok", "a.ok", "/repo", "string", "legit (a.ok) → non-null path"],
56
+ ["builder", "build", "builder.build", "/repo", "string", "legit (builder.build) → non-null path"],
57
+ ];
58
+
59
+ let failures = 0;
60
+ for (const [kitId, flowName, flowId, repoRoot, expected, label] of cases) {
61
+ const result = resolveFlowFilePath(kitId, flowName, flowId, repoRoot);
62
+ const ok = expected === null ? result === null : (result !== null && typeof result === 'string');
63
+ if (!ok) {
64
+ console.error(' FAIL: ' + label + ' got ' + JSON.stringify(result) + ' expected ' + expected);
65
+ failures++;
66
+ } else {
67
+ console.log(' PASS: ' + label);
68
+ }
69
+ }
70
+ if (failures > 0) process.exit(1);
71
+ JSEOF
72
+
73
+ if [ $? -eq 0 ]; then
74
+ _pass "resolveFlowFilePath: all traversal inputs → null; legit inputs → valid path"
75
+ else
76
+ _fail "resolveFlowFilePath: some cases did not match expected"
77
+ fi
78
+
79
+ # ─── Behavioral: ensure-session with traversal --flow-id → null active_step_id ─
80
+ echo ""
81
+ echo "=== 2. ensure-session --flow-id traversal → no active_step_id (null return) ==="
82
+
83
+ # Create a fake "secret" file OUTSIDE kits/ to prove it is not read
84
+ SECRET_DIR="$TMP/secret-outside"
85
+ mkdir -p "$SECRET_DIR"
86
+ printf 'SECRET_CONTENTS' > "$SECRET_DIR/secret.flow.json"
87
+
88
+ AROOT="$TMP/traversal-aroot"
89
+ mkdir -p "$AROOT"
90
+
91
+ for traversal_id in "a.../../secret" "a.../../../etc" "builder../../../escape"; do # each ID embeds ../ segments after the kit prefix
92
+ slug="trav-$(echo "$traversal_id" | tr -d './')" # drop every '.' and '/' so the ID can double as a task slug and output file name
93
+ set +e # tolerate a non-zero exit from the writer; the outcome is judged from current.json below
94
+ flow_agents_node "$WRITER" ensure-session \
95
+ --artifact-root "$AROOT" \
96
+ --task-slug "$slug" \
97
+ --title "Traversal traversal test" \
98
+ --summary "Traversal flow-id should not escape kits/." \
99
+ --flow-id "$traversal_id" \
100
+ --timestamp "2026-06-27T00:00:00Z" >"$TMP/traversal-$slug.out" 2>&1
101
+ ens_exit=$? # captured for debugging only; nothing below asserts on it
102
+ set -e # NOTE(review): the script starts with "set -uo pipefail" (no -e), so this turns errexit ON for the following statements rather than restoring a previous state
103
+
104
+ # The session may succeed or fail (behavior doesn't matter); what matters is:
105
+ # 1. No active_step_id is set (resolveFirstStep returned null)
106
+ # 2. The secret file was not read — NOTE(review): nothing below asserts this directly; only active_step_id is inspected
107
+ if [ -f "$AROOT/current.json" ]; then # NOTE(review): $AROOT is shared by every iteration, so this file may have been written by an earlier iteration
108
+ active_step=$(node -e "
109
+ const fs = require('fs');
110
+ const c = JSON.parse(fs.readFileSync('$AROOT/current.json', 'utf8'));
111
+ console.log(c.active_step_id || '');
112
+ " 2>/dev/null || echo "")
113
+ if [ -z "$active_step" ]; then
114
+ _pass "ensure-session --flow-id '$traversal_id' → active_step_id is empty (resolveFirstStep returned null)"
115
+ else
116
+ _fail "ensure-session --flow-id '$traversal_id' → unexpected active_step_id='$active_step' (traversal may have succeeded)"
117
+ fi
118
+ else
119
+ # If session creation failed entirely, that's also acceptable (fail-closed)
120
+ _pass "ensure-session --flow-id '$traversal_id' → session not created (fail-closed)"
121
+ fi
122
+ done
123
+
124
+ # ─── No out-of-tree reads: FLOW_AGENTS_FLOW_DEFS_DIR .flow-agents override rejected ─
125
+ echo ""
126
+ echo "=== 3. FLOW_AGENTS_FLOW_DEFS_DIR pointing into .flow-agents → rejected ==="
127
+
128
+ AGENT_DEFS_DIR="$TMP/agent-defs-aroot/.flow-agents/defs"
129
+ mkdir -p "$AGENT_DEFS_DIR"
130
+ # Write a fake flow.json in the agent-writable area
131
+ printf '{"id":"evil.inject","steps":[{"id":"evil-step"}]}' > "$AGENT_DEFS_DIR/evil.inject.flow.json"
132
+
133
+ OVERRIDE_AROOT="$TMP/override-aroot"
134
+ mkdir -p "$OVERRIDE_AROOT"
135
+
136
+ set +e
137
+ FLOW_AGENTS_FLOW_DEFS_DIR="$AGENT_DEFS_DIR" \
138
+ flow_agents_node "$WRITER" ensure-session \
139
+ --artifact-root "$OVERRIDE_AROOT" \
140
+ --task-slug "evil-inject" \
141
+ --title "Override test" \
142
+ --summary "FLOW_AGENTS_FLOW_DEFS_DIR pointing into .flow-agents should be rejected." \
143
+ --flow-id "evil.inject" \
144
+ --timestamp "2026-06-27T00:00:00Z" >"$TMP/override.out" 2>&1
145
+ set -e
146
+
147
+ if [ -f "$OVERRIDE_AROOT/current.json" ]; then
148
+ override_step=$(node -e "
149
+ const fs = require('fs');
150
+ const c = JSON.parse(fs.readFileSync('$OVERRIDE_AROOT/current.json', 'utf8'));
151
+ console.log(c.active_step_id || '');
152
+ " 2>/dev/null || echo "")
153
+ if [ -z "$override_step" ]; then
154
+ _pass "FLOW_AGENTS_FLOW_DEFS_DIR into .flow-agents → active_step_id empty (override rejected, fell back to kits/)"
155
+ else
156
+ _fail "FLOW_AGENTS_FLOW_DEFS_DIR into .flow-agents → active_step_id='$override_step' (agent-writable override was NOT rejected)"
157
+ fi
158
+ else
159
+ _pass "FLOW_AGENTS_FLOW_DEFS_DIR into .flow-agents → session not created (fail-closed)"
160
+ fi
161
+
162
+ # ─── Legit case: builder.build still resolves the first step ─────────────────
163
+ echo ""
164
+ echo "=== 4. Legit --flow-id builder.build → active_step_id set (first step resolved) ==="
165
+
166
+ LEGIT_AROOT="$TMP/legit-aroot"
167
+ mkdir -p "$LEGIT_AROOT"
168
+
169
+ set +e
170
+ flow_agents_node "$WRITER" ensure-session \
171
+ --artifact-root "$LEGIT_AROOT" \
172
+ --task-slug "legit-builder" \
173
+ --title "Legit builder test" \
174
+ --summary "builder.build should activate with a first step." \
175
+ --flow-id "builder.build" \
176
+ --timestamp "2026-06-27T00:00:00Z" >"$TMP/legit.out" 2>&1
177
+ legit_exit=$?
178
+ set -e
179
+
180
+ legit_step=$(node -e "
181
+ const fs = require('fs');
182
+ const c = JSON.parse(fs.readFileSync('$LEGIT_AROOT/current.json', 'utf8'));
183
+ console.log(c.active_step_id || '');
184
+ " 2>/dev/null || echo "")
185
+
186
+ if [ -n "$legit_step" ]; then
187
+ _pass "ensure-session --flow-id builder.build → active_step_id='$legit_step' (first step resolved)"
188
+ else
189
+ _fail "ensure-session --flow-id builder.build → active_step_id is empty (resolution failed)"
190
+ fi
191
+
192
+ # ─── Summary ─────────────────────────────────────────────────────────────────
193
+ echo ""
194
+ echo "================================================================="
195
+ if [ "$errors" -eq 0 ]; then
196
+ echo "PASS resolveFirstStep security eval: all checks passed."
197
+ echo ""
198
+ echo "Security fix summary:"
199
+ echo " PRE-FIX: resolveFirstStep built path directly from flowId without SLUG_RE validation."
200
+ echo " A crafted --flow-id like 'a.../../secret' escaped kits/ via path.join."
201
+ echo " POST-FIX: resolveFlowFilePath (from flow-resolver.ts) is reused — single implementation."
202
+ echo " SLUG_RE rejects any flowName containing '../' or '/' → null returned."
203
+ echo " Path-containment belt-and-suspenders confirms resolved path is inside root."
204
+ echo " FLOW_AGENTS_FLOW_DEFS_DIR override pointing into .flow-agents is rejected."
205
+ exit 0
206
+ fi
207
+ echo "FAIL resolveFirstStep security eval: $errors check(s) failed."
208
+ exit 1
@@ -0,0 +1,286 @@
1
+ #!/usr/bin/env bash
2
+ # test_session_resume_roundtrip.sh — resumable-sessions (issue #153) round-trip eval
3
+ #
4
+ # Seeds a temporary repo fixture with an active session, runs the workflow-steering
5
+ # hook with a SessionStart event, and asserts:
6
+ # AC1: RESUME block is present with status/phase/next_action/plan/handoff/trust fields
7
+ # AC2: Liveness warning present when a fresh other-actor event is seeded
8
+ # AC3: state.json / handoff.json / trust.bundle checksums are unchanged (non-destructive)
9
+ #
10
+ # Negative cases:
11
+ # - UserPromptSubmit → no RESUME block
12
+ # - Empty liveness stream → no LIVENESS WARNING
13
+ set -uo pipefail
14
+
15
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
16
+
17
+ TMPDIR_EVAL="$(mktemp -d)"
18
+ errors=0
19
+
20
+ cleanup() { # Delete the eval's temporary directory; registered as the EXIT trap right after this definition.
21
+ rm -rf "$TMPDIR_EVAL"
22
+ }
23
+ trap cleanup EXIT
24
+
25
+ _pass() { echo " ✓ $1"; } # Report a passing assertion; $1 is the description printed after the check mark.
26
+ _fail() { echo " ✗ $1"; errors=$((errors + 1)); } # Report a failing assertion ($1) and increment the global errors counter used by the final summary.
27
+
28
+ # ─── Portable sha256 helper ────────────────────────────────────────────────────
29
+ sha256_file() { # Print the hex SHA-256 digest of the file at $1 on stdout, using the first available tool.
30
+ if command -v sha256sum >/dev/null 2>&1; then
31
+ sha256sum "$1" | awk '{print $1}'
32
+ elif command -v shasum >/dev/null 2>&1; then # probe explicitly: a missing tool used to yield an empty digest, making before/after comparisons pass vacuously
33
+ shasum -a 256 "$1" | awk '{print $1}'
34
+ else node -e 'console.log(require("crypto").createHash("sha256").update(require("fs").readFileSync(process.argv[1])).digest("hex"))' "$1"; fi # last resort: node is already required by this eval
35
+ }
36
+
37
+ # ─── Seed fixture ─────────────────────────────────────────────────────────────
38
+ REPO="$TMPDIR_EVAL/repo"
39
+ SLUG="test-slug-153"
40
+ TASK_DIR="$REPO/.flow-agents/$SLUG"
41
+ mkdir -p "$TASK_DIR"
42
+ mkdir -p "$REPO/.flow-agents/liveness"
43
+ mkdir -p "$REPO/docs"
44
+
45
+ printf '# Test Repo\n' > "$REPO/AGENTS.md"
46
+ printf '# Context Map\n' > "$REPO/docs/context-map.md"
47
+
48
+ # state.json — active session in_progress/execution
49
+ cat > "$TASK_DIR/state.json" << 'JSON'
50
+ {
51
+ "schema_version": "1.0",
52
+ "task_slug": "test-slug-153",
53
+ "status": "in_progress",
54
+ "phase": "execution",
55
+ "updated_at": "2026-06-25T00:00:00Z",
56
+ "next_action": {
57
+ "status": "active",
58
+ "summary": "Continue implementing the RESUME block in workflow-steering.js",
59
+ "target_phase": "verification"
60
+ },
61
+ "artifact_paths": ["test-slug-153--plan-work.md"]
62
+ }
63
+ JSON
64
+
65
+ # handoff.json
66
+ cat > "$TASK_DIR/handoff.json" << 'JSON'
67
+ {
68
+ "schema_version": "1.0",
69
+ "task_slug": "test-slug-153",
70
+ "next_steps": ["Run eval and check RESUME output"],
71
+ "blockers": []
72
+ }
73
+ JSON
74
+
75
+ # stub plan file
76
+ printf '# Plan: test-slug-153\n' > "$TASK_DIR/test-slug-153--plan-work.md"
77
+
78
+ # trust.bundle — one verified, one disputed
79
+ cat > "$TASK_DIR/trust.bundle" << 'JSON'
80
+ {
81
+ "schema_version": "1.0",
82
+ "task_slug": "test-slug-153",
83
+ "claims": [
84
+ {
85
+ "id": "verified-claim-001",
86
+ "status": "verified",
87
+ "claimType": "implementation",
88
+ "value": "feature implemented"
89
+ },
90
+ {
91
+ "id": "disputed-claim-id",
92
+ "status": "disputed",
93
+ "claimType": "test-coverage",
94
+ "value": "tests pass"
95
+ }
96
+ ]
97
+ }
98
+ JSON
99
+
100
+ # install.json — initial version
101
+ cat > "$REPO/.flow-agents/install.json" << 'JSON'
102
+ {
103
+ "version": "v0.0.1",
104
+ "installedAt": "2026-06-25T00:00:00Z",
105
+ "runtime": "claude-code"
106
+ }
107
+ JSON
108
+
109
+ # Liveness stream: fresh other-agent event (5 min ago, within 1800 s TTL)
110
+ # and a self (local) event — self should NOT trigger a warning
111
+ FIVE_MIN_AGO="$(node -e "process.stdout.write(new Date(Date.now()-300000).toISOString().replace(/\\.\\d{3}Z$/,'Z'))")"
112
+ printf '{"type":"claim","subjectId":"test-slug-153","actor":"other-agent","at":"%s","ttlSeconds":1800}\n' "$FIVE_MIN_AGO" > "$REPO/.flow-agents/liveness/events.jsonl"
113
+ printf '{"type":"heartbeat","subjectId":"test-slug-153","actor":"local","at":"%s"}\n' "$FIVE_MIN_AGO" >> "$REPO/.flow-agents/liveness/events.jsonl"
114
+
115
+ # ─── Snapshot checksums before hook run ───────────────────────────────────────
116
+ CKSUM_STATE_BEFORE="$(sha256_file "$TASK_DIR/state.json")"
117
+ CKSUM_HANDOFF_BEFORE="$(sha256_file "$TASK_DIR/handoff.json")"
118
+ CKSUM_TRUST_BEFORE="$(sha256_file "$TASK_DIR/trust.bundle")"
119
+
120
+ # ─── Hot-upgrade simulation: bump install.json version ───────────────────────
121
+ node -e "
122
+ const fs = require('fs');
123
+ const f = '$REPO/.flow-agents/install.json';
124
+ const obj = JSON.parse(fs.readFileSync(f,'utf8'));
125
+ obj.version = 'v0.0.2';
126
+ fs.writeFileSync(f, JSON.stringify(obj, null, 2) + '\n');
127
+ "
128
+
129
+ # ─── Run hook with SessionStart ───────────────────────────────────────────────
130
+ if echo "{\"hook_event_name\":\"SessionStart\",\"cwd\":\"$REPO\"}" | \
131
+ FLOW_AGENTS_ACTOR="local" node "$ROOT/scripts/hooks/workflow-steering.js" > "$TMPDIR_EVAL/resume.out" 2>&1; then
132
+ _pass "hook exits 0 for SessionStart"
133
+ else
134
+ _fail "hook should exit 0 for SessionStart (exit $?)"
135
+ fi
136
+
137
+ # ─── AC1: RESUME block presence and fields ────────────────────────────────────
138
+ if grep -q "RESUME:" "$TMPDIR_EVAL/resume.out"; then
139
+ _pass "RESUME block present in SessionStart output"
140
+ else
141
+ _fail "RESUME block missing from SessionStart output: $(cat "$TMPDIR_EVAL/resume.out")"
142
+ fi
143
+
144
+ if grep -q "in_progress" "$TMPDIR_EVAL/resume.out"; then
145
+ _pass "status 'in_progress' echoed in RESUME block"
146
+ else
147
+ _fail "status missing from RESUME block"
148
+ fi
149
+
150
+ if grep -q "execution" "$TMPDIR_EVAL/resume.out"; then
151
+ _pass "phase 'execution' echoed in RESUME block"
152
+ else
153
+ _fail "phase missing from RESUME block"
154
+ fi
155
+
156
+ if grep -q "Continue implementing the RESUME block" "$TMPDIR_EVAL/resume.out"; then
157
+ _pass "full next_action summary present in RESUME block"
158
+ else
159
+ _fail "next_action summary missing from RESUME block: $(grep 'Next action' "$TMPDIR_EVAL/resume.out" || echo 'no Next action line')"
160
+ fi
161
+
162
+ if grep -q "test-slug-153--plan-work.md" "$TMPDIR_EVAL/resume.out"; then
163
+ _pass "plan artifact path present in RESUME block"
164
+ else
165
+ _fail "plan artifact path missing from RESUME block"
166
+ fi
167
+
168
+ if grep -q "Run eval and check RESUME output" "$TMPDIR_EVAL/resume.out"; then
169
+ _pass "handoff next_step present in RESUME block"
170
+ else
171
+ _fail "handoff next_step missing from RESUME block"
172
+ fi
173
+
174
+ if grep -q "disputed" "$TMPDIR_EVAL/resume.out"; then
175
+ _pass "trust takeaway mentions disputed status"
176
+ else
177
+ _fail "trust takeaway missing disputed status"
178
+ fi
179
+
180
+ if grep -q "disputed-claim-id" "$TMPDIR_EVAL/resume.out"; then
181
+ _pass "disputed claim id present in RESUME block"
182
+ else
183
+ _fail "disputed claim id missing from RESUME block"
184
+ fi
185
+
186
+ if grep -q "workflow:sidecar -- claim" "$TMPDIR_EVAL/resume.out"; then
187
+ _pass "disputed claim remedy command present in RESUME block"
188
+ else
189
+ _fail "disputed claim remedy command missing from RESUME block"
190
+ fi
191
+
192
+ if grep -q "pull-work" "$TMPDIR_EVAL/resume.out"; then
193
+ _pass "pull-work route hint present in RESUME block"
194
+ else
195
+ _fail "pull-work route hint missing from RESUME block"
196
+ fi
197
+
198
+ # ─── AC2: Liveness warning present ────────────────────────────────────────────
199
+ if grep -q "LIVENESS WARNING" "$TMPDIR_EVAL/resume.out"; then
200
+ _pass "LIVENESS WARNING present in RESUME block"
201
+ else
202
+ _fail "LIVENESS WARNING missing from RESUME block: $(cat "$TMPDIR_EVAL/resume.out")"
203
+ fi
204
+
205
+ if grep -q "other-agent" "$TMPDIR_EVAL/resume.out"; then
206
+ _pass "other-agent actor named in liveness warning"
207
+ else
208
+ _fail "other-agent actor missing from liveness warning"
209
+ fi
210
+
211
+ # Self-actor (local) should NOT appear as a liveness warning
212
+ if ! grep -Eq "LIVENESS WARNING.*local|local.*LIVENESS WARNING" "$TMPDIR_EVAL/resume.out"; then # -E: "|" alternation is standard in extended regexes, whereas "\|" in a basic regex is a GNU extension
213
+ _pass "self-actor (local) correctly excluded from liveness warning"
214
+ else
215
+ _fail "self-actor should not be warned in liveness advisory"
216
+ fi
217
+
218
+ # ─── AC3: Checksums unchanged (non-destructive) ───────────────────────────────
219
+ CKSUM_STATE_AFTER="$(sha256_file "$TASK_DIR/state.json")"
220
+ CKSUM_HANDOFF_AFTER="$(sha256_file "$TASK_DIR/handoff.json")"
221
+ CKSUM_TRUST_AFTER="$(sha256_file "$TASK_DIR/trust.bundle")"
222
+
223
+ if [[ "$CKSUM_STATE_BEFORE" == "$CKSUM_STATE_AFTER" ]]; then
224
+ _pass "state.json checksum unchanged (non-destructive)"
225
+ else
226
+ _fail "state.json was modified by the hook (checksums differ)"
227
+ fi
228
+
229
+ if [[ "$CKSUM_HANDOFF_BEFORE" == "$CKSUM_HANDOFF_AFTER" ]]; then
230
+ _pass "handoff.json checksum unchanged (non-destructive)"
231
+ else
232
+ _fail "handoff.json was modified by the hook (checksums differ)"
233
+ fi
234
+
235
+ if [[ "$CKSUM_TRUST_BEFORE" == "$CKSUM_TRUST_AFTER" ]]; then
236
+ _pass "trust.bundle checksum unchanged (non-destructive)"
237
+ else
238
+ _fail "trust.bundle was modified by the hook (checksums differ)"
239
+ fi
240
+
241
+ # ─── Negative: UserPromptSubmit should produce NO RESUME block ────────────────
242
+ echo "{\"hook_event_name\":\"UserPromptSubmit\",\"cwd\":\"$REPO\",\"prompt\":\"continue\"}" | \
243
+ FLOW_AGENTS_ACTOR="local" node "$ROOT/scripts/hooks/workflow-steering.js" > "$TMPDIR_EVAL/prompt.out" 2>&1
244
+
245
+ if ! grep -q "RESUME:" "$TMPDIR_EVAL/prompt.out"; then
246
+ _pass "RESUME block absent for UserPromptSubmit (negative case)"
247
+ else
248
+ _fail "RESUME block must not appear for UserPromptSubmit"
249
+ fi
250
+
251
+ # ─── Negative: Empty liveness stream → no LIVENESS WARNING ───────────────────
252
+ REPO2="$TMPDIR_EVAL/repo2"
253
+ TASK_DIR2="$REPO2/.flow-agents/$SLUG"
254
+ mkdir -p "$TASK_DIR2"
255
+ mkdir -p "$REPO2/docs"
256
+ printf '# Test Repo 2\n' > "$REPO2/AGENTS.md"
257
+ printf '# Context Map\n' > "$REPO2/docs/context-map.md"
258
+ cp "$TASK_DIR/state.json" "$TASK_DIR2/state.json"
259
+ cp "$TASK_DIR/handoff.json" "$TASK_DIR2/handoff.json"
260
+ cp "$TASK_DIR/trust.bundle" "$TASK_DIR2/trust.bundle"
261
+ printf 'test-slug-153--plan-work.md stub\n' > "$TASK_DIR2/test-slug-153--plan-work.md"
262
+ # No liveness directory → empty stream
263
+
264
+ echo "{\"hook_event_name\":\"SessionStart\",\"cwd\":\"$REPO2\"}" | \
265
+ FLOW_AGENTS_ACTOR="local" node "$ROOT/scripts/hooks/workflow-steering.js" > "$TMPDIR_EVAL/nolive.out" 2>&1
266
+
267
+ if grep -q "RESUME:" "$TMPDIR_EVAL/nolive.out"; then
268
+ _pass "RESUME block present when no liveness stream (absence case)"
269
+ else
270
+ _fail "RESUME block should still be present with empty liveness stream"
271
+ fi
272
+
273
+ if ! grep -q "LIVENESS WARNING" "$TMPDIR_EVAL/nolive.out"; then
274
+ _pass "no LIVENESS WARNING when liveness stream is empty (absence case)"
275
+ else
276
+ _fail "LIVENESS WARNING must not appear when no fresh other-actor events exist"
277
+ fi
278
+
279
+ # ─── Summary ──────────────────────────────────────────────────────────────────
280
+ if [[ "$errors" -eq 0 ]]; then
281
+ echo "Session resume roundtrip eval passed."
282
+ exit 0
283
+ fi
284
+
285
+ echo "Session resume roundtrip eval failed: $errors issue(s)."
286
+ exit 1