@kontourai/flow-agents 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/.github/dependabot.yml +23 -0
  2. package/.github/workflows/release-please.yml +31 -0
  3. package/.github/workflows/runtime-compat.yml +118 -0
  4. package/CHANGELOG.md +23 -0
  5. package/CONTRIBUTING.md +4 -0
  6. package/README.md +53 -10
  7. package/build/src/cli/init.js +215 -5
  8. package/build/src/cli/utterance-check.js +65 -1
  9. package/build/src/tools/build-universal-bundles.js +268 -0
  10. package/build/src/tools/filter-installed-packs.js +3 -0
  11. package/build/src/tools/validate-source-tree.js +5 -1
  12. package/context/scripts/telemetry/lib/config.sh +5 -1
  13. package/context/settings/flow-agents-settings.json +7 -0
  14. package/docs/context-map.md +1 -0
  15. package/docs/index.md +45 -4
  16. package/docs/integrations/conformance.md +246 -0
  17. package/docs/integrations/framework-adapter.md +275 -0
  18. package/docs/integrations/harness-install.md +213 -0
  19. package/docs/integrations/index.md +54 -0
  20. package/docs/north-star.md +2 -2
  21. package/docs/spec/runtime-hook-surface.md +472 -0
  22. package/docs/survey-utterance-check.md +211 -94
  23. package/docs/vision.md +45 -0
  24. package/evals/acceptance/run.sh +4 -2
  25. package/evals/acceptance/test_opencode_harness.sh +121 -0
  26. package/evals/acceptance/test_pi_harness.sh +98 -0
  27. package/evals/integration/test_bundle_install.sh +226 -1
  28. package/evals/integration/test_bundle_lifecycle.sh +641 -0
  29. package/evals/integration/test_utterance_check.sh +291 -44
  30. package/evals/run.sh +2 -0
  31. package/evals/static/test_universal_bundles.sh +137 -2
  32. package/integrations/strands/README.md +256 -0
  33. package/integrations/strands/example.py +74 -0
  34. package/integrations/strands/flow_agents_strands/__init__.py +27 -0
  35. package/integrations/strands/flow_agents_strands/hooks.py +194 -0
  36. package/integrations/strands/flow_agents_strands/policy.py +348 -0
  37. package/integrations/strands/flow_agents_strands/steering.py +172 -0
  38. package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
  39. package/integrations/strands/pyproject.toml +38 -0
  40. package/integrations/strands/tests/__init__.py +0 -0
  41. package/integrations/strands/tests/test_hooks.py +304 -0
  42. package/integrations/strands/tests/test_policy.py +315 -0
  43. package/integrations/strands/tests/test_telemetry.py +184 -0
  44. package/integrations/strands-ts/README.md +224 -0
  45. package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
  46. package/integrations/strands-ts/package.json +53 -0
  47. package/integrations/strands-ts/src/hooks.ts +208 -0
  48. package/integrations/strands-ts/src/index.ts +22 -0
  49. package/integrations/strands-ts/src/policy.ts +345 -0
  50. package/integrations/strands-ts/src/telemetry.ts +251 -0
  51. package/integrations/strands-ts/test/test-policy.ts +322 -0
  52. package/integrations/strands-ts/test/test-telemetry.ts +226 -0
  53. package/integrations/strands-ts/tsconfig.json +20 -0
  54. package/package.json +7 -2
  55. package/packaging/conformance/README.md +142 -0
  56. package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
  57. package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
  58. package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
  59. package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
  60. package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
  61. package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
  62. package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
  63. package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
  64. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
  65. package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
  66. package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
  67. package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
  68. package/packaging/conformance/package.json +4 -0
  69. package/packaging/conformance/run-conformance.js +322 -0
  70. package/packaging/manifest.json +59 -0
  71. package/schemas/flow-agents-settings.schema.json +48 -0
  72. package/scripts/README.md +4 -0
  73. package/scripts/dogfood.js +16 -0
  74. package/scripts/hooks/opencode-hook-adapter.js +123 -0
  75. package/scripts/hooks/opencode-telemetry-hook.js +101 -0
  76. package/scripts/hooks/pi-hook-adapter.js +123 -0
  77. package/scripts/hooks/pi-telemetry-hook.js +105 -0
  78. package/scripts/hooks/run-hook.js +8 -0
  79. package/scripts/hooks/utterance-check.js +124 -22
  80. package/scripts/telemetry/lib/config.sh +5 -1
  81. package/src/cli/init.ts +219 -6
  82. package/src/cli/utterance-check.ts +71 -1
  83. package/src/tools/build-universal-bundles.ts +266 -0
  84. package/src/tools/filter-installed-packs.ts +3 -0
  85. package/src/tools/validate-source-tree.ts +5 -1
@@ -0,0 +1,641 @@
1
+ #!/usr/bin/env bash
2
+ # test_bundle_lifecycle.sh — Bundle install lifecycle tests
3
+ #
4
+ # Covers:
5
+ # 1. Idempotent re-install: install a bundle twice; assert identical layout,
6
+ # no duplicated hook entries in generated config.
7
+ # 2. Upgrade over existing: install from original bundle, then re-install from
8
+ # a modified copy; assert the change propagates.
9
+ # 3. User-file preservation semantics: after install, create user-owned files
10
+ # and modify an installed file; re-install and verify:
11
+ # - user-owned unknown files survive (rsync does not remove them).
12
+ # - modified installed files ARE overwritten (rsync overwrites — this is
13
+ # the expected behavior; assertion pins the semantics).
14
+ # 4. Scope-collision detection: fake a $HOME with colliding user-level
15
+ # .claude/settings.json; run install, assert WARNING appears; assert no
16
+ # warning on a clean $HOME.
17
+ # 5. Dogfood smoke test: run `flow-agents dogfood --runtime claude-code` into
18
+ # a temp dir, assert valid JSON, assert hook commands execute correctly
19
+ # with a realistic payload, assert permission keys are absent.
20
+ #
21
+ # Runtimes tested: claude-code, codex, opencode (the three config-generating runtimes).
22
+ #
23
# Strict mode: abort on unhandled errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Repository root, resolved as two directories above this script's own location.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Scratch directory holding every workspace this suite creates. Honors $TMPDIR
# (falling back to /tmp) so the suite also runs where /tmp is not the designated
# temp location.
TMPDIR_EVAL="$(mktemp -d "${TMPDIR:-/tmp}/bundle-lifecycle.XXXXXX")"
# Running tallies, incremented by _pass / _fail.
pass=0
fail=0
29
+
30
#######################################
# Remove the scratch directory created for this run.
# Globals:   TMPDIR_EVAL (read)
# Returns:   0 when there is nothing to remove; otherwise rm's exit status
#######################################
cleanup() {
  # Tolerate an unset or empty path so the EXIT trap never trips `set -u`
  # and never hands rm an empty operand.
  [[ -n "${TMPDIR_EVAL:-}" ]] || return 0
  rm -rf -- "$TMPDIR_EVAL"
}
# Run on every exit path, including aborts caused by `set -e`.
trap cleanup EXIT
34
+
35
#######################################
# Report one passing check and bump the pass tally.
# Globals:   pass (written)
# Arguments: $1 - human-readable description of the check
# Outputs:   " ✓ <description>" on stdout
#######################################
_pass() {
  # printf '%s' prints the description verbatim regardless of its content.
  printf ' ✓ %s\n' "${1-}"
  pass=$((pass + 1))
}

#######################################
# Report one failing check and bump the fail tally.
# Globals:   fail (written)
# Arguments: $1 - human-readable description of the check
# Outputs:   " ✗ <description>" on stdout
#######################################
_fail() {
  printf ' ✗ %s\n' "${1-}"
  fail=$((fail + 1))
}
37
+
38
echo "=== Layer 2B: Bundle Lifecycle Tests ==="
echo ""

echo "--- Build ---"
# Runs the bundle build unconditionally. Whether an existing dist/ is reused is
# up to the `build:bundles` npm script (not visible from this file).
# The build output is captured instead of discarded so that a failure can be
# diagnosed from the test log.
if build_log="$(cd "$ROOT_DIR" && npm run build:bundles 2>&1)"; then
  _pass "bundle build completed"
else
  _fail "bundle build failed"
  # Only the tail is shown: enough to see the error without flooding the log.
  printf '%s\n' "$build_log" | tail -n 20 >&2
fi
echo ""
49
+
50
# ---------------------------------------------------------------------------
# 1. IDEMPOTENT RE-INSTALL
# ---------------------------------------------------------------------------
echo "--- Idempotent Re-install ---"

#######################################
# Print the total number of hook entries in a generated config file, i.e. the
# sum of the lengths of every array under the top-level "hooks" object.
# The path is handed to node as an argument (never spliced into the JS source),
# so quotes or backslashes in the path cannot break or alter the script.
# Arguments: $1 - path to a JSON config (settings.json / hooks.json)
# Outputs:   the count on stdout; nothing when the file cannot be read/parsed
# Returns:   0 on success, non-zero when node fails
#######################################
_count_hook_entries() {
  node - "$1" 2>/dev/null <<'NODE'
const fs = require("node:fs");
const config = JSON.parse(fs.readFileSync(process.argv[2], "utf8"));
const hooks = config.hooks || {};
let count = 0;
for (const key of Object.keys(hooks)) count += (hooks[key] || []).length;
console.log(count);
NODE
}

#######################################
# Run a dist bundle's install.sh against a destination, discarding its output.
# Arguments: $1 - runtime name (directory under dist/)
#            $2 - destination workspace
# Returns:   install.sh's exit status (or cd's, if the bundle dir is missing)
#######################################
_install_dist_bundle() {
  (cd "$ROOT_DIR/dist/$1" && bash install.sh "$2" >/dev/null 2>&1)
}

CLAUDE_IDEM="$TMPDIR_EVAL/idem-claude"
CODEX_IDEM="$TMPDIR_EVAL/idem-codex"
OPENCODE_IDEM="$TMPDIR_EVAL/idem-opencode"

# First installs. Deliberately best-effort: only the second (repeat) install
# is asserted below.
_install_dist_bundle claude-code "$CLAUDE_IDEM" || true
_install_dist_bundle codex "$CODEX_IDEM" || true
_install_dist_bundle opencode "$OPENCODE_IDEM" || true

# Capture hook counts before the second install. An empty value means "no
# usable baseline" (file missing or unparseable) and makes the later comparison
# fail instead of silently comparing 0 with 0.
CLAUDE_HOOKS_BEFORE=""
CODEX_HOOKS_BEFORE=""
if [[ -f "$CLAUDE_IDEM/.claude/settings.json" ]]; then
  CLAUDE_HOOKS_BEFORE="$(_count_hook_entries "$CLAUDE_IDEM/.claude/settings.json")" \
    || CLAUDE_HOOKS_BEFORE=""
fi
if [[ -f "$CODEX_IDEM/.codex/hooks.json" ]]; then
  CODEX_HOOKS_BEFORE="$(_count_hook_entries "$CODEX_IDEM/.codex/hooks.json")" \
    || CODEX_HOOKS_BEFORE=""
fi

# Second installs (idempotent)
if _install_dist_bundle claude-code "$CLAUDE_IDEM"; then
  _pass "claude-code second install succeeded"
else
  _fail "claude-code second install failed"
fi

if _install_dist_bundle codex "$CODEX_IDEM"; then
  _pass "codex second install succeeded"
else
  _fail "codex second install failed"
fi

if _install_dist_bundle opencode "$OPENCODE_IDEM"; then
  _pass "opencode second install succeeded"
else
  _fail "opencode second install failed"
fi

# Assert hook arrays did not grow after the second install
if [[ -f "$CLAUDE_IDEM/.claude/settings.json" ]]; then
  CLAUDE_HOOKS_AFTER="$(_count_hook_entries "$CLAUDE_IDEM/.claude/settings.json")" \
    || CLAUDE_HOOKS_AFTER=""
  if [[ -n "$CLAUDE_HOOKS_BEFORE" && "$CLAUDE_HOOKS_BEFORE" == "$CLAUDE_HOOKS_AFTER" ]]; then
    _pass "claude-code re-install: hooks array did not grow ($CLAUDE_HOOKS_AFTER entries)"
  else
    _fail "claude-code re-install: hooks array changed (before=$CLAUDE_HOOKS_BEFORE after=$CLAUDE_HOOKS_AFTER)"
  fi
fi

if [[ -f "$CODEX_IDEM/.codex/hooks.json" ]]; then
  CODEX_HOOKS_AFTER="$(_count_hook_entries "$CODEX_IDEM/.codex/hooks.json")" \
    || CODEX_HOOKS_AFTER=""
  if [[ -n "$CODEX_HOOKS_BEFORE" && "$CODEX_HOOKS_BEFORE" == "$CODEX_HOOKS_AFTER" ]]; then
    _pass "codex re-install: hooks.json did not change hook count ($CODEX_HOOKS_AFTER entries)"
  else
    _fail "codex re-install: hooks changed (before=$CODEX_HOOKS_BEFORE after=$CODEX_HOOKS_AFTER)"
  fi
fi

# Verify the opencode plugin file is still present after re-install
if [[ -f "$OPENCODE_IDEM/.opencode/plugins/flow-agents.js" ]]; then
  _pass "opencode re-install: plugin file still present"
else
  _fail "opencode re-install: plugin file missing"
fi
142
+
143
echo ""
echo "--- Upgrade Over Existing ---"

# Each bundle is copied into the scratch area and only the copy is modified,
# so dist/ itself is never mutated.
CLAUDE_BUNDLE_COPY="$TMPDIR_EVAL/claude-bundle-copy"
CODEX_BUNDLE_COPY="$TMPDIR_EVAL/codex-bundle-copy"
OPENCODE_BUNDLE_COPY="$TMPDIR_EVAL/opencode-bundle-copy"

# Parallel arrays: runtime name -> bundle copy -> upgrade workspace.
upgrade_runtimes=(claude-code codex opencode)
upgrade_copies=("$CLAUDE_BUNDLE_COPY" "$CODEX_BUNDLE_COPY" "$OPENCODE_BUNDLE_COPY")

for idx in "${!upgrade_runtimes[@]}"; do
  rsync -a "$ROOT_DIR/dist/${upgrade_runtimes[idx]}/" "${upgrade_copies[idx]}/"
done

CLAUDE_UPGRADE="$TMPDIR_EVAL/upgrade-claude"
CODEX_UPGRADE="$TMPDIR_EVAL/upgrade-codex"
OPENCODE_UPGRADE="$TMPDIR_EVAL/upgrade-opencode"
upgrade_dests=("$CLAUDE_UPGRADE" "$CODEX_UPGRADE" "$OPENCODE_UPGRADE")

# Baseline: install from the pristine dist/ bundles (best-effort, not asserted).
for idx in "${!upgrade_runtimes[@]}"; do
  (cd "$ROOT_DIR/dist/${upgrade_runtimes[idx]}" \
    && bash install.sh "${upgrade_dests[idx]}" >/dev/null 2>&1) || true
done

# Append a marker line to one skill file in each COPY (never in dist/).
UPGRADE_MARKER="# flow-agents-upgrade-test-marker"
CLAUDE_SKILL_FILE="$CLAUDE_BUNDLE_COPY/.claude/skills/plan-work/SKILL.md"
CODEX_SKILL_FILE="$CODEX_BUNDLE_COPY/.codex/skills/plan-work/SKILL.md"
OPENCODE_SKILL_FILE="$OPENCODE_BUNDLE_COPY/.opencode/skills/plan-work/SKILL.md"

for skill_file in "$CLAUDE_SKILL_FILE" "$CODEX_SKILL_FILE" "$OPENCODE_SKILL_FILE"; do
  if [[ -f "$skill_file" ]]; then
    echo "$UPGRADE_MARKER" >> "$skill_file"
  fi
done

# Upgrade: install again, this time from the modified copies.
for idx in "${!upgrade_runtimes[@]}"; do
  (cd "${upgrade_copies[idx]}" \
    && bash install.sh "${upgrade_dests[idx]}" >/dev/null 2>&1) || true
done

#######################################
# Record whether the marker added to a bundle copy reached the workspace.
# A skill file that is absent from the bundle counts as a (skipped) pass.
# Globals:   UPGRADE_MARKER (read)
# Arguments: $1 - runtime label used in the report line
#            $2 - skill file inside the modified bundle copy
#            $3 - the same skill file inside the upgraded workspace
#######################################
_assert_upgrade_propagated() {
  local runtime=$1 bundle_skill=$2 installed_skill=$3
  if [[ ! -f "$bundle_skill" ]]; then
    _pass "$runtime upgrade: skill file not in bundle (skipped)"
  elif grep -qF "$UPGRADE_MARKER" "$installed_skill" 2>/dev/null; then
    _pass "$runtime upgrade: modified skill file propagated to workspace"
  else
    _fail "$runtime upgrade: skill change did not propagate to workspace"
  fi
}

# Assert the change propagated
_assert_upgrade_propagated claude-code "$CLAUDE_SKILL_FILE" \
  "$CLAUDE_UPGRADE/.claude/skills/plan-work/SKILL.md"
_assert_upgrade_propagated codex "$CODEX_SKILL_FILE" \
  "$CODEX_UPGRADE/.codex/skills/plan-work/SKILL.md"
_assert_upgrade_propagated opencode "$OPENCODE_SKILL_FILE" \
  "$OPENCODE_UPGRADE/.opencode/skills/plan-work/SKILL.md"
209
+
210
echo ""
echo "--- User-file Preservation Semantics ---"
# SEMANTICS pinned by this section (as documented by the suite):
#   - The installer copies with rsync -a and no --delete, so files in the
#     destination that the bundle does not know about are left in place.
#   - rsync -a overwrites destination files that differ from the bundle source,
#     so edits made to installed files are reset on re-install.
#   - In short: user-added files survive; user-modified installed files do not.

CLAUDE_USER="$TMPDIR_EVAL/user-claude"
CODEX_USER="$TMPDIR_EVAL/user-codex"
OPENCODE_USER="$TMPDIR_EVAL/user-opencode"

user_runtimes=(claude-code codex opencode)
user_dests=("$CLAUDE_USER" "$CODEX_USER" "$OPENCODE_USER")

#######################################
# Install every dist bundle into its user workspace (best-effort, silent).
# Globals: ROOT_DIR, user_runtimes, user_dests (read)
#######################################
_install_user_workspaces() {
  local i
  for i in "${!user_runtimes[@]}"; do
    (cd "$ROOT_DIR/dist/${user_runtimes[i]}" \
      && bash install.sh "${user_dests[i]}" >/dev/null 2>&1) || true
  done
}

# Initial install
_install_user_workspaces

# Create user-owned files (unknown to the bundle): one custom agent file per
# runtime plus a session state file in every workspace.
mkdir -p "$CLAUDE_USER/.claude/custom" "$CODEX_USER/.codex/custom" "$OPENCODE_USER/.opencode/custom"
printf '%s\n' "# user custom agent" > "$CLAUDE_USER/.claude/custom/my-custom-agent.md"
printf 'name = "my-custom-agent"\n' > "$CODEX_USER/.codex/custom/my-custom-agent.toml"
printf '%s\n' "# user custom agent" > "$OPENCODE_USER/.opencode/custom/my-custom-agent.md"

for user_dest in "${user_dests[@]}"; do
  mkdir -p "$user_dest/.flow-agents/my-session"
  printf '%s\n' '{"custom":"data"}' > "$user_dest/.flow-agents/my-session/state.json"
done

# Simulate a user editing an installed skill file in each workspace.
CLAUDE_INSTALLED_SKILL="$CLAUDE_USER/.claude/skills/plan-work/SKILL.md"
CODEX_INSTALLED_SKILL="$CODEX_USER/.codex/skills/plan-work/SKILL.md"
OPENCODE_INSTALLED_SKILL="$OPENCODE_USER/.opencode/skills/plan-work/SKILL.md"

USER_EDIT_MARKER="# USER EDIT - should be overwritten by re-install"
for installed_skill in "$CLAUDE_INSTALLED_SKILL" "$CODEX_INSTALLED_SKILL" "$OPENCODE_INSTALLED_SKILL"; do
  if [[ -f "$installed_skill" ]]; then
    echo "$USER_EDIT_MARKER" >> "$installed_skill"
  fi
done

# Re-install from the original bundles
_install_user_workspaces

#######################################
# Record whether both user-owned files are still present after re-install.
# Arguments: $1 - runtime label; $2 - custom agent file; $3 - session state file
#######################################
_assert_user_files_kept() {
  local runtime=$1 custom_file=$2 state_file=$3
  if [[ -f "$custom_file" && -f "$state_file" ]]; then
    _pass "$runtime re-install: user-owned files not removed by rsync"
  else
    _fail "$runtime re-install: user-owned files were removed"
  fi
}

#######################################
# Record whether a user-edited installed skill file was reset by re-install.
# An absent skill file counts as a (skipped) pass.
# Globals:   USER_EDIT_MARKER (read)
# Arguments: $1 - runtime label; $2 - installed skill file
#######################################
_assert_skill_reset() {
  local runtime=$1 skill=$2
  if [[ ! -f "$skill" ]]; then
    _pass "$runtime re-install: skill file absent (skipped overwrite check)"
  elif grep -qF "$USER_EDIT_MARKER" "$skill" 2>/dev/null; then
    _fail "$runtime re-install: user edits to installed file persisted — rsync did NOT overwrite (unexpected)"
  else
    _pass "$runtime re-install: modified installed skill file was overwritten by rsync (expected)"
  fi
}

# Assert: user-owned unknown files survive
_assert_user_files_kept claude-code \
  "$CLAUDE_USER/.claude/custom/my-custom-agent.md" "$CLAUDE_USER/.flow-agents/my-session/state.json"
_assert_user_files_kept codex \
  "$CODEX_USER/.codex/custom/my-custom-agent.toml" "$CODEX_USER/.flow-agents/my-session/state.json"
_assert_user_files_kept opencode \
  "$OPENCODE_USER/.opencode/custom/my-custom-agent.md" "$OPENCODE_USER/.flow-agents/my-session/state.json"

# Assert: modified installed files ARE overwritten (pinning this semantic).
# The bundle is the authority on its own files; users who want to keep local
# edits to bundle files should fork the bundle.
_assert_skill_reset claude-code "$CLAUDE_INSTALLED_SKILL"
_assert_skill_reset codex "$CODEX_INSTALLED_SKILL"
_assert_skill_reset opencode "$OPENCODE_INSTALLED_SKILL"
304
+
305
echo ""
echo "--- Scope-Collision Detection ---"
# The collision check looks at the file named by FLOW_AGENTS_USER_CLAUDE_SETTINGS
# (if set) or at $HOME/.claude/settings.json. The override is used here so the
# tests are isolated from the real $HOME.

# Text that `init` is expected to print when it detects a collision.
SCOPE_COLLISION_WARNING="WARNING: Flow Agents scope collision"

#######################################
# Run `init --runtime claude-code` with a given user-level settings path and
# print everything it writes (stdout + stderr). The command's exit status is
# deliberately ignored: callers only inspect the output and the destination.
# Arguments: $1 - path used as the user-level Claude settings file
#            $2 - destination workspace
# Outputs:   combined output of the init command
#######################################
_init_with_user_settings() {
  FLOW_AGENTS_USER_CLAUDE_SETTINGS="$1" \
    node "$ROOT_DIR/build/src/cli.js" init --runtime claude-code --dest "$2" --yes 2>&1 || true
}

# The captured output is matched with [[ == *...* ]] rather than piped into
# `grep -q`: under `set -o pipefail`, grep exiting at the first match can kill
# the producer with SIGPIPE on large output and make a real match read as a miss.

# Case 1: colliding user-level settings (contains the Flow Agents marker —
# the distinctive statusMessage emitted by the bundle generator, NOT a script
# filename, because sibling products from the same lineage ship identically
# named hook scripts).
FAKE_HOME_COLLIDE="$TMPDIR_EVAL/fake-home-collide"
mkdir -p "$FAKE_HOME_COLLIDE/.claude"
cat > "$FAKE_HOME_COLLIDE/.claude/settings.json" << 'JSON'
{
  "hooks": {
    "UserPromptSubmit": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "bash -lc 'node \"$root/scripts/hooks/claude-telemetry-hook.js\" UserPromptSubmit dev'",
            "statusMessage": "Recording Flow Agents telemetry"
          }
        ]
      }
    ]
  }
}
JSON

CLAUDE_COLLISION_DEST="$TMPDIR_EVAL/collision-claude"
COLLISION_OUTPUT="$(_init_with_user_settings "$FAKE_HOME_COLLIDE/.claude/settings.json" "$CLAUDE_COLLISION_DEST")"

if [[ "$COLLISION_OUTPUT" == *"$SCOPE_COLLISION_WARNING"* ]]; then
  _pass "scope-collision: WARNING emitted when user-level settings contains flow-agents hooks"
else
  _fail "scope-collision: no WARNING emitted despite colliding user-level settings"
  # Here-string instead of `echo | head`: no producer process for head to SIGPIPE.
  head -n 5 <<< " Output was: $COLLISION_OUTPUT"
fi

# Assert install still succeeded (collision is warning-only, not blocking)
if [[ -d "$CLAUDE_COLLISION_DEST/.claude" ]]; then
  _pass "scope-collision: install continued despite WARNING (non-blocking)"
else
  _fail "scope-collision: install was blocked by WARNING (should be advisory only)"
fi

# Case 1b: sibling-product settings (same script filenames, no Flow Agents
# marker) must NOT trigger the warning. Regression test for the false positive
# where the collision marker matched shared script names.
FAKE_HOME_SIBLING="$TMPDIR_EVAL/fake-home-sibling"
mkdir -p "$FAKE_HOME_SIBLING/.claude"
cat > "$FAKE_HOME_SIBLING/.claude/settings.json" << 'JSON'
{
  "hooks": {
    "PreToolUse": [
      {
        "hooks": [
          {
            "type": "command",
            "command": "bash -lc 'root=\"$HOME/.claude/sibling-tool\"; node \"$root/scripts/hooks/claude-hook-adapter.js\" PreToolUse pre:config-protection config-protection.js standard,strict'"
          }
        ]
      }
    ]
  }
}
JSON

CLAUDE_SIBLING_DEST="$TMPDIR_EVAL/sibling-claude"
SIBLING_OUTPUT="$(_init_with_user_settings "$FAKE_HOME_SIBLING/.claude/settings.json" "$CLAUDE_SIBLING_DEST")"

if [[ "$SIBLING_OUTPUT" == *"$SCOPE_COLLISION_WARNING"* ]]; then
  _fail "scope-collision: WARNING emitted for sibling-product settings (false positive on shared script names)"
else
  _pass "scope-collision: no WARNING for sibling-product (shared-script-lineage) settings"
fi

# Case 2: clean $HOME (settings exist but contain no colliding hooks) — no
# warning expected
FAKE_HOME_CLEAN="$TMPDIR_EVAL/fake-home-clean"
mkdir -p "$FAKE_HOME_CLEAN/.claude"
echo '{"statusLine":{"type":"command","command":"echo hello"}}' > "$FAKE_HOME_CLEAN/.claude/settings.json"

CLAUDE_CLEAN_DEST="$TMPDIR_EVAL/clean-claude"
CLEAN_OUTPUT="$(_init_with_user_settings "$FAKE_HOME_CLEAN/.claude/settings.json" "$CLAUDE_CLEAN_DEST")"

if [[ "$CLEAN_OUTPUT" == *"$SCOPE_COLLISION_WARNING"* ]]; then
  # \$HOME is escaped so the report names the scenario, not the real home path.
  _fail "scope-collision: WARNING emitted on clean \$HOME (false positive)"
else
  _pass "scope-collision: no WARNING on clean \$HOME (no collision)"
fi

# Case 3: no settings file at all — no warning expected
FAKE_HOME_EMPTY="$TMPDIR_EVAL/fake-home-empty"
mkdir -p "$FAKE_HOME_EMPTY"

CLAUDE_EMPTY_DEST="$TMPDIR_EVAL/empty-claude"
EMPTY_OUTPUT="$(_init_with_user_settings "$FAKE_HOME_EMPTY/.claude/settings.json" "$CLAUDE_EMPTY_DEST")"

if [[ "$EMPTY_OUTPUT" == *"$SCOPE_COLLISION_WARNING"* ]]; then
  _fail "scope-collision: WARNING emitted when no settings file exists (false positive)"
else
  _pass "scope-collision: no WARNING when settings file is absent"
fi
413
+
414
echo ""
echo "--- Dogfood Smoke Test ---"
# Exercises the dogfood entry point for the claude-code runtime against a temp
# directory. The checks in this part of the suite cover:
#   1. The command succeeds.
#   2. .claude/settings.json is valid JSON.
#   3. permissions.defaultMode and skipDangerousModePermissionPrompt are ABSENT.
#   4. Hook commands are present (statusLine + hooks sections).
#   5. The hook commands execute correctly with a realistic UserPromptSubmit payload.

# A user-level settings path that does not exist, so no scope collision is reported.
dogfood_user_settings="$FAKE_HOME_EMPTY/.claude/settings.json"

DOGFOOD_DEST="$TMPDIR_EVAL/dogfood-claude"
mkdir -p "$DOGFOOD_DEST"

# NOTE(review): this runs a regular `init` and its output is not inspected in
# the part of the script visible here — confirm whether it is still needed.
DOGFOOD_OUTPUT="$(
  FLOW_AGENTS_USER_CLAUDE_SETTINGS="$dogfood_user_settings" \
    node "$ROOT_DIR/build/src/cli.js" init --runtime claude-code --dest "$DOGFOOD_DEST" --yes 2>&1 \
    || true
)"

# The dogfood entry point itself is invoked directly through scripts/dogfood.js.
DOGFOOD_DEST2="$TMPDIR_EVAL/dogfood-claude2"
mkdir -p "$DOGFOOD_DEST2"

dogfood_rc=0
FLOW_AGENTS_USER_CLAUDE_SETTINGS="$dogfood_user_settings" \
  node "$ROOT_DIR/scripts/dogfood.js" --runtime claude-code --dest "$DOGFOOD_DEST2" >/dev/null 2>&1 \
  || dogfood_rc=$?

if (( dogfood_rc == 0 )); then
  _pass "dogfood: claude-code command succeeded"
else
  _fail "dogfood: claude-code command failed"
fi
440
+
441
# Assert valid JSON.
# The settings path is passed to node as an argument (process.argv[2]) rather
# than spliced into the JS source, so quotes or backslashes in the path cannot
# break the snippet; success is judged by node's exit status.
if [[ -f "$DOGFOOD_DEST2/.claude/settings.json" ]] \
  && node - "$DOGFOOD_DEST2/.claude/settings.json" >/dev/null 2>&1 << 'NODE'
const fs = require("node:fs");
JSON.parse(fs.readFileSync(process.argv[2], "utf8"));
NODE
then
  _pass "dogfood: .claude/settings.json is valid JSON"
else
  _fail "dogfood: .claude/settings.json is missing or invalid JSON"
fi
450
+
451
# Assert the permissive permission keys are absent from the dogfood output.
# The embedded script throws (non-zero exit) on the first forbidden key found.
permissions_rc=0
node - "$DOGFOOD_DEST2/.claude/settings.json" << 'NODE' || permissions_rc=$?
const fs = require("node:fs");
const settings = JSON.parse(fs.readFileSync(process.argv[2], "utf8"));

// [top-level key that must not exist, error raised when it does]
const forbidden = [
  ["permissions", "permissions key must be absent in dogfood output"],
  ["skipDangerousModePermissionPrompt", "skipDangerousModePermissionPrompt must be absent in dogfood output"],
];
for (const [key, message] of forbidden) {
  if (key in settings) throw new Error(message);
}
console.log("ok");
NODE

if (( permissions_rc == 0 )); then
  _pass "dogfood: permissions.defaultMode and skipDangerousModePermissionPrompt are absent"
else
  _fail "dogfood: permissive permission keys present in dogfood output (should be omitted)"
fi
464
+
465
# Assert the generated settings carry a statusLine that references
# flow-agents-statusline.js and a workflow-steering hook under UserPromptSubmit.
hooks_present_rc=0
node - "$DOGFOOD_DEST2/.claude/settings.json" << 'NODE' || hooks_present_rc=$?
const fs = require("node:fs");
const settings = JSON.parse(fs.readFileSync(process.argv[2], "utf8"));

// Command string of a statusLine / hook entry ("" when it has none).
const commandOf = (entry) => String(entry.command || "");

const statusLine = settings.statusLine;
if (!statusLine || !commandOf(statusLine).includes("flow-agents-statusline.js")) {
  throw new Error("statusLine missing or does not reference flow-agents-statusline.js");
}

const promptGroups = (settings.hooks || {}).UserPromptSubmit;
if (!promptGroups || !promptGroups.length) {
  throw new Error("UserPromptSubmit hooks missing");
}

// A steering hook goes through the claude hook adapter and names workflow-steering.
const isSteeringHook = (entry) => {
  const cmd = commandOf(entry);
  return cmd.includes("claude-hook-adapter.js") && cmd.includes("workflow-steering");
};
const hasSteeringHook = promptGroups.some((group) =>
  (group.hooks || []).some((entry) => isSteeringHook(entry))
);
if (!hasSteeringHook) {
  throw new Error("workflow-steering hook missing from UserPromptSubmit");
}
console.log("ok");
NODE

if (( hooks_present_rc == 0 )); then
  _pass "dogfood: statusLine and workflow-steering hook present in settings.json"
else
  _fail "dogfood: statusLine or workflow-steering hook missing from settings.json"
fi
485
+
486
# Run the workflow-steering hook command taken from the dogfood-generated
# settings, feeding it a realistic UserPromptSubmit payload.
# Per the suite's design notes: dogfood writes .claude/settings.json into a
# project dir (e.g. the repo root) and the hook commands locate
# scripts/hooks/claude-hook-adapter.js via CLAUDE_PROJECT_DIR, so those scripts
# must exist there. The test therefore borrows an installed workspace (which
# ships the scripts) and points CLAUDE_PROJECT_DIR at it.
DOGFOOD_WORKSPACE="$CLAUDE_IDEM" # installed workspace from the idempotent section
mkdir -p "$DOGFOOD_WORKSPACE/.flow-agents"

hook_exec_rc=0
node - "$DOGFOOD_DEST2/.claude/settings.json" "$DOGFOOD_WORKSPACE" << 'NODE' || hook_exec_rc=$?
const fs = require("node:fs");
const path = require("node:path");
const { spawnSync } = require("node:child_process");

const settingsPath = process.argv[2];
const workspace = process.argv[3];

// --- Fixtures: a task with unverified state and a failing required critique ---
const taskDir = path.join(workspace, ".flow-agents", "dogfood-hook-demo");
fs.mkdirSync(taskDir, { recursive: true });

const writeFixture = (fileName, value) =>
  fs.writeFileSync(path.join(taskDir, fileName), JSON.stringify(value), "utf8");

writeFixture("state.json", {
  schema_version: "1.0",
  task_slug: "dogfood-hook-demo",
  status: "not_verified",
  phase: "verification",
  updated_at: "2026-06-11T00:00:00Z",
  next_action: { status: "needs_user", summary: "Dogfood test.", target_phase: "goal_fit" },
});
writeFixture("critique.json", {
  schema_version: "1.0",
  task_slug: "dogfood-hook-demo",
  status: "fail",
  required: true,
  updated_at: "2026-06-11T00:01:00Z",
  critiques: [{
    id: "dogfood-review",
    reviewer: "tool-code-reviewer",
    reviewed_at: "2026-06-11T00:01:00Z",
    verdict: "fail",
    summary: "Blocking.",
    findings: [{ id: "df-open", severity: "high", status: "open", description: "Test finding." }],
  }],
});

// Create a stub context map only when the workspace does not already have one.
const contextMap = path.join(workspace, "docs/context-map.md");
if (!fs.existsSync(contextMap)) {
  fs.mkdirSync(path.join(workspace, "docs"), { recursive: true });
  fs.writeFileSync(contextMap, "# Context Map\n", "utf8");
}

// --- Locate the workflow-steering command in the dogfood settings ---
function findSteeringCommand(groups) {
  for (const group of groups) {
    for (const entry of (group.hooks || [])) {
      const cmd = String(entry.command || "");
      if (cmd.includes("claude-hook-adapter.js") && cmd.includes("workflow-steering")) {
        return cmd;
      }
    }
  }
  return null;
}

const settings = JSON.parse(fs.readFileSync(settingsPath, "utf8"));
const wsCommand = findSteeringCommand(settings.hooks?.UserPromptSubmit || []);
if (!wsCommand) throw new Error("workflow-steering hook command not found");

// --- Execute the hook through a shell, payload on stdin ---
// CLAUDE_PROJECT_DIR points at the workspace that contains scripts/hooks/.
const result = spawnSync(wsCommand, {
  input: JSON.stringify({ hook_event_name: "UserPromptSubmit", cwd: workspace, prompt: "continue" }),
  cwd: workspace,
  env: { ...process.env, SA_HOOK_PROFILE: "standard", CLAUDE_PROJECT_DIR: workspace },
  shell: true,
  encoding: "utf8",
  timeout: 30000,
});
if (result.status !== 0) {
  throw new Error(`hook failed: rc=${result.status} stdout=${result.stdout} stderr=${result.stderr}`);
}

// --- Verify the injected context, in order; the first miss aborts ---
const ctx = JSON.parse(result.stdout || "{}").hookSpecificOutput?.additionalContext || "";
const expectations = [
  ["WORKFLOW STATE ATTENTION",
    () => `hook did not emit workflow attention: stdout=${result.stdout} stderr=${result.stderr}`],
  ["STATE: dogfood-hook-demo is status:not_verified phase:verification",
    () => `hook missed state guidance: ${ctx}`],
  ["CRITIQUE: required critique is status:fail",
    () => `hook missed critique guidance: ${ctx}`],
];
for (const [needle, describeFailure] of expectations) {
  if (!ctx.includes(needle)) throw new Error(describeFailure());
}
console.log("ok");
NODE

if (( hook_exec_rc == 0 )); then
  _pass "dogfood: workflow-steering hook executes correctly with realistic UserPromptSubmit payload"
else
  _fail "dogfood: workflow-steering hook did not execute correctly"
fi
583
+
584
# Dogfood smoke: the dogfood install should write ONLY .claude/settings.json.
# A full install would also place scripts/, .claude/agents/ and .claude/skills/
# in the destination, so the presence of any of those directories is a failure.
if [[ -d "$DOGFOOD_DEST2/.claude/agents" || -d "$DOGFOOD_DEST2/.claude/skills" || -d "$DOGFOOD_DEST2/scripts" ]]; then
  _fail "dogfood: unexpectedly rsynced full bundle content into dest"
else
  _pass "dogfood: did not rsync full bundle (no agent/skill/scripts dirs in dest)"
fi
592
+
593
echo ""
echo "--- opencode Plugin Hook Chain (end-to-end telemetry persistence) ---"
# Execute the REAL generated plugin module under node, invoke its handlers,
# and assert telemetry events persist inside the workspace .telemetry/ —
# not the workspace PARENT. Pins three live-smoke findings (2026-06-11):
#   1. spawning process.execPath fails under non-node hosts (NODE_BIN guard)
#   2. empty stdin makes the telemetry pipeline silently skip the emit
#   3. TELEMETRY_DATA_DIR escaping to the workspace parent (../../.. depth bug)
#
# ${TMPDIR_EVAL:?} aborts if the variable is unset or empty, so neither the
# install target nor the rm -rf below can ever resolve to a path under "/".
CHAIN_WS="${TMPDIR_EVAL:?TMPDIR_EVAL must be set}/plugin-chain-opencode"

# The install stays best-effort (the assertions below report the real
# failures), but a failed install is announced on stderr instead of being
# swallowed so the follow-on failures are easy to diagnose.
if ! (cd "$ROOT_DIR/dist/opencode" && bash install.sh "$CHAIN_WS" >/dev/null 2>&1); then
  echo "warning: opencode install.sh failed for $CHAIN_WS; plugin chain checks will likely fail" >&2
fi

# Start from a clean slate so stale telemetry cannot satisfy (or fail) the
# persistence and leak checks that follow.
rm -rf -- "${CHAIN_WS:?}/.telemetry" "${TMPDIR_EVAL:?}/.telemetry"
604
+
605
# Import the generated plugin from inside the workspace and drive two of its
# handlers: session.created, then tool.execute.before for an edit of README.md.
# node's stderr is discarded; only the exit status decides pass/fail.
if ! (
  cd "$CHAIN_WS" && node --input-type=module -e "
    const mod = await import('./.opencode/plugins/flow-agents.js');
    const hooks = await mod.FlowAgentsPlugin({ project: {}, client: {}, \$: null, directory: process.cwd(), worktree: process.cwd() });
    await hooks['session.created']({}, {});
    await hooks['tool.execute.before']({ tool: 'edit', sessionID: 's1', callID: 'c1' }, { args: { filePath: 'README.md' } });
  " 2>/dev/null
); then
  _fail "opencode plugin: module load or handler execution failed"
else
  _pass "opencode plugin: module loads and handlers execute under node"
fi
615
+
616
# The telemetry emit is detached (disowned) and can take a few seconds to
# land; poll (up to 10 one-second attempts) rather than fixed-sleep.
telemetry_log="$CHAIN_WS/.telemetry/full.jsonl"
for ((_i = 0; _i < 10; _i++)); do
  [[ -s "$telemetry_log" ]] && break
  sleep 1
done

# The log must be non-empty and every line must parse as JSON. The path is
# handed to node through the environment instead of being spliced into the JS
# source, so a workspace path containing quotes or backslashes cannot corrupt
# the script and produce a false failure.
if [[ -s "$telemetry_log" ]] && FLOW_AGENTS_TELEMETRY_LOG="$telemetry_log" node -e '
  require("fs").readFileSync(process.env.FLOW_AGENTS_TELEMETRY_LOG, "utf8").trim().split("\n").map(JSON.parse);
' 2>/dev/null; then
  _pass "opencode plugin: handlers persisted telemetry events in workspace .telemetry/"
else
  _fail "opencode plugin: no telemetry events persisted in workspace .telemetry/"
fi
629
+
630
# Telemetry must stay inside the workspace: a .telemetry entry appearing in
# the workspace's parent directory means the data dir escaped upward.
if [[ -e "$TMPDIR_EVAL/.telemetry" ]]; then
  _fail "opencode plugin: telemetry leaked into workspace parent (.telemetry escape)"
else
  _pass "opencode plugin: telemetry did not leak into the workspace parent directory"
fi
635
+
636
# Final tally: print the pass/fail totals and exit non-zero if any check failed.
echo ""
echo "==========================="
total=$(( pass + fail ))
echo "Results: ${pass}/${total} passed, ${fail} failed"
if (( fail > 0 )); then
  exit 1
fi
exit 0