@kontourai/flow-agents 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +23 -0
- package/.github/workflows/release-please.yml +31 -0
- package/.github/workflows/runtime-compat.yml +118 -0
- package/CHANGELOG.md +23 -0
- package/CONTRIBUTING.md +4 -0
- package/README.md +53 -10
- package/build/src/cli/init.js +215 -5
- package/build/src/cli/utterance-check.js +65 -1
- package/build/src/tools/build-universal-bundles.js +268 -0
- package/build/src/tools/filter-installed-packs.js +3 -0
- package/build/src/tools/validate-source-tree.js +5 -1
- package/context/scripts/telemetry/lib/config.sh +5 -1
- package/context/settings/flow-agents-settings.json +7 -0
- package/docs/context-map.md +1 -0
- package/docs/index.md +45 -4
- package/docs/integrations/conformance.md +246 -0
- package/docs/integrations/framework-adapter.md +275 -0
- package/docs/integrations/harness-install.md +213 -0
- package/docs/integrations/index.md +54 -0
- package/docs/north-star.md +2 -2
- package/docs/spec/runtime-hook-surface.md +472 -0
- package/docs/survey-utterance-check.md +211 -94
- package/docs/vision.md +45 -0
- package/evals/acceptance/run.sh +4 -2
- package/evals/acceptance/test_opencode_harness.sh +121 -0
- package/evals/acceptance/test_pi_harness.sh +98 -0
- package/evals/integration/test_bundle_install.sh +226 -1
- package/evals/integration/test_bundle_lifecycle.sh +641 -0
- package/evals/integration/test_utterance_check.sh +291 -44
- package/evals/run.sh +2 -0
- package/evals/static/test_universal_bundles.sh +137 -2
- package/integrations/strands/README.md +256 -0
- package/integrations/strands/example.py +74 -0
- package/integrations/strands/flow_agents_strands/__init__.py +27 -0
- package/integrations/strands/flow_agents_strands/hooks.py +194 -0
- package/integrations/strands/flow_agents_strands/policy.py +348 -0
- package/integrations/strands/flow_agents_strands/steering.py +172 -0
- package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
- package/integrations/strands/pyproject.toml +38 -0
- package/integrations/strands/tests/__init__.py +0 -0
- package/integrations/strands/tests/test_hooks.py +304 -0
- package/integrations/strands/tests/test_policy.py +315 -0
- package/integrations/strands/tests/test_telemetry.py +184 -0
- package/integrations/strands-ts/README.md +224 -0
- package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
- package/integrations/strands-ts/package.json +53 -0
- package/integrations/strands-ts/src/hooks.ts +208 -0
- package/integrations/strands-ts/src/index.ts +22 -0
- package/integrations/strands-ts/src/policy.ts +345 -0
- package/integrations/strands-ts/src/telemetry.ts +251 -0
- package/integrations/strands-ts/test/test-policy.ts +322 -0
- package/integrations/strands-ts/test/test-telemetry.ts +226 -0
- package/integrations/strands-ts/tsconfig.json +20 -0
- package/package.json +7 -2
- package/packaging/conformance/README.md +142 -0
- package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
- package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
- package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
- package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
- package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
- package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
- package/packaging/conformance/package.json +4 -0
- package/packaging/conformance/run-conformance.js +322 -0
- package/packaging/manifest.json +59 -0
- package/schemas/flow-agents-settings.schema.json +48 -0
- package/scripts/README.md +4 -0
- package/scripts/dogfood.js +16 -0
- package/scripts/hooks/opencode-hook-adapter.js +123 -0
- package/scripts/hooks/opencode-telemetry-hook.js +101 -0
- package/scripts/hooks/pi-hook-adapter.js +123 -0
- package/scripts/hooks/pi-telemetry-hook.js +105 -0
- package/scripts/hooks/run-hook.js +8 -0
- package/scripts/hooks/utterance-check.js +124 -22
- package/scripts/telemetry/lib/config.sh +5 -1
- package/src/cli/init.ts +219 -6
- package/src/cli/utterance-check.ts +71 -1
- package/src/tools/build-universal-bundles.ts +266 -0
- package/src/tools/filter-installed-packs.ts +3 -0
- package/src/tools/validate-source-tree.ts +5 -1
|
@@ -22,17 +22,17 @@ HOOK="$ROOT/scripts/hooks/utterance-check.js"
|
|
|
22
22
|
RUN_HOOK="$ROOT/scripts/hooks/run-hook.js"
|
|
23
23
|
|
|
24
24
|
# ---------------------------------------------------------------------------
|
|
25
|
-
# Hook: pass-through when disabled (
|
|
25
|
+
# Hook: pass-through when disabled by default (no config, no env var)
|
|
26
26
|
# ---------------------------------------------------------------------------
|
|
27
27
|
|
|
28
28
|
echo ""
|
|
29
|
-
echo "--- hook: disabled by default ---"
|
|
29
|
+
echo "--- hook: disabled by default (no config, no env var) ---"
|
|
30
30
|
|
|
31
31
|
INPUT_JSON='{"hook_event_name":"PostToolUse","tool_response":"The coverage is 92% and all tests pass."}'
|
|
32
32
|
|
|
33
33
|
if node "$HOOK" >"$TMPDIR_EVAL/disabled.out" 2>"$TMPDIR_EVAL/disabled.err" <<< "$INPUT_JSON"; then
|
|
34
34
|
if grep -qF '"hook_event_name"' "$TMPDIR_EVAL/disabled.out"; then
|
|
35
|
-
_pass "utterance check hook passes through when FLOW_AGENTS_UTTERANCE_CHECK_ENABLED is unset"
|
|
35
|
+
_pass "utterance check hook passes through when no config and FLOW_AGENTS_UTTERANCE_CHECK_ENABLED is unset"
|
|
36
36
|
else
|
|
37
37
|
_fail "utterance check hook pass-through output was not the raw input"
|
|
38
38
|
fi
|
|
@@ -40,6 +40,63 @@ else
|
|
|
40
40
|
_fail "utterance check hook should exit 0 when disabled"
|
|
41
41
|
fi
|
|
42
42
|
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# Hook: env var force-off overrides a config that would enable
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
echo ""
|
|
48
|
+
echo "--- hook: env var force-off overrides config ---"
|
|
49
|
+
|
|
50
|
+
# Create a temp repo dir with a config that has enabled:true
|
|
51
|
+
FAKE_REPO="$TMPDIR_EVAL/fake-repo"
|
|
52
|
+
mkdir -p "$FAKE_REPO/context/settings"
|
|
53
|
+
cat > "$FAKE_REPO/AGENTS.md" <<'AGENTS_EOF'
|
|
54
|
+
# Fake repo for testing
|
|
55
|
+
AGENTS_EOF
|
|
56
|
+
cat > "$FAKE_REPO/context/settings/flow-agents-settings.json" <<'CONFIG_EOF'
|
|
57
|
+
{"schema_version":"1.0","utteranceCheck":{"enabled":true,"mode":"report","extractor":"reference"}}
|
|
58
|
+
CONFIG_EOF
|
|
59
|
+
|
|
60
|
+
INPUT_WITH_CWD="{\"hook_event_name\":\"PostToolUse\",\"tool_response\":\"text\",\"cwd\":\"$FAKE_REPO\"}"
|
|
61
|
+
|
|
62
|
+
if FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=false \
|
|
63
|
+
node "$HOOK" >"$TMPDIR_EVAL/forceoff.out" 2>"$TMPDIR_EVAL/forceoff.err" <<< "$INPUT_WITH_CWD"; then
|
|
64
|
+
if grep -qF '"hook_event_name"' "$TMPDIR_EVAL/forceoff.out"; then
|
|
65
|
+
_pass "env var FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=false forces hook off even when config has enabled:true"
|
|
66
|
+
else
|
|
67
|
+
_fail "force-off pass-through output did not match raw input"
|
|
68
|
+
fi
|
|
69
|
+
else
|
|
70
|
+
_fail "hook should exit 0 when force-off via env var"
|
|
71
|
+
fi
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Hook: config-based enable (no env var override) passes through to CLI
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
echo ""
|
|
78
|
+
echo "--- hook: config-based enable reaches CLI (fail-open on missing CLI is acceptable) ---"
|
|
79
|
+
|
|
80
|
+
if node "$HOOK" >"$TMPDIR_EVAL/config-enable.out" 2>"$TMPDIR_EVAL/config-enable.err" <<< "$INPUT_WITH_CWD"; then
|
|
81
|
+
_pass "hook with config enabled exits 0 (fails open when CLI or survey is unavailable)"
|
|
82
|
+
else
|
|
83
|
+
_fail "hook with config enabled should exit 0 (fail-open)"
|
|
84
|
+
fi
|
|
85
|
+
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
# Hook: env var force-on (legacy behavior still works)
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
echo ""
|
|
91
|
+
echo "--- hook: env var force-on still works ---"
|
|
92
|
+
|
|
93
|
+
if FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=true \
|
|
94
|
+
node "$HOOK" >"$TMPDIR_EVAL/forceon.out" 2>"$TMPDIR_EVAL/forceon.err" <<< "$INPUT_JSON"; then
|
|
95
|
+
_pass "FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=true still enables the hook (legacy env var override)"
|
|
96
|
+
else
|
|
97
|
+
_fail "hook with force-on env var should exit 0"
|
|
98
|
+
fi
|
|
99
|
+
|
|
43
100
|
# ---------------------------------------------------------------------------
|
|
44
101
|
# Hook: pass-through with empty input
|
|
45
102
|
# ---------------------------------------------------------------------------
|
|
@@ -84,8 +141,7 @@ if SA_DISABLED_HOOKS=post:utterance-check \
|
|
|
84
141
|
node "$RUN_HOOK" post:utterance-check utterance-check.js standard,strict \
|
|
85
142
|
>"$TMPDIR_EVAL/disabled-runner.out" 2>"$TMPDIR_EVAL/disabled-runner.err" <<< "$HOOK_INPUT"
|
|
86
143
|
then
|
|
87
|
-
if cmp -s "$TMPDIR_EVAL/disabled-runner.out" <(printf '%s
|
|
88
|
-
' "$HOOK_INPUT"); then
|
|
144
|
+
if cmp -s "$TMPDIR_EVAL/disabled-runner.out" <(printf '%s\n' "$HOOK_INPUT"); then
|
|
89
145
|
_pass "run-hook.js passes input through when hook id is in SA_DISABLED_HOOKS"
|
|
90
146
|
else
|
|
91
147
|
_fail "run-hook.js disabled hook output did not match raw input"
|
|
@@ -94,6 +150,184 @@ else
|
|
|
94
150
|
_fail "run-hook.js with disabled hook should exit 0"
|
|
95
151
|
fi
|
|
96
152
|
|
|
153
|
+
# ---------------------------------------------------------------------------
|
|
154
|
+
# Hook: module.exports shape
|
|
155
|
+
# ---------------------------------------------------------------------------
|
|
156
|
+
|
|
157
|
+
echo ""
|
|
158
|
+
echo "--- hook: module.exports contract ---"
|
|
159
|
+
|
|
160
|
+
if node -e '
|
|
161
|
+
const h = require(process.argv[1]);
|
|
162
|
+
if (typeof h.run !== "function") { console.error("run missing"); process.exit(1); }
|
|
163
|
+
if (typeof h.extractUtteranceText !== "function") { console.error("extractUtteranceText missing"); process.exit(2); }
|
|
164
|
+
if (typeof h.findPackageRoot !== "function") { console.error("findPackageRoot missing"); process.exit(3); }
|
|
165
|
+
if (typeof h.findRepoRoot !== "function") { console.error("findRepoRoot missing"); process.exit(4); }
|
|
166
|
+
if (typeof h.loadRepoConfig !== "function") { console.error("loadRepoConfig missing"); process.exit(5); }
|
|
167
|
+
if (typeof h.resolvePolicy !== "function") { console.error("resolvePolicy missing"); process.exit(6); }
|
|
168
|
+
' "$HOOK"; then
|
|
169
|
+
_pass "utterance-check hook exports run, extractUtteranceText, findPackageRoot, findRepoRoot, loadRepoConfig, resolvePolicy"
|
|
170
|
+
else
|
|
171
|
+
_fail "utterance-check hook module.exports is missing expected functions"
|
|
172
|
+
fi
|
|
173
|
+
|
|
174
|
+
# ---------------------------------------------------------------------------
|
|
175
|
+
# Hook: loadRepoConfig reads utteranceCheck from settings file
|
|
176
|
+
# ---------------------------------------------------------------------------
|
|
177
|
+
|
|
178
|
+
echo ""
|
|
179
|
+
echo "--- hook: loadRepoConfig reads from context/settings/flow-agents-settings.json ---"
|
|
180
|
+
|
|
181
|
+
if node -e '
|
|
182
|
+
const { loadRepoConfig } = require(process.argv[1]);
|
|
183
|
+
const fakeRepo = process.argv[2];
|
|
184
|
+
const cfg = loadRepoConfig(fakeRepo);
|
|
185
|
+
if (!cfg) { console.error("loadRepoConfig returned null for a repo with settings"); process.exit(1); }
|
|
186
|
+
if (cfg.enabled !== true) { console.error("expected enabled:true, got:", cfg.enabled); process.exit(2); }
|
|
187
|
+
if (cfg.mode !== "report") { console.error("expected mode:report, got:", cfg.mode); process.exit(3); }
|
|
188
|
+
if (cfg.extractor !== "reference") { console.error("expected extractor:reference, got:", cfg.extractor); process.exit(4); }
|
|
189
|
+
' "$HOOK" "$FAKE_REPO"; then
|
|
190
|
+
_pass "loadRepoConfig correctly reads utteranceCheck fields from settings file"
|
|
191
|
+
else
|
|
192
|
+
_fail "loadRepoConfig did not return expected config from settings file"
|
|
193
|
+
fi
|
|
194
|
+
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
# Hook: loadRepoConfig returns null when settings file is absent
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
echo ""
|
|
200
|
+
echo "--- hook: loadRepoConfig returns null when file is absent ---"
|
|
201
|
+
|
|
202
|
+
MISSING_REPO="$TMPDIR_EVAL/no-settings-repo"
|
|
203
|
+
mkdir -p "$MISSING_REPO"
|
|
204
|
+
touch "$MISSING_REPO/AGENTS.md"
|
|
205
|
+
|
|
206
|
+
if node -e '
|
|
207
|
+
const { loadRepoConfig } = require(process.argv[1]);
|
|
208
|
+
const cfg = loadRepoConfig(process.argv[2]);
|
|
209
|
+
if (cfg !== null) { console.error("expected null, got:", JSON.stringify(cfg)); process.exit(1); }
|
|
210
|
+
' "$HOOK" "$MISSING_REPO"; then
|
|
211
|
+
_pass "loadRepoConfig returns null when context/settings/flow-agents-settings.json is absent"
|
|
212
|
+
else
|
|
213
|
+
_fail "loadRepoConfig should return null for a repo without the settings file"
|
|
214
|
+
fi
|
|
215
|
+
|
|
216
|
+
# ---------------------------------------------------------------------------
|
|
217
|
+
# Hook: resolvePolicy respects config enabled:false as default-off
|
|
218
|
+
# ---------------------------------------------------------------------------
|
|
219
|
+
|
|
220
|
+
echo ""
|
|
221
|
+
echo "--- hook: resolvePolicy returns disabled when config has enabled:false ---"
|
|
222
|
+
|
|
223
|
+
mkdir -p "$TMPDIR_EVAL/disabled-repo/context/settings"
|
|
224
|
+
touch "$TMPDIR_EVAL/disabled-repo/AGENTS.md"
|
|
225
|
+
cat > "$TMPDIR_EVAL/disabled-repo/context/settings/flow-agents-settings.json" <<'DCFG_EOF'
|
|
226
|
+
{"schema_version":"1.0","utteranceCheck":{"enabled":false}}
|
|
227
|
+
DCFG_EOF
|
|
228
|
+
|
|
229
|
+
if node -e '
|
|
230
|
+
const { resolvePolicy } = require(process.argv[1]);
|
|
231
|
+
const policy = resolvePolicy(process.argv[2]);
|
|
232
|
+
if (policy.enabled !== false) { console.error("expected enabled:false, got:", policy.enabled); process.exit(1); }
|
|
233
|
+
' "$HOOK" "$TMPDIR_EVAL/disabled-repo"; then
|
|
234
|
+
_pass "resolvePolicy returns {enabled:false} when config has enabled:false"
|
|
235
|
+
else
|
|
236
|
+
_fail "resolvePolicy should return disabled policy when config has enabled:false"
|
|
237
|
+
fi
|
|
238
|
+
|
|
239
|
+
# ---------------------------------------------------------------------------
|
|
240
|
+
# Hook: resolvePolicy applies strict mode from config
|
|
241
|
+
# ---------------------------------------------------------------------------
|
|
242
|
+
|
|
243
|
+
echo ""
|
|
244
|
+
echo "--- hook: resolvePolicy applies mode:strict from config ---"
|
|
245
|
+
|
|
246
|
+
mkdir -p "$TMPDIR_EVAL/strict-repo/context/settings"
|
|
247
|
+
touch "$TMPDIR_EVAL/strict-repo/AGENTS.md"
|
|
248
|
+
cat > "$TMPDIR_EVAL/strict-repo/context/settings/flow-agents-settings.json" <<'SCFG_EOF'
|
|
249
|
+
{"schema_version":"1.0","utteranceCheck":{"enabled":true,"mode":"strict","extractor":"reference"}}
|
|
250
|
+
SCFG_EOF
|
|
251
|
+
|
|
252
|
+
if node -e '
|
|
253
|
+
const { resolvePolicy } = require(process.argv[1]);
|
|
254
|
+
const policy = resolvePolicy(process.argv[2]);
|
|
255
|
+
if (policy.enabled !== true) { console.error("expected enabled:true, got:", policy.enabled); process.exit(1); }
|
|
256
|
+
if (policy.mode !== "strict") { console.error("expected mode:strict, got:", policy.mode); process.exit(2); }
|
|
257
|
+
' "$HOOK" "$TMPDIR_EVAL/strict-repo"; then
|
|
258
|
+
_pass "resolvePolicy applies mode:strict from config"
|
|
259
|
+
else
|
|
260
|
+
_fail "resolvePolicy did not apply strict mode from config"
|
|
261
|
+
fi
|
|
262
|
+
|
|
263
|
+
# ---------------------------------------------------------------------------
|
|
264
|
+
# Hook: resolvePolicy applies anthropic extractor from config
|
|
265
|
+
# ---------------------------------------------------------------------------
|
|
266
|
+
|
|
267
|
+
echo ""
|
|
268
|
+
echo "--- hook: resolvePolicy applies extractor:anthropic from config ---"
|
|
269
|
+
|
|
270
|
+
mkdir -p "$TMPDIR_EVAL/anthropic-repo/context/settings"
|
|
271
|
+
touch "$TMPDIR_EVAL/anthropic-repo/AGENTS.md"
|
|
272
|
+
cat > "$TMPDIR_EVAL/anthropic-repo/context/settings/flow-agents-settings.json" <<'ACFG_EOF'
|
|
273
|
+
{"schema_version":"1.0","utteranceCheck":{"enabled":true,"mode":"report","extractor":"anthropic","model":"claude-haiku-4-5"}}
|
|
274
|
+
ACFG_EOF
|
|
275
|
+
|
|
276
|
+
if node -e '
|
|
277
|
+
const { resolvePolicy } = require(process.argv[1]);
|
|
278
|
+
const policy = resolvePolicy(process.argv[2]);
|
|
279
|
+
if (policy.extractor !== "anthropic") { console.error("expected extractor:anthropic, got:", policy.extractor); process.exit(1); }
|
|
280
|
+
if (policy.model !== "claude-haiku-4-5") { console.error("expected model:claude-haiku-4-5, got:", policy.model); process.exit(2); }
|
|
281
|
+
' "$HOOK" "$TMPDIR_EVAL/anthropic-repo"; then
|
|
282
|
+
_pass "resolvePolicy applies extractor:anthropic and model from config"
|
|
283
|
+
else
|
|
284
|
+
_fail "resolvePolicy did not apply anthropic extractor from config"
|
|
285
|
+
fi
|
|
286
|
+
|
|
287
|
+
# ---------------------------------------------------------------------------
|
|
288
|
+
# Hook: resolvePolicy env var STRICT overrides report mode from config
|
|
289
|
+
# ---------------------------------------------------------------------------
|
|
290
|
+
|
|
291
|
+
echo ""
|
|
292
|
+
echo "--- hook: env var STRICT overrides report mode in config ---"
|
|
293
|
+
|
|
294
|
+
if node -e '
|
|
295
|
+
const { resolvePolicy } = require(process.argv[1]);
|
|
296
|
+
// Set env var before calling resolvePolicy (the module is already required above)
|
|
297
|
+
process.env.FLOW_AGENTS_UTTERANCE_CHECK_STRICT = "true";
|
|
298
|
+
const policy = resolvePolicy(process.argv[2]);
|
|
299
|
+
delete process.env.FLOW_AGENTS_UTTERANCE_CHECK_STRICT;
|
|
300
|
+
if (policy.mode !== "strict") { console.error("expected mode:strict from env var, got:", policy.mode); process.exit(1); }
|
|
301
|
+
' "$HOOK" "$FAKE_REPO"; then
|
|
302
|
+
_pass "FLOW_AGENTS_UTTERANCE_CHECK_STRICT=true env var overrides report mode in config"
|
|
303
|
+
else
|
|
304
|
+
_fail "env var STRICT did not override report mode from config"
|
|
305
|
+
fi
|
|
306
|
+
|
|
307
|
+
# ---------------------------------------------------------------------------
|
|
308
|
+
# Hook: extractUtteranceText extracts from PostToolUse and Stop events
|
|
309
|
+
# ---------------------------------------------------------------------------
|
|
310
|
+
|
|
311
|
+
echo ""
|
|
312
|
+
echo "--- hook: extractUtteranceText ---"
|
|
313
|
+
|
|
314
|
+
if node -e '
|
|
315
|
+
const { extractUtteranceText } = require(process.argv[1]);
|
|
316
|
+
const postToolUse = { hook_event_name: "PostToolUse", tool_response: "The answer is 42." };
|
|
317
|
+
const text = extractUtteranceText(postToolUse);
|
|
318
|
+
if (text !== "The answer is 42.") { console.error("PostToolUse extract failed:", text); process.exit(1); }
|
|
319
|
+
const stopWithContent = { hook_event_name: "Stop", content: [{ type: "text", text: "Done!" }] };
|
|
320
|
+
const text2 = extractUtteranceText(stopWithContent);
|
|
321
|
+
if (text2 !== "Done!") { console.error("Stop content extract failed:", text2); process.exit(2); }
|
|
322
|
+
const emptyEvent = { hook_event_name: "PostToolUse" };
|
|
323
|
+
const text3 = extractUtteranceText(emptyEvent);
|
|
324
|
+
if (text3 !== null) { console.error("Empty event should return null, got:", text3); process.exit(3); }
|
|
325
|
+
' "$HOOK"; then
|
|
326
|
+
_pass "extractUtteranceText handles PostToolUse, Stop content, and empty events"
|
|
327
|
+
else
|
|
328
|
+
_fail "extractUtteranceText behavior was unexpected"
|
|
329
|
+
fi
|
|
330
|
+
|
|
97
331
|
# ---------------------------------------------------------------------------
|
|
98
332
|
# CLI: build and test --not-configured
|
|
99
333
|
# ---------------------------------------------------------------------------
|
|
@@ -148,6 +382,24 @@ else
|
|
|
148
382
|
_fail "CLI --help should exit 0"
|
|
149
383
|
fi
|
|
150
384
|
|
|
385
|
+
# ---------------------------------------------------------------------------
|
|
386
|
+
# CLI: --extractor flag appears in help
|
|
387
|
+
# ---------------------------------------------------------------------------
|
|
388
|
+
|
|
389
|
+
echo ""
|
|
390
|
+
echo "--- cli: --extractor flag in help ---"
|
|
391
|
+
|
|
392
|
+
if node "$ROOT/build/src/cli.js" utterance-check --help \
|
|
393
|
+
>"$TMPDIR_EVAL/help2.out" 2>"$TMPDIR_EVAL/help2.err"; then
|
|
394
|
+
if grep -q '\-\-extractor' "$TMPDIR_EVAL/help2.err"; then
|
|
395
|
+
_pass "CLI --help mentions --extractor flag"
|
|
396
|
+
else
|
|
397
|
+
_fail "CLI --help does not mention --extractor flag"
|
|
398
|
+
fi
|
|
399
|
+
else
|
|
400
|
+
_fail "CLI --help should exit 0"
|
|
401
|
+
fi
|
|
402
|
+
|
|
151
403
|
# ---------------------------------------------------------------------------
|
|
152
404
|
# CLI: missing --utterance exits non-zero
|
|
153
405
|
# ---------------------------------------------------------------------------
|
|
@@ -203,58 +455,53 @@ else
|
|
|
203
455
|
fi
|
|
204
456
|
|
|
205
457
|
# ---------------------------------------------------------------------------
|
|
206
|
-
# CLI:
|
|
207
|
-
# ---------------------------------------------------------------------------
|
|
208
|
-
|
|
209
|
-
echo ""
|
|
210
|
-
echo "--- cli: command registration ---"
|
|
211
|
-
|
|
212
|
-
if node "$ROOT/build/src/cli.js" commands 2>/dev/null | grep -q 'utterance-check'; then
|
|
213
|
-
_pass "utterance-check is registered as a flow-agents CLI command"
|
|
214
|
-
else
|
|
215
|
-
_fail "utterance-check is not registered in flow-agents CLI commands"
|
|
216
|
-
fi
|
|
217
|
-
|
|
218
|
-
# ---------------------------------------------------------------------------
|
|
219
|
-
# Hook: module.exports shape
|
|
458
|
+
# CLI: --extractor anthropic without ANTHROPIC_API_KEY fails open (exit 0)
|
|
220
459
|
# ---------------------------------------------------------------------------
|
|
221
460
|
|
|
222
461
|
echo ""
|
|
223
|
-
echo "---
|
|
462
|
+
echo "--- cli: anthropic extractor without API key fails open ---"
|
|
224
463
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
464
|
+
# Run without ANTHROPIC_API_KEY set.
|
|
465
|
+
# The CLI should emit not_configured JSON and exit 0 (fail open).
|
|
466
|
+
if env -u ANTHROPIC_API_KEY \
|
|
467
|
+
node "$ROOT/build/src/cli.js" utterance-check check \
|
|
468
|
+
--utterance "The test coverage is 92%." \
|
|
469
|
+
--extractor anthropic \
|
|
470
|
+
>"$TMPDIR_EVAL/no-apikey.out" 2>"$TMPDIR_EVAL/no-apikey.err"
|
|
471
|
+
then
|
|
472
|
+
status_val=$(node -e '
|
|
473
|
+
const r = JSON.parse(require("fs").readFileSync(process.argv[1],"utf8"));
|
|
474
|
+
console.log(r.status);
|
|
475
|
+
' "$TMPDIR_EVAL/no-apikey.out" 2>/dev/null || echo "parse-error")
|
|
476
|
+
if [[ "$status_val" == "not_configured" ]]; then
|
|
477
|
+
_pass "CLI --extractor anthropic without ANTHROPIC_API_KEY emits not_configured and exits 0 (fail open)"
|
|
478
|
+
elif [[ "$status_val" == "ok" || "$status_val" == "error" ]]; then
|
|
479
|
+
# If survey is installed and somehow proceeded (shouldn't happen without key), still accept
|
|
480
|
+
_pass "CLI --extractor anthropic produced a valid report (status: $status_val)"
|
|
481
|
+
else
|
|
482
|
+
_fail "CLI --extractor anthropic without API key produced unexpected output (status: $status_val)"
|
|
483
|
+
fi
|
|
232
484
|
else
|
|
233
|
-
|
|
485
|
+
exit_code=$?
|
|
486
|
+
# Exit 1 means survey not installed — that's a different fail-open path, acceptable
|
|
487
|
+
if [[ "$exit_code" -eq 1 ]]; then
|
|
488
|
+
_pass "CLI --extractor anthropic: survey not installed, exits 1 (not_configured)"
|
|
489
|
+
else
|
|
490
|
+
_fail "CLI --extractor anthropic without API key should exit 0 or 1 (fail open), got: $exit_code"
|
|
491
|
+
fi
|
|
234
492
|
fi
|
|
235
493
|
|
|
236
494
|
# ---------------------------------------------------------------------------
|
|
237
|
-
#
|
|
495
|
+
# CLI: utterance check registers as a valid flow-agents command
|
|
238
496
|
# ---------------------------------------------------------------------------
|
|
239
497
|
|
|
240
498
|
echo ""
|
|
241
|
-
echo "---
|
|
499
|
+
echo "--- cli: command registration ---"
|
|
242
500
|
|
|
243
|
-
if node -
|
|
244
|
-
|
|
245
|
-
const postToolUse = { hook_event_name: "PostToolUse", tool_response: "The answer is 42." };
|
|
246
|
-
const text = extractUtteranceText(postToolUse);
|
|
247
|
-
if (text !== "The answer is 42.") { console.error("PostToolUse extract failed:", text); process.exit(1); }
|
|
248
|
-
const stopWithContent = { hook_event_name: "Stop", content: [{ type: "text", text: "Done!" }] };
|
|
249
|
-
const text2 = extractUtteranceText(stopWithContent);
|
|
250
|
-
if (text2 !== "Done!") { console.error("Stop content extract failed:", text2); process.exit(2); }
|
|
251
|
-
const emptyEvent = { hook_event_name: "PostToolUse" };
|
|
252
|
-
const text3 = extractUtteranceText(emptyEvent);
|
|
253
|
-
if (text3 !== null) { console.error("Empty event should return null, got:", text3); process.exit(3); }
|
|
254
|
-
' "$HOOK"; then
|
|
255
|
-
_pass "extractUtteranceText handles PostToolUse, Stop content, and empty events"
|
|
501
|
+
if node "$ROOT/build/src/cli.js" commands 2>/dev/null | grep -q 'utterance-check'; then
|
|
502
|
+
_pass "utterance-check is registered as a flow-agents CLI command"
|
|
256
503
|
else
|
|
257
|
-
_fail "
|
|
504
|
+
_fail "utterance-check is not registered in flow-agents CLI commands"
|
|
258
505
|
fi
|
|
259
506
|
|
|
260
507
|
# ---------------------------------------------------------------------------
|
package/evals/run.sh
CHANGED
|
@@ -190,6 +190,8 @@ run_integration() {
|
|
|
190
190
|
bash "$EVAL_DIR/integration/test_runtime_adapter_activation.sh" || result=1
|
|
191
191
|
echo ""
|
|
192
192
|
bash "$EVAL_DIR/integration/test_bundle_install.sh" || result=1
|
|
193
|
+
echo ""
|
|
194
|
+
bash "$EVAL_DIR/integration/test_bundle_lifecycle.sh" || result=1
|
|
193
195
|
return $result
|
|
194
196
|
}
|
|
195
197
|
|
|
@@ -57,7 +57,7 @@ fi
|
|
|
57
57
|
|
|
58
58
|
echo ""
|
|
59
59
|
echo "--- Bundle Layout ---"
|
|
60
|
-
for dir in "$DIST_DIR/kiro" "$DIST_DIR/claude-code" "$DIST_DIR/codex"; do
|
|
60
|
+
for dir in "$DIST_DIR/kiro" "$DIST_DIR/claude-code" "$DIST_DIR/codex" "$DIST_DIR/opencode" "$DIST_DIR/pi"; do
|
|
61
61
|
if [[ -d "$dir" ]]; then
|
|
62
62
|
_pass "$(basename "$dir") bundle exists"
|
|
63
63
|
else
|
|
@@ -80,6 +80,8 @@ codex_agents=$(find "$DIST_DIR/codex/.codex/agents" -maxdepth 1 -name '*.toml' 2
|
|
|
80
80
|
[[ "$kiro_agents" == "$source_agents" ]] && _pass "Kiro agent count matches source ($kiro_agents)" || _fail "Kiro agent count mismatch: source=$source_agents dist=$kiro_agents"
|
|
81
81
|
[[ "$claude_agents" == "$source_agents" ]] && _pass "Claude agent count matches source ($claude_agents)" || _fail "Claude agent count mismatch: source=$source_agents dist=$claude_agents"
|
|
82
82
|
[[ "$codex_agents" == "$expected_codex_agents" ]] && _pass "Codex agent count matches source minus manifest exclusions ($codex_agents)" || _fail "Codex agent count mismatch: expected=$expected_codex_agents dist=$codex_agents"
|
|
83
|
+
opencode_agents=$(find "$DIST_DIR/opencode/.opencode/agents" -maxdepth 1 -name '*.md' 2>/dev/null | wc -l | tr -d ' ')
|
|
84
|
+
[[ "$opencode_agents" == "$source_agents" ]] && _pass "opencode agent count matches source ($opencode_agents)" || _fail "opencode agent count mismatch: source=$source_agents dist=$opencode_agents"
|
|
83
85
|
|
|
84
86
|
echo ""
|
|
85
87
|
echo "--- Kiro JSON ---"
|
|
@@ -226,9 +228,142 @@ else
|
|
|
226
228
|
_fail "Codex hooks missing telemetry/policy lifecycle coverage or CODEX_HOME root resolution"
|
|
227
229
|
fi
|
|
228
230
|
|
|
231
|
+
echo ""
|
|
232
|
+
echo "--- opencode Export Shape ---"
|
|
233
|
+
if node - "$DIST_DIR/opencode/.opencode/agents" <<'NODE'
|
|
234
|
+
const fs = require("node:fs");
|
|
235
|
+
const path = require("node:path");
|
|
236
|
+
const required = new Set(["description", "mode", "model"]);
|
|
237
|
+
const validModes = new Set(["subagent", "primary", "all"]);
|
|
238
|
+
for (const name of fs.readdirSync(process.argv[2]).filter((file) => file.endsWith(".md"))) {
|
|
239
|
+
const text = fs.readFileSync(path.join(process.argv[2], name), "utf8");
|
|
240
|
+
if (!text.startsWith("---\n")) throw new Error(`${name}: missing frontmatter start`);
|
|
241
|
+
const parts = text.split("\n---\n");
|
|
242
|
+
if (parts.length < 2) throw new Error(`${name}: missing frontmatter end`);
|
|
243
|
+
const fmLines = parts[0].replace("---\n", "").split(/\r?\n/).filter((line) => line.includes(":"));
|
|
244
|
+
const keys = new Set(fmLines.map((line) => line.split(":", 1)[0].trim()));
|
|
245
|
+
const missing = [...required].filter((key) => !keys.has(key));
|
|
246
|
+
if (missing.length) throw new Error(`${name}: missing frontmatter keys ${missing.join(", ")}`);
|
|
247
|
+
const modeMatch = fmLines.find((line) => line.trim().startsWith("mode:"));
|
|
248
|
+
if (modeMatch) {
|
|
249
|
+
const mode = modeMatch.split(":", 2)[1].trim();
|
|
250
|
+
if (!validModes.has(mode)) throw new Error(`${name}: invalid mode value: ${mode}`);
|
|
251
|
+
}
|
|
252
|
+
if (!parts.slice(1).join("\n---\n").trim()) throw new Error(`${name}: empty body`);
|
|
253
|
+
}
|
|
254
|
+
console.log("ok");
|
|
255
|
+
NODE
|
|
256
|
+
then
|
|
257
|
+
_pass "opencode agent markdown has valid YAML frontmatter with description, mode, model"
|
|
258
|
+
else
|
|
259
|
+
_fail "opencode agent markdown frontmatter/shape check failed"
|
|
260
|
+
fi
|
|
261
|
+
|
|
262
|
+
if [[ -f "$DIST_DIR/opencode/.opencode/plugins/flow-agents.js" ]]; then
|
|
263
|
+
_pass "opencode bundle includes Flow Agents plugin"
|
|
264
|
+
else
|
|
265
|
+
_fail "opencode bundle missing .opencode/plugins/flow-agents.js"
|
|
266
|
+
fi
|
|
267
|
+
|
|
268
|
+
# opencode.json must exist, parse as JSON, hold a plain object at the top level,
# and keep `instructions` (when present) as an array of path strings.
if [[ -f "$DIST_DIR/opencode/opencode.json" ]]; then
  if node - "$DIST_DIR/opencode/opencode.json" <<'NODE'
const fs = require("node:fs");
const data = JSON.parse(fs.readFileSync(process.argv[2], "utf8"));
// `typeof [] === "object"`, so a top-level JSON array needs an explicit
// rejection; otherwise it would slip through as a valid config object.
if (!data || typeof data !== "object" || Array.isArray(data)) {
  throw new Error("opencode.json must be an object");
}
// opencode's config schema rejects non-array `instructions` and aborts
// startup (caught by live acceptance smoke 2026-06-11). Pin the constraint,
// and also require every entry to be a string since they are file paths.
if ("instructions" in data) {
  const { instructions } = data;
  const isPathList =
    Array.isArray(instructions) && instructions.every((entry) => typeof entry === "string");
  if (!isPathList) {
    throw new Error("opencode.json instructions must be an array of file paths when present");
  }
}
console.log("ok");
NODE
  then
    _pass "opencode.json is valid JSON and schema-safe (instructions array-or-absent)"
  else
    _fail "opencode.json is invalid or violates opencode config schema"
  fi
else
  _fail "opencode bundle missing opencode.json"
fi
|
|
288
|
+
|
|
289
|
+
# A generated hook artifact is only usable if its host language can parse it.
# The pi live smoke (2026-06-11) caught the generator emitting an unterminated
# string (template-literal escaping) that pi's loader rejected at startup, so
# the opencode plugin is run through `node --check` (diagnostics discarded).
if ! node --check "$DIST_DIR/opencode/.opencode/plugins/flow-agents.js" 2>/dev/null; then
  _fail "generated opencode plugin has a JavaScript syntax error"
else
  _pass "generated opencode plugin parses as JavaScript"
fi
|
|
297
|
+
|
|
298
|
+
# Semantic errors (TS2xxx: unresolved modules/types) are expected without the
# host's node_modules; only syntax-class errors (TS1xxx) mean a broken artifact.
#
# Counting "error TS1" lines alone passes vacuously whenever the compiler never
# parsed the file: the extension is missing, `npx tsc` cannot be executed, or
# tsc rejects an unknown command-line flag (TS5023/TS5025). Those cases are
# reported as failures instead of being mistaken for a clean parse.
PI_TS_EXTENSION="$DIST_DIR/pi/.pi/extensions/flow-agents.ts"
if [[ ! -f "$PI_TS_EXTENSION" ]]; then
  _fail "generated pi extension is missing; TypeScript syntax check could not run"
else
  PI_TSC_STATUS=0
  # Capture output and exit status separately; a non-zero status is normal when
  # only semantic (TS2xxx) diagnostics are present.
  PI_TSC_OUTPUT=$(npx tsc --ignoreConfig --noEmit --noResolve --skipLibCheck --target esnext --module esnext \
    "$PI_TS_EXTENSION" 2>&1) || PI_TSC_STATUS=$?
  # grep -c exits 1 on zero matches; `|| true` keeps that from aborting the script.
  PI_TS_SYNTAX_ERRORS=$(grep -c "error TS1" <<<"$PI_TSC_OUTPUT" || true)
  if { [[ "$PI_TSC_STATUS" -ne 0 ]] && ! grep -q "error TS" <<<"$PI_TSC_OUTPUT"; } \
    || grep -Eq "error TS(5023|5025):" <<<"$PI_TSC_OUTPUT"; then
    # Non-zero exit with no TS diagnostics means tsc itself did not run;
    # TS5023/TS5025 mean it refused the invocation before parsing anything.
    _fail "TypeScript syntax check of generated pi extension did not run (tsc unavailable or invocation rejected)"
  elif [[ "$PI_TS_SYNTAX_ERRORS" -eq 0 ]]; then
    _pass "generated pi extension parses as TypeScript (no TS1xxx syntax errors)"
  else
    _fail "generated pi extension has $PI_TS_SYNTAX_ERRORS TypeScript syntax errors"
  fi
fi
|
|
307
|
+
|
|
308
|
+
# The opencode bundle needs a skills directory holding at least one SKILL.md.
# The find pipeline only runs when the directory exists (|| short-circuits).
if [[ ! -d "$DIST_DIR/opencode/.opencode/skills" ]] || [[ $(find "$DIST_DIR/opencode/.opencode/skills" -name "SKILL.md" | wc -l | tr -d ' ') -le 0 ]]; then
  _fail "opencode bundle missing skills in .opencode/skills/"
else
  _pass "opencode bundle includes skills in .opencode/skills/"
fi
|
|
313
|
+
|
|
314
|
+
# Section banner (blank line, then title), followed by a presence check for
# the Flow Agents extension inside the pi bundle.
printf '\n%s\n' "--- pi Export Shape ---"
if [[ ! -f "$DIST_DIR/pi/.pi/extensions/flow-agents.ts" ]]; then
  _fail "pi bundle missing .pi/extensions/flow-agents.ts"
else
  _pass "pi bundle includes Flow Agents extension"
fi
|
|
321
|
+
|
|
322
|
+
# The pi bundle needs a skills directory holding at least one SKILL.md.
# The find pipeline only runs when the directory exists (|| short-circuits).
if [[ ! -d "$DIST_DIR/pi/.pi/skills" ]] || [[ $(find "$DIST_DIR/pi/.pi/skills" -name "SKILL.md" | wc -l | tr -d ' ') -le 0 ]]; then
  _fail "pi bundle missing skills in .pi/skills/"
else
  _pass "pi bundle includes skills in .pi/skills/"
fi
|
|
327
|
+
|
|
328
|
+
# Substring scan of the generated pi extension: every hook script name and
# event name listed below must appear somewhere in the file's text.
if node - "$DIST_DIR/pi/.pi/extensions/flow-agents.ts" <<'NODE'
const { readFileSync } = require("node:fs");
const source = readFileSync(process.argv[2], "utf8");
// Each pair is [text expected in the extension, error raised when it is absent].
// Order matters: the first missing entry is the one reported.
const expectations = [
  ["pi-hook-adapter.js", "pi extension does not reference pi-hook-adapter.js"],
  ["pi-telemetry-hook.js", "pi extension does not reference pi-telemetry-hook.js"],
  ["workflow-steering.js", "pi extension missing workflow-steering.js reference"],
  ["config-protection.js", "pi extension missing config-protection.js reference"],
  ["stop-goal-fit.js", "pi extension missing stop-goal-fit.js reference"],
  ["before_agent_start", "pi extension missing before_agent_start event handler"],
  ["tool_call", "pi extension missing tool_call event handler"],
];
for (const [needle, complaint] of expectations) {
  if (!source.includes(needle)) throw new Error(complaint);
}
console.log("ok");
NODE
then
  _pass "pi extension references correct hook adapters and event handlers"
else
  _fail "pi extension is missing required hook adapter or event handler references"
fi
|
|
345
|
+
|
|
346
|
+
# Substring scan of the generated opencode plugin: every hook script name and
# event name listed below must appear somewhere in the file's text.
if node - "$DIST_DIR/opencode/.opencode/plugins/flow-agents.js" <<'NODE'
const { readFileSync } = require("node:fs");
const pluginText = readFileSync(process.argv[2], "utf8");
// [required substring, error message used when that substring is absent].
const required = [
  ["opencode-hook-adapter.js", "opencode plugin does not reference opencode-hook-adapter.js"],
  ["opencode-telemetry-hook.js", "opencode plugin does not reference opencode-telemetry-hook.js"],
  ["workflow-steering.js", "opencode plugin missing workflow-steering.js reference"],
  ["config-protection.js", "opencode plugin missing config-protection.js reference"],
  ["stop-goal-fit.js", "opencode plugin missing stop-goal-fit.js reference"],
  ["session.created", "opencode plugin missing session.created event handler"],
  ["tool.execute.before", "opencode plugin missing tool.execute.before event handler"],
];
// Report only the first absent entry, in list order.
const firstMissing = required.find(([needle]) => !pluginText.includes(needle));
if (firstMissing) throw new Error(firstMissing[1]);
console.log("ok");
NODE
then
  _pass "opencode plugin references correct hook adapters and event handlers"
else
  _fail "opencode plugin is missing required hook adapter or event handler references"
fi
|
|
363
|
+
|
|
229
364
|
echo ""
|
|
230
365
|
echo "--- Shared Task Dirs ---"
|
|
231
|
-
for dir in "$DIST_DIR/claude-code/.flow-agents" "$DIST_DIR/codex/.flow-agents"; do
|
|
366
|
+
for dir in "$DIST_DIR/claude-code/.flow-agents" "$DIST_DIR/codex/.flow-agents" "$DIST_DIR/opencode/.flow-agents" "$DIST_DIR/pi/.flow-agents"; do
|
|
232
367
|
if [[ -d "$dir" ]]; then
|
|
233
368
|
_pass "$(realpath "$dir" 2>/dev/null || echo "$dir") exists"
|
|
234
369
|
else
|