@kontourai/flow-agents 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +23 -0
- package/.github/workflows/publish-npm.yml +1 -1
- package/.github/workflows/release-please.yml +31 -0
- package/.github/workflows/runtime-compat.yml +118 -0
- package/CHANGELOG.md +38 -0
- package/CONTRIBUTING.md +4 -0
- package/README.md +58 -19
- package/build/src/cli/init.js +215 -5
- package/build/src/cli/utterance-check.js +236 -0
- package/build/src/cli.js +3 -0
- package/build/src/tools/build-universal-bundles.js +268 -0
- package/build/src/tools/filter-installed-packs.js +3 -0
- package/build/src/tools/validate-source-tree.js +6 -1
- package/context/scripts/telemetry/lib/config.sh +5 -1
- package/context/settings/flow-agents-settings.json +7 -0
- package/docs/agent-system-guidebook.md +4 -5
- package/docs/context-map.md +1 -0
- package/docs/index.md +46 -6
- package/docs/integrations/conformance.md +246 -0
- package/docs/integrations/framework-adapter.md +275 -0
- package/docs/integrations/harness-install.md +213 -0
- package/docs/integrations/index.md +54 -0
- package/docs/north-star.md +3 -3
- package/docs/repository-structure.md +1 -1
- package/docs/skills-map.md +10 -4
- package/docs/spec/runtime-hook-surface.md +472 -0
- package/docs/survey-utterance-check.md +308 -0
- package/docs/vision.md +45 -0
- package/docs/workflow-usage-guide.md +1 -1
- package/evals/acceptance/run.sh +4 -2
- package/evals/acceptance/test_opencode_harness.sh +121 -0
- package/evals/acceptance/test_pi_harness.sh +98 -0
- package/evals/integration/test_bundle_install.sh +226 -1
- package/evals/integration/test_bundle_lifecycle.sh +641 -0
- package/evals/integration/test_utterance_check.sh +518 -0
- package/evals/run.sh +2 -0
- package/evals/static/test_universal_bundles.sh +137 -2
- package/integrations/strands/README.md +256 -0
- package/integrations/strands/example.py +74 -0
- package/integrations/strands/flow_agents_strands/__init__.py +27 -0
- package/integrations/strands/flow_agents_strands/hooks.py +194 -0
- package/integrations/strands/flow_agents_strands/policy.py +348 -0
- package/integrations/strands/flow_agents_strands/steering.py +172 -0
- package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
- package/integrations/strands/pyproject.toml +38 -0
- package/integrations/strands/tests/__init__.py +0 -0
- package/integrations/strands/tests/test_hooks.py +304 -0
- package/integrations/strands/tests/test_policy.py +315 -0
- package/integrations/strands/tests/test_telemetry.py +184 -0
- package/integrations/strands-ts/README.md +224 -0
- package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
- package/integrations/strands-ts/package.json +53 -0
- package/integrations/strands-ts/src/hooks.ts +208 -0
- package/integrations/strands-ts/src/index.ts +22 -0
- package/integrations/strands-ts/src/policy.ts +345 -0
- package/integrations/strands-ts/src/telemetry.ts +251 -0
- package/integrations/strands-ts/test/test-policy.ts +322 -0
- package/integrations/strands-ts/test/test-telemetry.ts +226 -0
- package/integrations/strands-ts/tsconfig.json +20 -0
- package/package.json +7 -2
- package/packaging/conformance/README.md +142 -0
- package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
- package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
- package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
- package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
- package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
- package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
- package/packaging/conformance/package.json +4 -0
- package/packaging/conformance/run-conformance.js +322 -0
- package/packaging/manifest.json +59 -0
- package/schemas/flow-agents-settings.schema.json +48 -0
- package/scripts/README.md +5 -0
- package/scripts/dogfood.js +16 -0
- package/scripts/hooks/opencode-hook-adapter.js +123 -0
- package/scripts/hooks/opencode-telemetry-hook.js +101 -0
- package/scripts/hooks/pi-hook-adapter.js +123 -0
- package/scripts/hooks/pi-telemetry-hook.js +105 -0
- package/scripts/hooks/run-hook.js +8 -0
- package/scripts/hooks/utterance-check.js +327 -0
- package/scripts/telemetry/lib/config.sh +5 -1
- package/skills/idea-to-backlog/SKILL.md +1 -1
- package/src/cli/init.ts +219 -6
- package/src/cli/utterance-check.ts +324 -0
- package/src/cli.ts +3 -0
- package/src/tools/build-universal-bundles.ts +266 -0
- package/src/tools/filter-installed-packs.ts +3 -0
- package/src/tools/validate-source-tree.ts +6 -1
- package/build/src/cli/docs-preview.js +0 -39
- package/build/src/cli/export-bookmarks.js +0 -38
- package/build/src/cli/import-bookmarks.js +0 -50
- package/build/src/cli/instinct-cli.js +0 -93
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_utterance_check.sh — Survey utterance check hook and CLI adapter coverage
|
|
3
|
+
set -uo pipefail
|
|
4
|
+
|
|
5
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
6
|
+
source "$ROOT/evals/lib/node.sh"
|
|
7
|
+
|
|
8
|
+
# Scratch directory for all captured stdout/stderr files; abort early if it
# cannot be created so later redirects never resolve to "/<name>.out".
TMPDIR_EVAL="$(mktemp -d)" || { echo "test_utterance_check: mktemp -d failed" >&2; exit 1; }
|
|
9
|
+
errors=0
|
|
10
|
+
|
|
11
|
+
cleanup() {
|
|
12
|
+
rm -rf "$TMPDIR_EVAL"
|
|
13
|
+
}
|
|
14
|
+
trap cleanup EXIT
|
|
15
|
+
|
|
16
|
+
_pass() { echo " ✓ $1"; }
|
|
17
|
+
_fail() { echo " ✗ $1"; errors=$((errors + 1)); }
|
|
18
|
+
|
|
19
|
+
echo "=== Utterance Check Hook and CLI Adapter ==="
|
|
20
|
+
|
|
21
|
+
HOOK="$ROOT/scripts/hooks/utterance-check.js"
|
|
22
|
+
RUN_HOOK="$ROOT/scripts/hooks/run-hook.js"
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# Hook: pass-through when disabled by default (no config, no env var)
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
echo ""
|
|
29
|
+
echo "--- hook: disabled by default (no config, no env var) ---"
|
|
30
|
+
|
|
31
|
+
INPUT_JSON='{"hook_event_name":"PostToolUse","tool_response":"The coverage is 92% and all tests pass."}'
|
|
32
|
+
|
|
33
|
+
if node "$HOOK" >"$TMPDIR_EVAL/disabled.out" 2>"$TMPDIR_EVAL/disabled.err" <<< "$INPUT_JSON"; then
|
|
34
|
+
if grep -qF '"hook_event_name"' "$TMPDIR_EVAL/disabled.out"; then
|
|
35
|
+
_pass "utterance check hook passes through when no config and FLOW_AGENTS_UTTERANCE_CHECK_ENABLED is unset"
|
|
36
|
+
else
|
|
37
|
+
_fail "utterance check hook pass-through output was not the raw input"
|
|
38
|
+
fi
|
|
39
|
+
else
|
|
40
|
+
_fail "utterance check hook should exit 0 when disabled"
|
|
41
|
+
fi
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# Hook: env var force-off overrides a config that would enable
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
echo ""
|
|
48
|
+
echo "--- hook: env var force-off overrides config ---"
|
|
49
|
+
|
|
50
|
+
# Create a temp repo dir with a config that has enabled:true
|
|
51
|
+
FAKE_REPO="$TMPDIR_EVAL/fake-repo"
|
|
52
|
+
mkdir -p "$FAKE_REPO/context/settings"
|
|
53
|
+
cat > "$FAKE_REPO/AGENTS.md" <<'AGENTS_EOF'
|
|
54
|
+
# Fake repo for testing
|
|
55
|
+
AGENTS_EOF
|
|
56
|
+
cat > "$FAKE_REPO/context/settings/flow-agents-settings.json" <<'CONFIG_EOF'
|
|
57
|
+
{"schema_version":"1.0","utteranceCheck":{"enabled":true,"mode":"report","extractor":"reference"}}
|
|
58
|
+
CONFIG_EOF
|
|
59
|
+
|
|
60
|
+
INPUT_WITH_CWD="{\"hook_event_name\":\"PostToolUse\",\"tool_response\":\"text\",\"cwd\":\"$FAKE_REPO\"}"
|
|
61
|
+
|
|
62
|
+
if FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=false \
|
|
63
|
+
node "$HOOK" >"$TMPDIR_EVAL/forceoff.out" 2>"$TMPDIR_EVAL/forceoff.err" <<< "$INPUT_WITH_CWD"; then
|
|
64
|
+
if grep -qF '"hook_event_name"' "$TMPDIR_EVAL/forceoff.out"; then
|
|
65
|
+
_pass "env var FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=false forces hook off even when config has enabled:true"
|
|
66
|
+
else
|
|
67
|
+
_fail "force-off pass-through output did not match raw input"
|
|
68
|
+
fi
|
|
69
|
+
else
|
|
70
|
+
_fail "hook should exit 0 when force-off via env var"
|
|
71
|
+
fi
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Hook: config-based enable (no env var override) passes through to CLI
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
echo ""
|
|
78
|
+
echo "--- hook: config-based enable reaches CLI (fail-open on missing CLI is acceptable) ---"
|
|
79
|
+
|
|
80
|
+
if node "$HOOK" >"$TMPDIR_EVAL/config-enable.out" 2>"$TMPDIR_EVAL/config-enable.err" <<< "$INPUT_WITH_CWD"; then
|
|
81
|
+
_pass "hook with config enabled exits 0 (fails open when CLI or survey is unavailable)"
|
|
82
|
+
else
|
|
83
|
+
_fail "hook with config enabled should exit 0 (fail-open)"
|
|
84
|
+
fi
|
|
85
|
+
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
# Hook: env var force-on (legacy behavior still works)
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
echo ""
|
|
91
|
+
echo "--- hook: env var force-on still works ---"
|
|
92
|
+
|
|
93
|
+
if FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=true \
|
|
94
|
+
node "$HOOK" >"$TMPDIR_EVAL/forceon.out" 2>"$TMPDIR_EVAL/forceon.err" <<< "$INPUT_JSON"; then
|
|
95
|
+
_pass "FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=true still enables the hook (legacy env var override)"
|
|
96
|
+
else
|
|
97
|
+
_fail "hook with force-on env var should exit 0"
|
|
98
|
+
fi
|
|
99
|
+
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
# Hook: pass-through with empty input
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
|
|
104
|
+
echo ""
|
|
105
|
+
echo "--- hook: empty input ---"
|
|
106
|
+
|
|
107
|
+
if FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=true node "$HOOK" >"$TMPDIR_EVAL/empty.out" 2>"$TMPDIR_EVAL/empty.err" <<< '{}'; then
|
|
108
|
+
_pass "utterance check hook passes through when no utterance text is present"
|
|
109
|
+
else
|
|
110
|
+
_fail "utterance check hook should exit 0 on empty input"
|
|
111
|
+
fi
|
|
112
|
+
|
|
113
|
+
# ---------------------------------------------------------------------------
|
|
114
|
+
# Hook: pass-through when CLI is not built yet
|
|
115
|
+
# ---------------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
echo ""
|
|
118
|
+
echo "--- hook: missing CLI gracefully fails open ---"
|
|
119
|
+
|
|
120
|
+
if FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=true \
|
|
121
|
+
node "$HOOK" >"$TMPDIR_EVAL/nocli.out" 2>"$TMPDIR_EVAL/nocli.err" <<JSON
|
|
122
|
+
{"hook_event_name":"PostToolUse","tool_response":"Some agent text."}
|
|
123
|
+
JSON
|
|
124
|
+
then
|
|
125
|
+
# Either built CLI path worked, or hook failed open (exit 0)
|
|
126
|
+
_pass "utterance check hook fails open when CLI or survey is not available"
|
|
127
|
+
else
|
|
128
|
+
_fail "utterance check hook should not block when CLI is unavailable"
|
|
129
|
+
fi
|
|
130
|
+
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
# Hook: respects SA_DISABLED_HOOKS through run-hook.js
|
|
133
|
+
# ---------------------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
echo ""
|
|
136
|
+
echo "--- hook: run-hook.js respects SA_DISABLED_HOOKS ---"
|
|
137
|
+
|
|
138
|
+
HOOK_INPUT='{"hook_event_name":"PostToolUse","tool_response":"text"}'
|
|
139
|
+
|
|
140
|
+
if SA_DISABLED_HOOKS=post:utterance-check \
|
|
141
|
+
node "$RUN_HOOK" post:utterance-check utterance-check.js standard,strict \
|
|
142
|
+
>"$TMPDIR_EVAL/disabled-runner.out" 2>"$TMPDIR_EVAL/disabled-runner.err" <<< "$HOOK_INPUT"
|
|
143
|
+
then
|
|
144
|
+
if cmp -s "$TMPDIR_EVAL/disabled-runner.out" <(printf '%s\n' "$HOOK_INPUT"); then
|
|
145
|
+
_pass "run-hook.js passes input through when hook id is in SA_DISABLED_HOOKS"
|
|
146
|
+
else
|
|
147
|
+
_fail "run-hook.js disabled hook output did not match raw input"
|
|
148
|
+
fi
|
|
149
|
+
else
|
|
150
|
+
_fail "run-hook.js with disabled hook should exit 0"
|
|
151
|
+
fi
|
|
152
|
+
|
|
153
|
+
# ---------------------------------------------------------------------------
|
|
154
|
+
# Hook: module.exports shape
|
|
155
|
+
# ---------------------------------------------------------------------------
|
|
156
|
+
|
|
157
|
+
echo ""
|
|
158
|
+
echo "--- hook: module.exports contract ---"
|
|
159
|
+
|
|
160
|
+
if node -e '
|
|
161
|
+
const h = require(process.argv[1]);
|
|
162
|
+
if (typeof h.run !== "function") { console.error("run missing"); process.exit(1); }
|
|
163
|
+
if (typeof h.extractUtteranceText !== "function") { console.error("extractUtteranceText missing"); process.exit(2); }
|
|
164
|
+
if (typeof h.findPackageRoot !== "function") { console.error("findPackageRoot missing"); process.exit(3); }
|
|
165
|
+
if (typeof h.findRepoRoot !== "function") { console.error("findRepoRoot missing"); process.exit(4); }
|
|
166
|
+
if (typeof h.loadRepoConfig !== "function") { console.error("loadRepoConfig missing"); process.exit(5); }
|
|
167
|
+
if (typeof h.resolvePolicy !== "function") { console.error("resolvePolicy missing"); process.exit(6); }
|
|
168
|
+
' "$HOOK"; then
|
|
169
|
+
_pass "utterance-check hook exports run, extractUtteranceText, findPackageRoot, findRepoRoot, loadRepoConfig, resolvePolicy"
|
|
170
|
+
else
|
|
171
|
+
_fail "utterance-check hook module.exports is missing expected functions"
|
|
172
|
+
fi
|
|
173
|
+
|
|
174
|
+
# ---------------------------------------------------------------------------
|
|
175
|
+
# Hook: loadRepoConfig reads utteranceCheck from settings file
|
|
176
|
+
# ---------------------------------------------------------------------------
|
|
177
|
+
|
|
178
|
+
echo ""
|
|
179
|
+
echo "--- hook: loadRepoConfig reads from context/settings/flow-agents-settings.json ---"
|
|
180
|
+
|
|
181
|
+
if node -e '
|
|
182
|
+
const { loadRepoConfig } = require(process.argv[1]);
|
|
183
|
+
const fakeRepo = process.argv[2];
|
|
184
|
+
const cfg = loadRepoConfig(fakeRepo);
|
|
185
|
+
if (!cfg) { console.error("loadRepoConfig returned null for a repo with settings"); process.exit(1); }
|
|
186
|
+
if (cfg.enabled !== true) { console.error("expected enabled:true, got:", cfg.enabled); process.exit(2); }
|
|
187
|
+
if (cfg.mode !== "report") { console.error("expected mode:report, got:", cfg.mode); process.exit(3); }
|
|
188
|
+
if (cfg.extractor !== "reference") { console.error("expected extractor:reference, got:", cfg.extractor); process.exit(4); }
|
|
189
|
+
' "$HOOK" "$FAKE_REPO"; then
|
|
190
|
+
_pass "loadRepoConfig correctly reads utteranceCheck fields from settings file"
|
|
191
|
+
else
|
|
192
|
+
_fail "loadRepoConfig did not return expected config from settings file"
|
|
193
|
+
fi
|
|
194
|
+
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
# Hook: loadRepoConfig returns null when settings file is absent
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
echo ""
|
|
200
|
+
echo "--- hook: loadRepoConfig returns null when file is absent ---"
|
|
201
|
+
|
|
202
|
+
MISSING_REPO="$TMPDIR_EVAL/no-settings-repo"
|
|
203
|
+
mkdir -p "$MISSING_REPO"
|
|
204
|
+
touch "$MISSING_REPO/AGENTS.md"
|
|
205
|
+
|
|
206
|
+
if node -e '
|
|
207
|
+
const { loadRepoConfig } = require(process.argv[1]);
|
|
208
|
+
const cfg = loadRepoConfig(process.argv[2]);
|
|
209
|
+
if (cfg !== null) { console.error("expected null, got:", JSON.stringify(cfg)); process.exit(1); }
|
|
210
|
+
' "$HOOK" "$MISSING_REPO"; then
|
|
211
|
+
_pass "loadRepoConfig returns null when context/settings/flow-agents-settings.json is absent"
|
|
212
|
+
else
|
|
213
|
+
_fail "loadRepoConfig should return null for a repo without the settings file"
|
|
214
|
+
fi
|
|
215
|
+
|
|
216
|
+
# ---------------------------------------------------------------------------
|
|
217
|
+
# Hook: resolvePolicy respects config enabled:false as default-off
|
|
218
|
+
# ---------------------------------------------------------------------------
|
|
219
|
+
|
|
220
|
+
echo ""
|
|
221
|
+
echo "--- hook: resolvePolicy returns disabled when config has enabled:false ---"
|
|
222
|
+
|
|
223
|
+
mkdir -p "$TMPDIR_EVAL/disabled-repo/context/settings"
|
|
224
|
+
touch "$TMPDIR_EVAL/disabled-repo/AGENTS.md"
|
|
225
|
+
cat > "$TMPDIR_EVAL/disabled-repo/context/settings/flow-agents-settings.json" <<'DCFG_EOF'
|
|
226
|
+
{"schema_version":"1.0","utteranceCheck":{"enabled":false}}
|
|
227
|
+
DCFG_EOF
|
|
228
|
+
|
|
229
|
+
if node -e '
|
|
230
|
+
const { resolvePolicy } = require(process.argv[1]);
|
|
231
|
+
const policy = resolvePolicy(process.argv[2]);
|
|
232
|
+
if (policy.enabled !== false) { console.error("expected enabled:false, got:", policy.enabled); process.exit(1); }
|
|
233
|
+
' "$HOOK" "$TMPDIR_EVAL/disabled-repo"; then
|
|
234
|
+
_pass "resolvePolicy returns {enabled:false} when config has enabled:false"
|
|
235
|
+
else
|
|
236
|
+
_fail "resolvePolicy should return disabled policy when config has enabled:false"
|
|
237
|
+
fi
|
|
238
|
+
|
|
239
|
+
# ---------------------------------------------------------------------------
|
|
240
|
+
# Hook: resolvePolicy applies strict mode from config
|
|
241
|
+
# ---------------------------------------------------------------------------
|
|
242
|
+
|
|
243
|
+
echo ""
|
|
244
|
+
echo "--- hook: resolvePolicy applies mode:strict from config ---"
|
|
245
|
+
|
|
246
|
+
mkdir -p "$TMPDIR_EVAL/strict-repo/context/settings"
|
|
247
|
+
touch "$TMPDIR_EVAL/strict-repo/AGENTS.md"
|
|
248
|
+
cat > "$TMPDIR_EVAL/strict-repo/context/settings/flow-agents-settings.json" <<'SCFG_EOF'
|
|
249
|
+
{"schema_version":"1.0","utteranceCheck":{"enabled":true,"mode":"strict","extractor":"reference"}}
|
|
250
|
+
SCFG_EOF
|
|
251
|
+
|
|
252
|
+
if node -e '
|
|
253
|
+
const { resolvePolicy } = require(process.argv[1]);
|
|
254
|
+
const policy = resolvePolicy(process.argv[2]);
|
|
255
|
+
if (policy.enabled !== true) { console.error("expected enabled:true, got:", policy.enabled); process.exit(1); }
|
|
256
|
+
if (policy.mode !== "strict") { console.error("expected mode:strict, got:", policy.mode); process.exit(2); }
|
|
257
|
+
' "$HOOK" "$TMPDIR_EVAL/strict-repo"; then
|
|
258
|
+
_pass "resolvePolicy applies mode:strict from config"
|
|
259
|
+
else
|
|
260
|
+
_fail "resolvePolicy did not apply strict mode from config"
|
|
261
|
+
fi
|
|
262
|
+
|
|
263
|
+
# ---------------------------------------------------------------------------
|
|
264
|
+
# Hook: resolvePolicy applies anthropic extractor from config
|
|
265
|
+
# ---------------------------------------------------------------------------
|
|
266
|
+
|
|
267
|
+
echo ""
|
|
268
|
+
echo "--- hook: resolvePolicy applies extractor:anthropic from config ---"
|
|
269
|
+
|
|
270
|
+
mkdir -p "$TMPDIR_EVAL/anthropic-repo/context/settings"
|
|
271
|
+
touch "$TMPDIR_EVAL/anthropic-repo/AGENTS.md"
|
|
272
|
+
cat > "$TMPDIR_EVAL/anthropic-repo/context/settings/flow-agents-settings.json" <<'ACFG_EOF'
|
|
273
|
+
{"schema_version":"1.0","utteranceCheck":{"enabled":true,"mode":"report","extractor":"anthropic","model":"claude-haiku-4-5"}}
|
|
274
|
+
ACFG_EOF
|
|
275
|
+
|
|
276
|
+
if node -e '
|
|
277
|
+
const { resolvePolicy } = require(process.argv[1]);
|
|
278
|
+
const policy = resolvePolicy(process.argv[2]);
|
|
279
|
+
if (policy.extractor !== "anthropic") { console.error("expected extractor:anthropic, got:", policy.extractor); process.exit(1); }
|
|
280
|
+
if (policy.model !== "claude-haiku-4-5") { console.error("expected model:claude-haiku-4-5, got:", policy.model); process.exit(2); }
|
|
281
|
+
' "$HOOK" "$TMPDIR_EVAL/anthropic-repo"; then
|
|
282
|
+
_pass "resolvePolicy applies extractor:anthropic and model from config"
|
|
283
|
+
else
|
|
284
|
+
_fail "resolvePolicy did not apply anthropic extractor from config"
|
|
285
|
+
fi
|
|
286
|
+
|
|
287
|
+
# ---------------------------------------------------------------------------
|
|
288
|
+
# Hook: resolvePolicy env var STRICT overrides report mode from config
|
|
289
|
+
# ---------------------------------------------------------------------------
|
|
290
|
+
|
|
291
|
+
echo ""
|
|
292
|
+
echo "--- hook: env var STRICT overrides report mode in config ---"
|
|
293
|
+
|
|
294
|
+
if node -e '
|
|
295
|
+
const { resolvePolicy } = require(process.argv[1]);
|
|
296
|
+
// Set env var before calling resolvePolicy (the module is already loaded above)
|
|
297
|
+
process.env.FLOW_AGENTS_UTTERANCE_CHECK_STRICT = "true";
|
|
298
|
+
const policy = resolvePolicy(process.argv[2]);
|
|
299
|
+
delete process.env.FLOW_AGENTS_UTTERANCE_CHECK_STRICT;
|
|
300
|
+
if (policy.mode !== "strict") { console.error("expected mode:strict from env var, got:", policy.mode); process.exit(1); }
|
|
301
|
+
' "$HOOK" "$FAKE_REPO"; then
|
|
302
|
+
_pass "FLOW_AGENTS_UTTERANCE_CHECK_STRICT=true env var overrides report mode in config"
|
|
303
|
+
else
|
|
304
|
+
_fail "env var STRICT did not override report mode from config"
|
|
305
|
+
fi
|
|
306
|
+
|
|
307
|
+
# ---------------------------------------------------------------------------
|
|
308
|
+
# Hook: extractUtteranceText extracts from PostToolUse and Stop events
|
|
309
|
+
# ---------------------------------------------------------------------------
|
|
310
|
+
|
|
311
|
+
echo ""
|
|
312
|
+
echo "--- hook: extractUtteranceText ---"
|
|
313
|
+
|
|
314
|
+
if node -e '
|
|
315
|
+
const { extractUtteranceText } = require(process.argv[1]);
|
|
316
|
+
const postToolUse = { hook_event_name: "PostToolUse", tool_response: "The answer is 42." };
|
|
317
|
+
const text = extractUtteranceText(postToolUse);
|
|
318
|
+
if (text !== "The answer is 42.") { console.error("PostToolUse extract failed:", text); process.exit(1); }
|
|
319
|
+
const stopWithContent = { hook_event_name: "Stop", content: [{ type: "text", text: "Done!" }] };
|
|
320
|
+
const text2 = extractUtteranceText(stopWithContent);
|
|
321
|
+
if (text2 !== "Done!") { console.error("Stop content extract failed:", text2); process.exit(2); }
|
|
322
|
+
const emptyEvent = { hook_event_name: "PostToolUse" };
|
|
323
|
+
const text3 = extractUtteranceText(emptyEvent);
|
|
324
|
+
if (text3 !== null) { console.error("Empty event should return null, got:", text3); process.exit(3); }
|
|
325
|
+
' "$HOOK"; then
|
|
326
|
+
_pass "extractUtteranceText handles PostToolUse, Stop content, and empty events"
|
|
327
|
+
else
|
|
328
|
+
_fail "extractUtteranceText behavior was unexpected"
|
|
329
|
+
fi
|
|
330
|
+
|
|
331
|
+
# ---------------------------------------------------------------------------
|
|
332
|
+
# CLI: build and test --not-configured
|
|
333
|
+
# ---------------------------------------------------------------------------
|
|
334
|
+
|
|
335
|
+
echo ""
|
|
336
|
+
echo "--- cli: not-configured output ---"
|
|
337
|
+
|
|
338
|
+
# Build the TypeScript source if needed
|
|
339
|
+
if [[ ! -f "$ROOT/build/src/cli.js" ]]; then
|
|
340
|
+
echo " (building TypeScript source...)"
|
|
341
|
+
if ! (cd "$ROOT" && npm run build --silent 2>"$TMPDIR_EVAL/build.err"); then
|
|
342
|
+
# Report only the first five lines of the captured build stderr.
_fail "TypeScript build failed: $(head -n 5 "$TMPDIR_EVAL/build.err")"
|
|
343
|
+
# _fail has already incremented errors for this build failure; incrementing again would double-count it in the summary.
|
|
344
|
+
echo ""
|
|
345
|
+
echo "Utterance check integration tests failed: $errors issue(s)."
|
|
346
|
+
exit 1
|
|
347
|
+
fi
|
|
348
|
+
fi
|
|
349
|
+
|
|
350
|
+
if node "$ROOT/build/src/cli.js" utterance-check check --not-configured \
|
|
351
|
+
>"$TMPDIR_EVAL/not-configured.out" 2>"$TMPDIR_EVAL/not-configured.err"
|
|
352
|
+
then
|
|
353
|
+
if node -e '
|
|
354
|
+
const r = JSON.parse(require("fs").readFileSync(process.argv[1], "utf8"));
|
|
355
|
+
if (r.status !== "not_configured") process.exit(1);
|
|
356
|
+
if (!Array.isArray(r.statements)) process.exit(2);
|
|
357
|
+
if (typeof r.summary !== "string") process.exit(3);
|
|
358
|
+
' "$TMPDIR_EVAL/not-configured.out"; then
|
|
359
|
+
_pass "CLI outputs not_configured JSON when --not-configured is set"
|
|
360
|
+
else
|
|
361
|
+
_fail "CLI not-configured output did not match expected shape"
|
|
362
|
+
fi
|
|
363
|
+
else
|
|
364
|
+
_fail "CLI should exit 0 with --not-configured"
|
|
365
|
+
fi
|
|
366
|
+
|
|
367
|
+
# ---------------------------------------------------------------------------
|
|
368
|
+
# CLI: --help exits 0 and prints usage
|
|
369
|
+
# ---------------------------------------------------------------------------
|
|
370
|
+
|
|
371
|
+
echo ""
|
|
372
|
+
echo "--- cli: help output ---"
|
|
373
|
+
|
|
374
|
+
if node "$ROOT/build/src/cli.js" utterance-check --help \
|
|
375
|
+
>"$TMPDIR_EVAL/help.out" 2>"$TMPDIR_EVAL/help.err"; then
|
|
376
|
+
if grep -q 'utterance-check check' "$TMPDIR_EVAL/help.err"; then
|
|
377
|
+
_pass "CLI --help prints usage"
|
|
378
|
+
else
|
|
379
|
+
_fail "CLI --help did not print expected usage text"
|
|
380
|
+
fi
|
|
381
|
+
else
|
|
382
|
+
_fail "CLI --help should exit 0"
|
|
383
|
+
fi
|
|
384
|
+
|
|
385
|
+
# ---------------------------------------------------------------------------
|
|
386
|
+
# CLI: --extractor flag appears in help
|
|
387
|
+
# ---------------------------------------------------------------------------
|
|
388
|
+
|
|
389
|
+
echo ""
|
|
390
|
+
echo "--- cli: --extractor flag in help ---"
|
|
391
|
+
|
|
392
|
+
if node "$ROOT/build/src/cli.js" utterance-check --help \
|
|
393
|
+
>"$TMPDIR_EVAL/help2.out" 2>"$TMPDIR_EVAL/help2.err"; then
|
|
394
|
+
# Fixed-string match; -e keeps the leading dashes from being parsed as options.
if grep -qF -e '--extractor' "$TMPDIR_EVAL/help2.err"; then
|
|
395
|
+
_pass "CLI --help mentions --extractor flag"
|
|
396
|
+
else
|
|
397
|
+
_fail "CLI --help does not mention --extractor flag"
|
|
398
|
+
fi
|
|
399
|
+
else
|
|
400
|
+
_fail "CLI --help should exit 0"
|
|
401
|
+
fi
|
|
402
|
+
|
|
403
|
+
# ---------------------------------------------------------------------------
|
|
404
|
+
# CLI: missing --utterance exits non-zero
|
|
405
|
+
# ---------------------------------------------------------------------------
|
|
406
|
+
|
|
407
|
+
echo ""
|
|
408
|
+
echo "--- cli: missing utterance flag ---"
|
|
409
|
+
|
|
410
|
+
if node "$ROOT/build/src/cli.js" utterance-check check \
|
|
411
|
+
>"$TMPDIR_EVAL/no-utterance.out" 2>"$TMPDIR_EVAL/no-utterance.err"
|
|
412
|
+
then
|
|
413
|
+
_fail "CLI check without --utterance should exit non-zero"
|
|
414
|
+
else
|
|
415
|
+
_pass "CLI check without --utterance exits non-zero (usage error)"
|
|
416
|
+
fi
|
|
417
|
+
|
|
418
|
+
# ---------------------------------------------------------------------------
|
|
419
|
+
# CLI: survey not installed → not_configured output, exits 1
|
|
420
|
+
# ---------------------------------------------------------------------------
|
|
421
|
+
|
|
422
|
+
echo ""
|
|
423
|
+
echo "--- cli: @kontourai/survey not installed ---"
|
|
424
|
+
|
|
425
|
+
# No NODE_PATH override is applied here: whether the dynamic import of
|
|
426
|
+
# @kontourai/survey fails depends on whether the package is installed in
|
|
427
|
+
# flow-agents/node_modules, so both outcomes are accepted below.
|
|
428
|
+
if node "$ROOT/build/src/cli.js" utterance-check check \
|
|
429
|
+
--utterance "The test coverage is 92%." \
|
|
430
|
+
>"$TMPDIR_EVAL/no-survey.out" 2>"$TMPDIR_EVAL/no-survey.err"
|
|
431
|
+
then
|
|
432
|
+
# survey might be installed; check for not_configured or ok status
|
|
433
|
+
status_val=$(node -e 'console.log(JSON.parse(require("fs").readFileSync(process.argv[1],"utf8")).status)' \
|
|
434
|
+
"$TMPDIR_EVAL/no-survey.out" 2>/dev/null || echo "parse-error")
|
|
435
|
+
if [[ "$status_val" == "ok" || "$status_val" == "not_configured" ]]; then
|
|
436
|
+
_pass "CLI utterance check produces valid report (status: $status_val)"
|
|
437
|
+
else
|
|
438
|
+
_fail "CLI utterance check output had unexpected status: $status_val"
|
|
439
|
+
fi
|
|
440
|
+
else
|
|
441
|
+
exit_code=$?
|
|
442
|
+
# Exit 1 means not_configured (survey not installed) — expected in CI
|
|
443
|
+
if [[ "$exit_code" -eq 1 ]]; then
|
|
444
|
+
if node -e '
|
|
445
|
+
const r = JSON.parse(require("fs").readFileSync(process.argv[1], "utf8"));
|
|
446
|
+
if (r.status !== "not_configured") process.exit(1);
|
|
447
|
+
' "$TMPDIR_EVAL/no-survey.out" 2>/dev/null; then
|
|
448
|
+
_pass "CLI outputs not_configured when @kontourai/survey is not installed"
|
|
449
|
+
else
|
|
450
|
+
_fail "CLI exit 1 but output was not not_configured JSON"
|
|
451
|
+
fi
|
|
452
|
+
else
|
|
453
|
+
_fail "CLI should exit 0 or 1, got exit code: $exit_code"
|
|
454
|
+
fi
|
|
455
|
+
fi
|
|
456
|
+
|
|
457
|
+
# ---------------------------------------------------------------------------
|
|
458
|
+
# CLI: --extractor anthropic without ANTHROPIC_API_KEY fails open (exit 0)
|
|
459
|
+
# ---------------------------------------------------------------------------
|
|
460
|
+
|
|
461
|
+
echo ""
|
|
462
|
+
echo "--- cli: anthropic extractor without API key fails open ---"
|
|
463
|
+
|
|
464
|
+
# Run without ANTHROPIC_API_KEY set.
|
|
465
|
+
# The CLI should emit not_configured JSON and exit 0 (fail open).
|
|
466
|
+
if env -u ANTHROPIC_API_KEY \
|
|
467
|
+
node "$ROOT/build/src/cli.js" utterance-check check \
|
|
468
|
+
--utterance "The test coverage is 92%." \
|
|
469
|
+
--extractor anthropic \
|
|
470
|
+
>"$TMPDIR_EVAL/no-apikey.out" 2>"$TMPDIR_EVAL/no-apikey.err"
|
|
471
|
+
then
|
|
472
|
+
status_val=$(node -e '
|
|
473
|
+
const r = JSON.parse(require("fs").readFileSync(process.argv[1],"utf8"));
|
|
474
|
+
console.log(r.status);
|
|
475
|
+
' "$TMPDIR_EVAL/no-apikey.out" 2>/dev/null || echo "parse-error")
|
|
476
|
+
if [[ "$status_val" == "not_configured" ]]; then
|
|
477
|
+
_pass "CLI --extractor anthropic without ANTHROPIC_API_KEY emits not_configured and exits 0 (fail open)"
|
|
478
|
+
elif [[ "$status_val" == "ok" || "$status_val" == "error" ]]; then
|
|
479
|
+
# If survey is installed and somehow proceeded (shouldn't happen without key), still accept
|
|
480
|
+
_pass "CLI --extractor anthropic produced a valid report (status: $status_val)"
|
|
481
|
+
else
|
|
482
|
+
_fail "CLI --extractor anthropic without API key produced unexpected output (status: $status_val)"
|
|
483
|
+
fi
|
|
484
|
+
else
|
|
485
|
+
exit_code=$?
|
|
486
|
+
# Exit 1 means survey not installed — that's a different fail-open path, acceptable
|
|
487
|
+
if [[ "$exit_code" -eq 1 ]]; then
|
|
488
|
+
_pass "CLI --extractor anthropic: survey not installed, exits 1 (not_configured)"
|
|
489
|
+
else
|
|
490
|
+
_fail "CLI --extractor anthropic without API key should exit 0 or 1 (fail open), got: $exit_code"
|
|
491
|
+
fi
|
|
492
|
+
fi
|
|
493
|
+
|
|
494
|
+
# ---------------------------------------------------------------------------
|
|
495
|
+
# CLI: utterance check registers as a valid flow-agents command
|
|
496
|
+
# ---------------------------------------------------------------------------
|
|
497
|
+
|
|
498
|
+
echo ""
|
|
499
|
+
echo "--- cli: command registration ---"
|
|
500
|
+
|
|
501
|
+
if node "$ROOT/build/src/cli.js" commands 2>/dev/null | grep -q 'utterance-check'; then
|
|
502
|
+
_pass "utterance-check is registered as a flow-agents CLI command"
|
|
503
|
+
else
|
|
504
|
+
_fail "utterance-check is not registered in flow-agents CLI commands"
|
|
505
|
+
fi
|
|
506
|
+
|
|
507
|
+
# ---------------------------------------------------------------------------
|
|
508
|
+
# Summary
|
|
509
|
+
# ---------------------------------------------------------------------------
|
|
510
|
+
|
|
511
|
+
echo ""
|
|
512
|
+
if [[ "$errors" -eq 0 ]]; then
|
|
513
|
+
echo "Utterance check integration tests passed."
|
|
514
|
+
exit 0
|
|
515
|
+
fi
|
|
516
|
+
|
|
517
|
+
echo "Utterance check integration tests failed: $errors issue(s)."
|
|
518
|
+
exit 1
|
package/evals/run.sh
CHANGED
|
@@ -190,6 +190,8 @@ run_integration() {
|
|
|
190
190
|
bash "$EVAL_DIR/integration/test_runtime_adapter_activation.sh" || result=1
|
|
191
191
|
echo ""
|
|
192
192
|
bash "$EVAL_DIR/integration/test_bundle_install.sh" || result=1
|
|
193
|
+
echo ""
|
|
194
|
+
bash "$EVAL_DIR/integration/test_bundle_lifecycle.sh" || result=1
|
|
193
195
|
return $result
|
|
194
196
|
}
|
|
195
197
|
|