@kontourai/flow-agents 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/.github/dependabot.yml +23 -0
  2. package/.github/workflows/release-please.yml +31 -0
  3. package/.github/workflows/runtime-compat.yml +118 -0
  4. package/CHANGELOG.md +46 -0
  5. package/CONTRIBUTING.md +4 -0
  6. package/README.md +80 -18
  7. package/build/src/cli/flow-kit.js +9 -4
  8. package/build/src/cli/init.js +215 -5
  9. package/build/src/cli/runtime-adapter.js +9 -5
  10. package/build/src/cli/telemetry-doctor.js +4 -1
  11. package/build/src/cli/utterance-check.js +65 -1
  12. package/build/src/runtime-adapters.js +34 -0
  13. package/build/src/tools/build-universal-bundles.js +285 -0
  14. package/build/src/tools/filter-installed-packs.js +3 -0
  15. package/build/src/tools/validate-source-tree.js +5 -1
  16. package/console.telemetry.json +115 -20
  17. package/context/scripts/telemetry/lib/config.sh +5 -1
  18. package/context/settings/flow-agents-settings.json +7 -0
  19. package/docs/_layouts/default.html +2 -0
  20. package/docs/context-map.md +1 -0
  21. package/docs/index.md +53 -4
  22. package/docs/integrations/conformance.md +246 -0
  23. package/docs/integrations/framework-adapter.md +275 -0
  24. package/docs/integrations/harness-install.md +213 -0
  25. package/docs/integrations/index.md +58 -0
  26. package/docs/integrations/knowledge-kit-live.md +211 -0
  27. package/docs/kit-authoring-guide.md +169 -0
  28. package/docs/north-star.md +2 -2
  29. package/docs/spec/runtime-hook-surface.md +525 -0
  30. package/docs/survey-utterance-check.md +211 -94
  31. package/docs/vision.md +45 -0
  32. package/evals/acceptance/run.sh +13 -2
  33. package/evals/acceptance/test_knowledge_kit_live.sh +221 -0
  34. package/evals/acceptance/test_opencode_harness.sh +121 -0
  35. package/evals/acceptance/test_pi_harness.sh +113 -0
  36. package/evals/integration/test_bundle_install.sh +226 -1
  37. package/evals/integration/test_bundle_lifecycle.sh +641 -0
  38. package/evals/integration/test_runtime_adapter_activation.sh +113 -1
  39. package/evals/integration/test_utterance_check.sh +291 -44
  40. package/evals/run.sh +2 -0
  41. package/evals/static/test_universal_bundles.sh +137 -2
  42. package/integrations/strands/README.md +256 -0
  43. package/integrations/strands/example.py +74 -0
  44. package/integrations/strands/examples/knowledge_kit_live.py +461 -0
  45. package/integrations/strands/flow_agents_strands/__init__.py +27 -0
  46. package/integrations/strands/flow_agents_strands/hooks.py +194 -0
  47. package/integrations/strands/flow_agents_strands/policy.py +348 -0
  48. package/integrations/strands/flow_agents_strands/steering.py +225 -0
  49. package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
  50. package/integrations/strands/pyproject.toml +38 -0
  51. package/integrations/strands/tests/__init__.py +0 -0
  52. package/integrations/strands/tests/test_hooks.py +392 -0
  53. package/integrations/strands/tests/test_policy.py +315 -0
  54. package/integrations/strands/tests/test_telemetry.py +184 -0
  55. package/integrations/strands-ts/README.md +224 -0
  56. package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
  57. package/integrations/strands-ts/package.json +53 -0
  58. package/integrations/strands-ts/src/hooks.ts +312 -0
  59. package/integrations/strands-ts/src/index.ts +22 -0
  60. package/integrations/strands-ts/src/policy.ts +345 -0
  61. package/integrations/strands-ts/src/telemetry.ts +251 -0
  62. package/integrations/strands-ts/test/test-policy.ts +322 -0
  63. package/integrations/strands-ts/test/test-steering.ts +159 -0
  64. package/integrations/strands-ts/test/test-telemetry.ts +226 -0
  65. package/integrations/strands-ts/tsconfig.json +20 -0
  66. package/kits/catalog.json +6 -0
  67. package/kits/knowledge/adapters/default-store/index.js +821 -0
  68. package/kits/knowledge/adapters/flow-runner/index.js +1179 -0
  69. package/kits/knowledge/adapters/flow-runner/telemetry.js +174 -0
  70. package/kits/knowledge/docs/README.md +135 -0
  71. package/kits/knowledge/docs/store-contract.md +526 -0
  72. package/kits/knowledge/evals/consolidation/suite.test.js +1234 -0
  73. package/kits/knowledge/evals/contract-suite/suite.test.js +670 -0
  74. package/kits/knowledge/evals/ingest-compile/suite.test.js +574 -0
  75. package/kits/knowledge/evals/synthesis/suite.test.js +909 -0
  76. package/kits/knowledge/flows/compile.flow.json +60 -0
  77. package/kits/knowledge/flows/consolidate.flow.json +77 -0
  78. package/kits/knowledge/flows/ingest.flow.json +60 -0
  79. package/kits/knowledge/flows/store-contract.flow.json +48 -0
  80. package/kits/knowledge/flows/synthesize.flow.json +77 -0
  81. package/kits/knowledge/kit.json +78 -0
  82. package/package.json +7 -2
  83. package/packaging/conformance/README.md +142 -0
  84. package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
  85. package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
  86. package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
  87. package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
  88. package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
  89. package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
  90. package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
  91. package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
  92. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
  93. package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
  94. package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
  95. package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
  96. package/packaging/conformance/package.json +4 -0
  97. package/packaging/conformance/run-conformance.js +322 -0
  98. package/packaging/manifest.json +59 -0
  99. package/schemas/flow-agents-settings.schema.json +48 -0
  100. package/scripts/README.md +4 -0
  101. package/scripts/dogfood.js +16 -0
  102. package/scripts/hooks/opencode-hook-adapter.js +123 -0
  103. package/scripts/hooks/opencode-telemetry-hook.js +101 -0
  104. package/scripts/hooks/pi-hook-adapter.js +123 -0
  105. package/scripts/hooks/pi-telemetry-hook.js +105 -0
  106. package/scripts/hooks/run-hook.js +8 -0
  107. package/scripts/hooks/utterance-check.js +124 -22
  108. package/scripts/telemetry/lib/config.sh +5 -1
  109. package/src/cli/flow-kit.ts +10 -4
  110. package/src/cli/init.ts +219 -6
  111. package/src/cli/runtime-adapter.ts +10 -5
  112. package/src/cli/telemetry-doctor.ts +4 -1
  113. package/src/cli/utterance-check.ts +71 -1
  114. package/src/runtime-adapters.ts +35 -0
  115. package/src/tools/build-universal-bundles.ts +283 -0
  116. package/src/tools/filter-installed-packs.ts +3 -0
  117. package/src/tools/validate-source-tree.ts +5 -1
@@ -0,0 +1,221 @@
1
+ #!/usr/bin/env bash
2
+ # test_knowledge_kit_live.sh — Acceptance: Knowledge Kit S5 live example
3
+ #
4
+ # Gated on:
5
+ # 1. ollama binary at /run/current-system/sw/bin/ollama
6
+ # 2. qwen3:1.7b model pulled (checked via ollama list)
7
+ # 3. Python venv with strands-agents[ollama] at /tmp/strands-py-live/venv
8
+ #
9
+ # Skips cleanly if any gate is absent (matching other harness conventions).
10
+ # Starts ollama serve, runs the live example, asserts evidence, stops ollama.
11
+ #
12
+ # Assertions:
13
+ # A1. Script exits 0 (overall PASS printed)
14
+ # A2. <workspace>/.telemetry/full.jsonl exists and contains tool.invoke + tool.result
15
+ # A3. <workspace>/.flow-agents/.telemetry/full.jsonl exists and contains
16
+ # session.start, tool.invoke, tool.result (FlowAgentsHooks events)
17
+ # A4. No new .telemetry directory created in the workspace's parent directory
18
+ # by this script (pre-existing parent-dir .telemetry is not counted)
19
+ # A5. At least 1 compiled record in <workspace>/.knowledge-store/records/
20
+ # A6. Compiled record has provenance source_ids referencing raw records
21
+ #
22
+ set -uo pipefail
23
+
24
+ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
25
+
26
+ OLLAMA_BIN="/run/current-system/sw/bin/ollama"
27
+ VENV_PYTHON="/tmp/strands-py-live/venv/bin/python3"
28
+ EXAMPLE_SCRIPT="$ROOT_DIR/integrations/strands/examples/knowledge_kit_live.py"
29
+
30
+ pass=0
31
+ fail=0
32
+ skip=0
33
+ OLLAMA_STARTED=0
34
+
35
+ _pass() { echo " ✓ $1"; pass=$((pass + 1)); }
36
+ _fail() { echo " ✗ $1"; fail=$((fail + 1)); }
37
+ _skip() { echo " ○ $1"; skip=$((skip + 1)); }
38
+
39
+ cleanup() { # EXIT trap: stop the ollama server launched by this script, if any
40
+ if [[ "$OLLAMA_STARTED" -eq 1 ]]; then
41
+ kill $(jobs -p) 2>/dev/null || true # signal only this shell's background jobs; pkill -f would also kill an unrelated, already-running "ollama serve"
42
+ fi
43
+ }
44
+ trap cleanup EXIT
45
+
46
+ echo "=== Acceptance: Knowledge Kit S5 Live Example ==="
47
+ echo ""
48
+
49
+ # ── Gate checks ─────────────────────────────────────────────────────────────
50
+ if [[ ! -x "$OLLAMA_BIN" ]]; then
51
+ _skip "ollama binary not found at $OLLAMA_BIN"
52
+ echo ""
53
+ echo "Results: ${pass}/$((pass + fail)) passed, ${fail} failed, ${skip} skipped"
54
+ exit 0
55
+ fi
56
+
57
+ if [[ ! -x "$VENV_PYTHON" ]]; then
58
+ _skip "Python venv not found at $VENV_PYTHON — run: python3 -m venv /tmp/strands-py-live/venv && /tmp/strands-py-live/venv/bin/pip install 'strands-agents[ollama]'"
59
+ echo ""
60
+ echo "Results: ${pass}/$((pass + fail)) passed, ${fail} failed, ${skip} skipped"
61
+ exit 0
62
+ fi
63
+
64
+ _pass "Gate: ollama binary present"
65
+ _pass "Gate: Python venv with strands-agents present"
66
+ echo ""
67
+
68
+ # ── Start ollama serve ───────────────────────────────────────────────────────
69
+ echo "--- Starting ollama serve ---"
70
+ "$OLLAMA_BIN" serve > /tmp/ollama-knowledge-kit-live.log 2>&1 &
71
+ OLLAMA_STARTED=1
72
+
73
+ # Wait for server to be ready (up to 15 seconds)
74
+ for i in {1..15}; do
75
+ if curl -s localhost:11434/v1/models >/dev/null 2>&1; then
76
+ _pass "ollama serve ready (${i}s)"
77
+ break
78
+ fi
79
+ if [[ "$i" -eq 15 ]]; then
80
+ _fail "ollama serve did not start within 15 seconds"
81
+ echo ""
82
+ echo "Results: ${pass}/$((pass + fail)) passed, ${fail} failed, ${skip} skipped"
83
+ exit 1
84
+ fi
85
+ sleep 1
86
+ done
87
+
88
+ # Model gate AFTER server start: ollama list errors when no server is running,
89
+ # which previously misreported a pulled model as missing (skip-path bug).
90
+ if ! "$OLLAMA_BIN" list 2>/dev/null | grep -q "qwen3:1.7b"; then
91
+ _skip "qwen3:1.7b model not pulled — run: ollama pull qwen3:1.7b"
92
+ echo ""
93
+ echo "Results: ${pass}/$((pass + fail)) passed, ${fail} failed, ${skip} skipped"
94
+ exit 0
95
+ fi
96
+ _pass "Gate: qwen3:1.7b model pulled"
97
+ echo ""
98
+
99
+ # ── Run the example ──────────────────────────────────────────────────────────
100
+ echo "--- Running knowledge_kit_live.py ---"
101
+ EXAMPLE_OUTPUT="$(mktemp /tmp/knowledge-kit-live-output.XXXXXX)"
102
+
103
+ FLOW_AGENTS_ROOT="$ROOT_DIR" \
104
+ "$VENV_PYTHON" "$EXAMPLE_SCRIPT" 2>&1 | tee "$EXAMPLE_OUTPUT"
105
+ EXAMPLE_EXIT="${PIPESTATUS[0]}"
106
+
107
+ echo ""
108
+
109
+ # ── Assert A1: script exits 0 ─────────────────────────────────────────────
110
+ if [[ "$EXAMPLE_EXIT" -eq 0 ]]; then
111
+ _pass "A1: example script exits 0"
112
+ else
113
+ _fail "A1: example script exited $EXAMPLE_EXIT"
114
+ fi
115
+
116
+ # Extract workspace path from script output
117
+ WORKSPACE="$(grep "^Workspace: " "$EXAMPLE_OUTPUT" | head -1 | sed 's/^Workspace: //')"
118
+ if [[ -z "$WORKSPACE" ]]; then
119
+ _fail "Could not extract workspace path from script output"
120
+ echo ""
121
+ echo "Results: ${pass}/$((pass + fail)) passed, ${fail} failed, ${skip} skipped"
122
+ exit 1
123
+ fi
124
+
125
+ echo " Workspace: $WORKSPACE"
126
+ KIT_TELEMETRY="$WORKSPACE/.telemetry/full.jsonl"
127
+ SESSION_TELEMETRY="$WORKSPACE/.flow-agents/.telemetry/full.jsonl"
128
+ STORE_RECORDS="$WORKSPACE/.knowledge-store/records"
129
+
130
+ # ── Assert A2: kit telemetry contains tool.invoke + tool.result ───────────
131
+ if [[ -f "$KIT_TELEMETRY" ]] && \
132
+ node -e "
133
+ const fs = require('fs');
134
+ const lines = fs.readFileSync('$KIT_TELEMETRY', 'utf8').trim().split('\n').filter(Boolean);
135
+ const types = lines.map(l => { try { return JSON.parse(l).event_type; } catch(e) { return ''; } });
136
+ const required = ['tool.invoke', 'tool.result'];
137
+ const missing = required.filter(t => !types.includes(t));
138
+ if (missing.length > 0) { process.stderr.write('missing: ' + missing.join(', ') + '\n'); process.exit(1); }
139
+ " 2>/dev/null; then
140
+ _pass "A2: kit telemetry contains tool.invoke + tool.result gate events"
141
+ else
142
+ _fail "A2: kit telemetry missing or lacks required event types (tool.invoke, tool.result)"
143
+ fi
144
+
145
+ # ── Assert A3: session telemetry contains session.start, tool.invoke, tool.result ─
146
+ if [[ -f "$SESSION_TELEMETRY" ]] && \
147
+ node -e "
148
+ const fs = require('fs');
149
+ const lines = fs.readFileSync('$SESSION_TELEMETRY', 'utf8').trim().split('\n').filter(Boolean);
150
+ const types = lines.map(l => { try { return JSON.parse(l).event_type; } catch(e) { return ''; } });
151
+ const required = ['session.start', 'tool.invoke', 'tool.result'];
152
+ const missing = required.filter(t => !types.includes(t));
153
+ if (missing.length > 0) { process.stderr.write('missing: ' + missing.join(', ') + '\n'); process.exit(1); }
154
+ " 2>/dev/null; then
155
+ _pass "A3: session telemetry contains session.start, tool.invoke, tool.result"
156
+ else
157
+ _fail "A3: session telemetry missing or lacks required FlowAgentsHooks events"
158
+ fi
159
+
160
+ # ── Assert A4: workspace telemetry does not leak to parent ────────────────
161
+ # This assertion checks that telemetry written during this test run does not
162
+ # appear in the parent directory. We verify that the workspace telemetry is
163
+ # contained within WORKSPACE, not in its parent.
164
+ # (Pre-existing .telemetry in the system temp dir is not counted as a leak.)
165
+ PARENT_TELEMETRY="$(dirname "$WORKSPACE")/.telemetry"
166
+ if [[ -d "$PARENT_TELEMETRY" ]]; then
167
+ # Only fail on .jsonl files modified since this script started. $EXAMPLE_OUTPUT cannot serve as a -newer reference: tee rewrites it for the whole run, so mid-run leaks are never newer than it.
168
+ PARENT_MTIME="$(find "$PARENT_TELEMETRY" -name "*.jsonl" -mmin "-$(( SECONDS / 60 + 2 ))" 2>/dev/null | wc -l | tr -d ' ')" # SECONDS = age of this shell; +2 min absorbs find's minute rounding
169
+ if [[ "$PARENT_MTIME" -gt 0 ]]; then
170
+ _fail "A4: telemetry leaked — new .jsonl files written to workspace parent directory during this test"
171
+ else
172
+ _pass "A4: workspace telemetry contained within workspace (pre-existing parent .telemetry not modified by this test)"
173
+ fi
174
+ else
175
+ _pass "A4: no .telemetry in workspace parent directory"
176
+ fi
177
+
178
+ # ── Assert A5: at least 1 compiled record exists ─────────────────────────
179
+ COMPILED_COUNT=0
180
+ if [[ -d "$STORE_RECORDS" ]]; then
181
+ COMPILED_COUNT=$(grep -rl "type: compiled" "$STORE_RECORDS"/*.md 2>/dev/null | wc -l | tr -d ' ')
182
+ fi
183
+ if [[ "$COMPILED_COUNT" -ge 1 ]]; then
184
+ _pass "A5: compiled record found in store ($COMPILED_COUNT)"
185
+ else
186
+ _fail "A5: no compiled records found in $STORE_RECORDS"
187
+ fi
188
+
189
+ # ── Assert A6: compiled record has provenance source_ids ─────────────────
190
+ PROVENANCE_OK=0
191
+ if [[ -d "$STORE_RECORDS" ]]; then
192
+ for compiled_md in "$STORE_RECORDS"/*.md; do
193
+ [[ -f "$compiled_md" ]] || continue
194
+ if grep -q "type: compiled" "$compiled_md" && grep -q "source_ids:" "$compiled_md"; then
195
+ # Require at least 2 list-item lines (counted file-wide, as a stand-in for referenced raw ids)
196
+ SOURCE_COUNT=$(grep -c "^ - " "$compiled_md" 2>/dev/null || true) # grep -c already prints 0 on no match; "|| echo 0" appended a second 0 and broke the -ge test
197
+ if [[ "${SOURCE_COUNT:-0}" -ge 2 ]]; then
198
+ PROVENANCE_OK=1
199
+ break
200
+ fi
201
+ fi
202
+ done
203
+ fi
204
+ if [[ "$PROVENANCE_OK" -eq 1 ]]; then
205
+ _pass "A6: compiled record has provenance source_ids with resolving raw refs"
206
+ else
207
+ _fail "A6: compiled record missing source_ids or insufficient provenance refs"
208
+ fi
209
+
210
+ # ── Cleanup temp files ───────────────────────────────────────────────────
211
+ rm -f "$EXAMPLE_OUTPUT"
212
+ if [[ -d "$WORKSPACE" ]]; then
213
+ rm -rf "$WORKSPACE"
214
+ fi
215
+
216
+ echo ""
217
+ echo "==========================="
218
+ total=$((pass + fail))
219
+ echo "Results: ${pass}/${total} passed, ${fail} failed, ${skip} skipped"
220
+ [[ "$fail" -gt 0 ]] && exit 1
221
+ exit 0
@@ -0,0 +1,121 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
5
+ source "$ROOT_DIR/evals/lib/node.sh"
6
+ TMP_WORK=""
7
+ pass=0
8
+ fail=0
9
+ skip=0
10
+
11
+ cleanup() { # EXIT trap: remove the temp workspace once one has been created
12
+ if [[ -n "$TMP_WORK" ]]; then rm -rf "$TMP_WORK"; fi # the former && list returned 1 when TMP_WORK was empty; under set -e that turned the skip path's "exit 0" into exit 1
13
+ }
14
+ trap cleanup EXIT
15
+
16
+ _pass() { echo " ✓ $1"; pass=$((pass + 1)); }
17
+ _fail() { echo " ✗ $1"; fail=$((fail + 1)); }
18
+ _skip() { echo " ○ $1"; skip=$((skip + 1)); }
19
+
20
+ wait_for_telemetry() {
21
+ local file="$1"
22
+ local i=0
23
+ while [[ $i -lt 150 ]]; do
24
+ [[ -s "$file" ]] && return 0
25
+ sleep 0.1
26
+ i=$((i + 1))
27
+ done
28
+ return 1
29
+ }
30
+
31
+ echo "=== Harness Acceptance: opencode ==="
32
+ echo ""
33
+
34
+ if ! command -v opencode >/dev/null 2>&1; then
35
+ _skip "opencode CLI not installed"
36
+ echo ""
37
+ echo "Results: ${pass}/$((pass + fail)) passed, ${fail} failed, ${skip} skipped"
38
+ exit 0
39
+ fi
40
+
41
+ cd "$ROOT_DIR"
42
+ flow_agents_node scripts/build-universal-bundles.js >/dev/null
43
+
44
+ TMP_WORK="$(mktemp -d /tmp/opencode-acceptance-work.XXXXXX)"
45
+ (cd dist/opencode && bash install.sh "$TMP_WORK") >/dev/null
46
+
47
+ echo "--- Plugin Load + Telemetry ---"
48
+ cd "$TMP_WORK"
49
+ rm -rf .telemetry
50
+
51
+ MODEL_ARGS=()
52
+ if [[ -n "${FLOW_AGENTS_ACCEPT_OPENCODE_MODEL:-}" ]]; then
53
+ MODEL_ARGS=(-m "$FLOW_AGENTS_ACCEPT_OPENCODE_MODEL")
54
+ fi
55
+
56
+ # Models sometimes answer without calling the tool (nondeterminism), which
57
+ # would void the tool.invoke/tool.result assertions — force the tool call
58
+ # and retry once if no tool events landed.
59
+ ACCEPT_PROMPT="You MUST call the read tool before replying — answering from memory is a failure. Read the first 5 lines of README.md with the read tool, then reply: done"
60
+ run_output=""
61
+ provider_error=0
62
+ for _attempt in 1 2; do # second attempt only if no tool.invoke event landed
63
+ run_output="$(opencode run ${MODEL_ARGS[@]+"${MODEL_ARGS[@]}"} "$ACCEPT_PROMPT" 2>&1 || true)" # ${arr[@]+…} keeps set -u from aborting on an empty array in bash < 4.4
64
+ if echo "$run_output" | grep -qi "error"; then # any "error" in the output is treated as a provider/auth failure
65
+ provider_error=1
66
+ break
67
+ fi
68
+ provider_error=0
69
+ for _i in $(seq 1 50); do
70
+ [[ -s "$TMP_WORK/.telemetry/full.jsonl" ]] && grep -q '"tool.invoke"' "$TMP_WORK/.telemetry/full.jsonl" 2>/dev/null && break
71
+ sleep 0.3
72
+ done
73
+ grep -q '"tool.invoke"' "$TMP_WORK/.telemetry/full.jsonl" 2>/dev/null && break
74
+ done
75
+
76
+ LATEST_LOG="$(ls -t ~/.local/share/opencode/log/*.log 2>/dev/null | head -1 || true)"
77
+ if [[ -n "$LATEST_LOG" ]] && grep -q "plugins/flow-agents.js loading plugin" "$LATEST_LOG" 2>/dev/null; then
78
+ _pass "opencode log confirms flow-agents plugin loaded"
79
+ else
80
+ _fail "opencode log did not confirm flow-agents plugin loaded"
81
+ fi
82
+
83
+ telemetry_file="$TMP_WORK/.telemetry/full.jsonl"
84
+ if [[ "$provider_error" -eq 1 ]]; then
85
+ _skip "opencode telemetry assertions skipped (provider/auth error)"
86
+ _skip "opencode telemetry tool events skipped (provider/auth error)"
87
+ else
88
+ if wait_for_telemetry "$telemetry_file"; then
89
+ _pass "opencode telemetry log was written"
90
+ else
91
+ _fail "opencode telemetry log was not written"
92
+ fi
93
+
94
+ if [[ -f "$telemetry_file" ]] && \
95
+ node -e "
96
+ const fs = require('fs');
97
+ const lines = fs.readFileSync('$telemetry_file', 'utf8').trim().split('\n');
98
+ const types = lines.map(l => { try { return JSON.parse(l).event_type; } catch(e) { return ''; } });
99
+ const hasInvoke = types.some(t => t === 'tool.invoke');
100
+ const hasResult = types.some(t => t === 'tool.result');
101
+ process.exit(hasInvoke && hasResult ? 0 : 1);
102
+ " 2>/dev/null; then
103
+ _pass "opencode telemetry contains tool.invoke and tool.result events"
104
+ else
105
+ _fail "opencode telemetry missing tool.invoke or tool.result events"
106
+ fi
107
+ fi
108
+
109
+ PARENT_TELEMETRY="$(dirname "$TMP_WORK")/.telemetry"
110
+ if [[ -d "$PARENT_TELEMETRY" ]]; then
111
+ _fail "opencode wrote .telemetry to workspace parent directory"
112
+ else
113
+ _pass "no .telemetry leak to workspace parent directory"
114
+ fi
115
+
116
+ echo ""
117
+ echo "==========================="
118
+ total=$((pass + fail))
119
+ echo "Results: ${pass}/${total} passed, ${fail} failed, ${skip} skipped"
120
+ [[ "$fail" -gt 0 ]] && exit 1
121
+ exit 0
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
5
+ source "$ROOT_DIR/evals/lib/node.sh"
6
+ TMP_WORK=""
7
+ pass=0
8
+ fail=0
9
+ skip=0
10
+
11
+ cleanup() { # EXIT trap: remove the temp workspace once one has been created
12
+ if [[ -n "$TMP_WORK" ]]; then rm -rf "$TMP_WORK"; fi # the former && list returned 1 when TMP_WORK was empty; under set -e that turned the skip path's "exit 0" into exit 1
13
+ }
14
+ trap cleanup EXIT
15
+
16
+ _pass() { echo " ✓ $1"; pass=$((pass + 1)); }
17
+ _fail() { echo " ✗ $1"; fail=$((fail + 1)); }
18
+ _skip() { echo " ○ $1"; skip=$((skip + 1)); }
19
+
20
+ wait_for_telemetry() {
21
+ local file="$1"
22
+ local i=0
23
+ while [[ $i -lt 150 ]]; do
24
+ [[ -s "$file" ]] && return 0
25
+ sleep 0.1
26
+ i=$((i + 1))
27
+ done
28
+ return 1
29
+ }
30
+
31
+ echo "=== Harness Acceptance: pi ==="
32
+ echo ""
33
+
34
+ if ! command -v pi >/dev/null 2>&1; then
35
+ _skip "pi CLI not installed"
36
+ echo ""
37
+ echo "Results: ${pass}/$((pass + fail)) passed, ${fail} failed, ${skip} skipped"
38
+ exit 0
39
+ fi
40
+
41
+ cd "$ROOT_DIR"
42
+ flow_agents_node scripts/build-universal-bundles.js >/dev/null
43
+
44
+ TMP_WORK="$(mktemp -d /tmp/pi-acceptance-work.XXXXXX)"
45
+ (cd dist/pi && bash install.sh "$TMP_WORK") >/dev/null
46
+
47
+ echo "--- Telemetry ---"
48
+ cd "$TMP_WORK"
49
+ rm -rf .telemetry
50
+
51
+ run_output="$(pi --approve -p \
52
+ "Use your read tool to read the first 5 lines of README.md, then reply: done" 2>&1 || true)"
53
+ provider_error=0
54
+ if echo "$run_output" | grep -qi "error"; then
55
+ provider_error=1
56
+ fi
57
+
58
+ telemetry_file="$TMP_WORK/.telemetry/full.jsonl"
59
+ if [[ "$provider_error" -eq 1 ]]; then
60
+ _skip "pi telemetry assertions skipped (provider/auth error)"
61
+ _skip "pi telemetry event types skipped (provider/auth error)"
62
+ _skip "pi telemetry session events skipped (provider/auth error)"
63
+ else
64
+ if wait_for_telemetry "$telemetry_file"; then
65
+ _pass "pi telemetry log was written"
66
+ else
67
+ _fail "pi telemetry log was not written"
68
+ fi
69
+
70
+ if [[ -f "$telemetry_file" ]] && \
71
+ node -e "
72
+ const fs = require('fs');
73
+ const lines = fs.readFileSync('$telemetry_file', 'utf8').trim().split('\n');
74
+ const types = lines.map(l => { try { return JSON.parse(l).event_type; } catch(e) { return ''; } });
75
+ const required = ['session.start', 'tool.invoke', 'tool.result', 'session.end'];
76
+ const missing = required.filter(t => !types.includes(t));
77
+ if (missing.length > 0) { process.stderr.write('missing: ' + missing.join(', ') + '\n'); process.exit(1); }
78
+ process.exit(0);
79
+ " 2>/dev/null; then
80
+ _pass "pi telemetry contains session.start, tool.invoke, tool.result, session.end"
81
+ else
82
+ _fail "pi telemetry missing one or more required event types (session.start, tool.invoke, tool.result, session.end)"
83
+ fi
84
+
85
+ # Assert session.start appears exactly once (guards against before_agent_start double-emit).
86
+ if [[ -f "$telemetry_file" ]] && \
87
+ node -e "
88
+ const fs = require('fs');
89
+ const lines = fs.readFileSync('$telemetry_file', 'utf8').trim().split('\n');
90
+ const types = lines.map(l => { try { return JSON.parse(l).event_type; } catch(e) { return ''; } });
91
+ const count = types.filter(t => t === 'session.start').length;
92
+ if (count !== 1) { process.stderr.write('session.start count=' + count + ' (expected exactly 1)\n'); process.exit(1); }
93
+ process.exit(0);
94
+ " 2>/dev/null; then
95
+ _pass "pi telemetry: session.start appears exactly once (no double-emit)"
96
+ else
97
+ _fail "pi telemetry: session.start count is not 1 (double-emit or missing)"
98
+ fi
99
+ fi
100
+
101
+ PARENT_TELEMETRY="$(dirname "$TMP_WORK")/.telemetry"
102
+ if [[ -d "$PARENT_TELEMETRY" ]]; then
103
+ _fail "pi wrote .telemetry to workspace parent directory"
104
+ else
105
+ _pass "no .telemetry leak to workspace parent directory"
106
+ fi
107
+
108
+ echo ""
109
+ echo "==========================="
110
+ total=$((pass + fail))
111
+ echo "Results: ${pass}/${total} passed, ${fail} failed, ${skip} skipped"
112
+ [[ "$fail" -gt 0 ]] && exit 1
113
+ exit 0