@kontourai/flow-agents 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/.github/dependabot.yml +23 -0
  2. package/.github/workflows/release-please.yml +31 -0
  3. package/.github/workflows/runtime-compat.yml +118 -0
  4. package/CHANGELOG.md +46 -0
  5. package/CONTRIBUTING.md +4 -0
  6. package/README.md +80 -18
  7. package/build/src/cli/flow-kit.js +9 -4
  8. package/build/src/cli/init.js +215 -5
  9. package/build/src/cli/runtime-adapter.js +9 -5
  10. package/build/src/cli/telemetry-doctor.js +4 -1
  11. package/build/src/cli/utterance-check.js +65 -1
  12. package/build/src/runtime-adapters.js +34 -0
  13. package/build/src/tools/build-universal-bundles.js +285 -0
  14. package/build/src/tools/filter-installed-packs.js +3 -0
  15. package/build/src/tools/validate-source-tree.js +5 -1
  16. package/console.telemetry.json +115 -20
  17. package/context/scripts/telemetry/lib/config.sh +5 -1
  18. package/context/settings/flow-agents-settings.json +7 -0
  19. package/docs/_layouts/default.html +2 -0
  20. package/docs/context-map.md +1 -0
  21. package/docs/index.md +53 -4
  22. package/docs/integrations/conformance.md +246 -0
  23. package/docs/integrations/framework-adapter.md +275 -0
  24. package/docs/integrations/harness-install.md +213 -0
  25. package/docs/integrations/index.md +58 -0
  26. package/docs/integrations/knowledge-kit-live.md +211 -0
  27. package/docs/kit-authoring-guide.md +169 -0
  28. package/docs/north-star.md +2 -2
  29. package/docs/spec/runtime-hook-surface.md +525 -0
  30. package/docs/survey-utterance-check.md +211 -94
  31. package/docs/vision.md +45 -0
  32. package/evals/acceptance/run.sh +13 -2
  33. package/evals/acceptance/test_knowledge_kit_live.sh +221 -0
  34. package/evals/acceptance/test_opencode_harness.sh +121 -0
  35. package/evals/acceptance/test_pi_harness.sh +113 -0
  36. package/evals/integration/test_bundle_install.sh +226 -1
  37. package/evals/integration/test_bundle_lifecycle.sh +641 -0
  38. package/evals/integration/test_runtime_adapter_activation.sh +113 -1
  39. package/evals/integration/test_utterance_check.sh +291 -44
  40. package/evals/run.sh +2 -0
  41. package/evals/static/test_universal_bundles.sh +137 -2
  42. package/integrations/strands/README.md +256 -0
  43. package/integrations/strands/example.py +74 -0
  44. package/integrations/strands/examples/knowledge_kit_live.py +461 -0
  45. package/integrations/strands/flow_agents_strands/__init__.py +27 -0
  46. package/integrations/strands/flow_agents_strands/hooks.py +194 -0
  47. package/integrations/strands/flow_agents_strands/policy.py +348 -0
  48. package/integrations/strands/flow_agents_strands/steering.py +225 -0
  49. package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
  50. package/integrations/strands/pyproject.toml +38 -0
  51. package/integrations/strands/tests/__init__.py +0 -0
  52. package/integrations/strands/tests/test_hooks.py +392 -0
  53. package/integrations/strands/tests/test_policy.py +315 -0
  54. package/integrations/strands/tests/test_telemetry.py +184 -0
  55. package/integrations/strands-ts/README.md +224 -0
  56. package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
  57. package/integrations/strands-ts/package.json +53 -0
  58. package/integrations/strands-ts/src/hooks.ts +312 -0
  59. package/integrations/strands-ts/src/index.ts +22 -0
  60. package/integrations/strands-ts/src/policy.ts +345 -0
  61. package/integrations/strands-ts/src/telemetry.ts +251 -0
  62. package/integrations/strands-ts/test/test-policy.ts +322 -0
  63. package/integrations/strands-ts/test/test-steering.ts +159 -0
  64. package/integrations/strands-ts/test/test-telemetry.ts +226 -0
  65. package/integrations/strands-ts/tsconfig.json +20 -0
  66. package/kits/catalog.json +6 -0
  67. package/kits/knowledge/adapters/default-store/index.js +821 -0
  68. package/kits/knowledge/adapters/flow-runner/index.js +1179 -0
  69. package/kits/knowledge/adapters/flow-runner/telemetry.js +174 -0
  70. package/kits/knowledge/docs/README.md +135 -0
  71. package/kits/knowledge/docs/store-contract.md +526 -0
  72. package/kits/knowledge/evals/consolidation/suite.test.js +1234 -0
  73. package/kits/knowledge/evals/contract-suite/suite.test.js +670 -0
  74. package/kits/knowledge/evals/ingest-compile/suite.test.js +574 -0
  75. package/kits/knowledge/evals/synthesis/suite.test.js +909 -0
  76. package/kits/knowledge/flows/compile.flow.json +60 -0
  77. package/kits/knowledge/flows/consolidate.flow.json +77 -0
  78. package/kits/knowledge/flows/ingest.flow.json +60 -0
  79. package/kits/knowledge/flows/store-contract.flow.json +48 -0
  80. package/kits/knowledge/flows/synthesize.flow.json +77 -0
  81. package/kits/knowledge/kit.json +78 -0
  82. package/package.json +7 -2
  83. package/packaging/conformance/README.md +142 -0
  84. package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
  85. package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
  86. package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
  87. package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
  88. package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
  89. package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
  90. package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
  91. package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
  92. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
  93. package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
  94. package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
  95. package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
  96. package/packaging/conformance/package.json +4 -0
  97. package/packaging/conformance/run-conformance.js +322 -0
  98. package/packaging/manifest.json +59 -0
  99. package/schemas/flow-agents-settings.schema.json +48 -0
  100. package/scripts/README.md +4 -0
  101. package/scripts/dogfood.js +16 -0
  102. package/scripts/hooks/opencode-hook-adapter.js +123 -0
  103. package/scripts/hooks/opencode-telemetry-hook.js +101 -0
  104. package/scripts/hooks/pi-hook-adapter.js +123 -0
  105. package/scripts/hooks/pi-telemetry-hook.js +105 -0
  106. package/scripts/hooks/run-hook.js +8 -0
  107. package/scripts/hooks/utterance-check.js +124 -22
  108. package/scripts/telemetry/lib/config.sh +5 -1
  109. package/src/cli/flow-kit.ts +10 -4
  110. package/src/cli/init.ts +219 -6
  111. package/src/cli/runtime-adapter.ts +10 -5
  112. package/src/cli/telemetry-doctor.ts +4 -1
  113. package/src/cli/utterance-check.ts +71 -1
  114. package/src/runtime-adapters.ts +35 -0
  115. package/src/tools/build-universal-bundles.ts +283 -0
  116. package/src/tools/filter-installed-packs.ts +3 -0
  117. package/src/tools/validate-source-tree.ts +5 -1
@@ -4,72 +4,206 @@ title: Survey Utterance Check Integration
4
4
 
5
5
  # Survey Utterance Check Integration
6
6
 
7
- Flow Agents can optionally check agent utterances for evidence coverage using `@kontourai/survey`. This integration is disabled by default and intentionally optional — ordinary Flow Agents workflows do not require Survey.
7
+ When an agent says something factual — "test coverage is 92%", "the API is backward-compatible", "no breaking changes in this release" — that claim either has evidence behind it or it doesn't. The utterance check feature bridges Flow Agents hooks to `@kontourai/survey` so that every factual statement in an agent response is compared against a trust bundle and tagged with a badge. Statements with no backing evidence are flagged inline so the agent can acknowledge the gap rather than assert silently.
8
8
 
9
- The guiding rule mirrors the Veritas boundary: Flow Agents owns the hook wiring and badge guidance format; Survey owns the extraction, claim resolution, and trust report semantics.
9
+ This document explains how to enable and configure the feature, what the workflow looks like end to end, and what to watch out for.
10
10
 
11
- ## Background: ADR 0003 §9
11
+ ---
12
+
13
+ ## What actually happens
14
+
15
+ Here is a concrete walkthrough from agent response to badge guidance:
16
+
17
+ ```
18
+ Agent says: "The test coverage for auth-service is 92%.
19
+ All critical paths have been verified."
20
+
21
+ Flow Agents hook (PostToolUse):
22
+ 1. Captures the agent response text from the PostToolUse event.
23
+ 2. Invokes the utterance-check CLI with the response text and your trust bundle.
24
+
25
+ @kontourai/survey (inside the CLI):
26
+ 3. Extractor splits the response into factual statements:
27
+ - "test coverage for auth-service is 92%"
28
+ - "All critical paths have been verified"
29
+ 4. Each statement is resolved against the trust bundle.
30
+ 5. Neither statement has a matching verified claim → both resolve as "unsupported".
31
+
32
+ Flow Agents hook injects guidance into the agent context:
33
+ UTTERANCE CHECK: 2 statement(s) in this response lack evidence coverage.
34
+ Summary: unsupported:2
35
+ - [unsupported] "test coverage for auth-service is 92%"
36
+ - [unsupported] "All critical paths have been verified"
37
+ Evidence note: unsupported = no matching claim in the trust bundle; ...
38
+ ```
39
+
40
+ The agent sees honest gap disclosure rather than silent pass-through. It can then cite sources, note the gap explicitly, or record a coverage claim via `@kontourai/survey`.
41
+
42
+ ---
43
+
44
+ ## Deciding between report and strict mode
45
+
46
+ The hook has two modes:
47
+
48
+ | Mode | Effect |
49
+ |------|--------|
50
+ | `report` (default) | Appends badge guidance to the agent context. Never blocks. Agent decides next step. |
51
+ | `strict` | If any statement is `unsupported`, `disputed`, or `rejected`, the hook exits 2, which routes the Stop event back to the agent for revision. |
52
+
53
+ Use **report** when you want visibility without gate behavior — good for exploratory sessions, onboarding, or repos where the trust bundle is still being built out. Use **strict** when you want the agent to revise or cite sources before completing a turn — appropriate for regulated workflows, production deployments, or repos with a well-populated bundle.
54
+
55
+ The empty-bundle caveat: if you enable the hook without a `bundlePath`, every factual statement the extractor finds will resolve as `unsupported` because there are no claims to match against. In strict mode this means every response with factual statements will be blocked. Make sure you either provide a `bundlePath` or use report mode until you have a bundle.
56
+
57
+ ---
58
+
59
+ ## The trust bundle
60
+
61
+ The trust bundle is a JSON file with a `claims` array. It is the authoritative record of what is considered evidenced for your codebase. Three practical sources:
62
+
63
+ - **Veritas-generated bundle**: if your repo uses `@veritas/veritas`, it can produce a `trust.bundle.json` from `.veritas/evidence`. Point `bundlePath` at that output.
64
+ - **Surface report**: the `@kontourai/surface` package can generate a trust bundle from a surface verification run. If your repo runs surface checks, look for the generated bundle in the surface output directory (e.g. `dist/trust-bundle.json` or a named artifact).
65
+ - **Hand-authored bundle**: a minimal bundle is just `{ "claims": [] }`. Add claims incrementally as you record evidence.
66
+
67
+ An empty or missing bundle means everything is unsupported. That is not necessarily wrong — it is an honest starting state — but it is only useful in report mode.
68
+
69
+ ---
70
+
71
+ ## Choosing an extractor
12
72
 
13
- ADR 0003 §9 designates agent-utterance extraction as a **Survey producer profile** — Survey pointed at agent prose instead of web sources. Each factual statement in agent output is extracted as a candidate claim and run through Survey's Inquiry pipeline. Flow Agents supplies the enforcement point (hooks) that ADR 0003 calls out. This integration is step 6 of the ADR sequencing and depends on the Inquiry pipeline already existing in Survey.
73
+ The extractor is responsible for splitting the agent utterance into discrete factual statements. Two are available:
14
74
 
15
- ## User-Facing Story
75
+ | Extractor | How it works | Requirements |
76
+ |-----------|-------------|--------------|
77
+ | `reference` (default) | Pattern-based heuristics. Fast, no API call, no key needed. Works offline. Lower recall on complex prose. | `@kontourai/survey` installed |
78
+ | `anthropic` | Model-backed extraction via `@kontourai/survey/anthropic`. Higher recall, understands context and nuance, can split compound claims. | `@kontourai/survey` + `@anthropic-ai/sdk` installed, `ANTHROPIC_API_KEY` set |
16
79
 
17
- ```text
18
- Agent: "The test coverage for auth-service is 92%. All critical paths have been verified."
80
+ For most exploratory use, `reference` is sufficient. Switch to `anthropic` when you find the reference extractor is missing statements that matter for your domain.
19
81
 
20
- Flow Agents (hook active):
21
- 1. Captures the agent's response text from the PostToolUse event.
22
- 2. Invokes the utterance-check CLI adapter with the response text.
23
- 3. @kontourai/survey extracts factual statements: coverage:92%, paths:verified.
24
- 4. Survey resolves each statement against the configured trust bundle.
25
- 5. Statements without matching claims resolve as "unsupported".
26
- 6. Flow Agents injects badge guidance into the agent context:
27
- UTTERANCE CHECK: 2 statement(s) lack evidence coverage.
28
- - [unsupported] "test coverage for auth-service is 92%"
29
- - [unsupported] "All critical paths have been verified"
82
+ The `anthropic` extractor fails open: if `ANTHROPIC_API_KEY` is missing or `@anthropic-ai/sdk` is not installed, the CLI emits `status: "not_configured"` (with a clear explanation in `summary`) and exits 0. The hook treats this as a silent pass-through. You will see a message in stderr explaining what is missing, but the hook will not block.
83
+
84
+ ---
85
+
86
+ ## Per-repo configuration
87
+
88
+ The canonical way to enable utterance checking is a `context/settings/flow-agents-settings.json` file in the consumer repo. This is a peer to `context/settings/backlog-provider-settings.json` — the same directory, the same convention.
89
+
90
+ **Minimal example (report mode, reference extractor):**
91
+
92
+ ```json
93
+ {
94
+ "$schema": "../../node_modules/@kontourai/flow-agents/schemas/flow-agents-settings.schema.json",
95
+ "schema_version": "1.0",
96
+ "utteranceCheck": {
97
+ "enabled": true,
98
+ "mode": "report",
99
+ "extractor": "reference"
100
+ }
101
+ }
30
102
  ```
31
103
 
32
- The agent sees honest gap disclosure rather than silent pass-through.
104
+ **With a trust bundle and anthropic extractor:**
33
105
 
34
- ## Ownership Split
106
+ ```json
107
+ {
108
+ "$schema": "../../node_modules/@kontourai/flow-agents/schemas/flow-agents-settings.schema.json",
109
+ "schema_version": "1.0",
110
+ "utteranceCheck": {
111
+ "enabled": true,
112
+ "mode": "report",
113
+ "extractor": "anthropic",
114
+ "bundlePath": ".veritas/trust.bundle.json",
115
+ "model": "claude-haiku-4-5",
116
+ "agentId": "surface-agent"
117
+ }
118
+ }
119
+ ```
35
120
 
36
- | Area | Flow Agents Owns | Survey Owns |
37
- | --- | --- | --- |
38
- | Hook wiring | PostToolUse/Stop hook, badge guidance format, enable/disable flags | None |
39
- | Extraction | Invoking the CLI adapter | Statement extraction, extractor interface |
40
- | Resolution | Passing the trust bundle path | Inquiry pipeline, claim resolution |
41
- | Output | Guidance text injected into agent context | UtteranceTrustReport with per-statement badges |
42
- | Packaging | Optional hook activation, CLI adapter | @kontourai/survey npm package |
121
+ **Strict mode:**
43
122
 
44
- Flow Agents does not own trust claim models, inquiry semantics, or extractor implementations. Survey's `referenceUtteranceExtractor` is the default extractor; production use should inject `createAnthropicUtteranceExtractor` from `@kontourai/survey/anthropic` for model-backed extraction.
123
+ ```json
124
+ {
125
+ "$schema": "../../node_modules/@kontourai/flow-agents/schemas/flow-agents-settings.schema.json",
126
+ "schema_version": "1.0",
127
+ "utteranceCheck": {
128
+ "enabled": true,
129
+ "mode": "strict",
130
+ "extractor": "anthropic",
131
+ "bundlePath": "dist/trust-bundle.json"
132
+ }
133
+ }
134
+ ```
45
135
 
46
- ## Enabling the Hook
136
+ Config field reference:
47
137
 
48
- The hook is disabled by default. Set environment variables before starting the agent session:
138
+ | Field | Type | Default | Description |
139
+ |-------|------|---------|-------------|
140
+ | `enabled` | boolean | `false` | Whether utterance checking is active for this repo. |
141
+ | `mode` | `"report"` \| `"strict"` | `"report"` | How to handle concerning badges. See above. |
142
+ | `extractor` | `"reference"` \| `"anthropic"` | `"reference"` | Extractor to use. See above. |
143
+ | `bundlePath` | string | — | Repo-relative or absolute path to the trust bundle JSON. Omit to use an empty bundle. |
144
+ | `model` | string | — | Model for the anthropic extractor. Only used when `extractor` is `"anthropic"`. |
145
+ | `agentId` | string | `"flow-agents-hook"` | Agent identifier for provenance in the trust report. |
49
146
 
50
- ```bash
51
- export FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=true
147
+ ---
148
+
149
+ ## Environment variable overrides
150
+
151
+ For one-off sessions or CI pipelines, you can override the config with environment variables. These take precedence over `flow-agents-settings.json`.
152
+
153
+ | Variable | Effect |
154
+ |----------|--------|
155
+ | `FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=true\|false` | Force the hook on or off, overriding the config `enabled` field. |
156
+ | `FLOW_AGENTS_UTTERANCE_CHECK_STRICT=true` | Force strict mode. |
157
+ | `FLOW_AGENTS_UTTERANCE_CHECK_BUNDLE_PATH=/path/to/bundle.json` | Override `bundlePath`. |
158
+ | `FLOW_AGENTS_UTTERANCE_CHECK_AGENT_ID=my-agent` | Override `agentId`. |
159
+ | `FLOW_AGENTS_UTTERANCE_CHECK_EXTRACTOR=anthropic\|reference` | Override `extractor`. |
160
+
161
+ **When the config file is absent and no env vars are set**, the hook is disabled. This is the safe default — existing repos are not affected until they opt in.
162
+
163
+ ---
164
+
165
+ ## Registering the hook
166
+
167
+ Add the utterance check to a Claude Code session via `.claude/settings.json`:
52
168
 
53
- # Optional: path to a trust bundle JSON file for claim resolution
54
- export FLOW_AGENTS_UTTERANCE_CHECK_BUNDLE_PATH=/path/to/trust-bundle.json
169
+ ```json
170
+ {
171
+ "hooks": {
172
+ "PostToolUse": [
173
+ {
174
+ "matcher": ".*",
175
+ "hooks": [
176
+ {
177
+ "type": "command",
178
+ "command": "node scripts/hooks/claude-hook-adapter.js PostToolUse post:utterance-check utterance-check.js standard,strict"
179
+ }
180
+ ]
181
+ }
182
+ ]
183
+ }
184
+ }
185
+ ```
55
186
 
56
- # Optional: agent identifier for provenance
57
- export FLOW_AGENTS_UTTERANCE_CHECK_AGENT_ID=my-codex-session
187
+ Or run the hook directly (Kiro/Codex convention, exit 2 blocks):
58
188
 
59
- # Optional: strict mode — blocks Stop when concerning badges are present
60
- export FLOW_AGENTS_UTTERANCE_CHECK_STRICT=true
189
+ ```bash
190
+ node scripts/hooks/run-hook.js post:utterance-check utterance-check.js standard,strict
61
191
  ```
62
192
 
63
- The hook runs through the standard `run-hook.js` runner and respects `SA_DISABLED_HOOKS` and `SA_HOOK_PROFILE`.
193
+ The hook reads `context/settings/flow-agents-settings.json` relative to the repo root it detects from the hook event `cwd` or `process.cwd()`. No configuration is needed in the hook command itself.
194
+
195
+ ---
64
196
 
65
- ## CLI Adapter Contract
197
+ ## CLI reference
66
198
 
67
199
  The utterance check CLI is available as:
68
200
 
69
201
  ```bash
70
202
  node build/src/cli.js utterance-check check \
71
203
  --utterance "The coverage is 92% and all tests pass." \
72
- --bundle-path .surface/trust-bundle.json \
204
+ --bundle-path .veritas/trust.bundle.json \
205
+ --extractor anthropic \
206
+ --model claude-haiku-4-5 \
73
207
  --agent-id my-session
74
208
  ```
75
209
 
@@ -79,6 +213,8 @@ Options:
79
213
  --utterance TEXT Utterance text to check (required unless --not-configured).
80
214
  --bundle-path FILE Trust bundle JSON file. Omit for an empty bundle (all unsupported).
81
215
  --agent-id ID Agent identifier for provenance (default: flow-agents-utterance-check).
216
+ --extractor NAME 'reference' (default) or 'anthropic'.
217
+ --model MODEL Model for the anthropic extractor (e.g. claude-haiku-4-5).
82
218
  --not-configured Skip survey call; output not_configured without error.
83
219
  --strict Exit non-zero when any badge is disputed, rejected, or unsupported.
84
220
  --help Show this help.
@@ -109,83 +245,64 @@ The CLI outputs a JSON report to stdout:
109
245
  Badge values:
110
246
 
111
247
  | Badge | Meaning |
112
- | --- | --- |
113
- | `verified` | Matched a claim with verified status |
114
- | `assumed` | Matched a claim with assumed status |
115
- | `stale` | Matched a claim that is stale |
116
- | `disputed` | Matched a claim with conflicting evidence |
117
- | `rejected` | Matched a claim that was rejected |
118
- | `unsupported` | No matching claim in the trust bundle |
248
+ |-------|---------|
249
+ | `verified` | Matched a claim with verified status. |
250
+ | `assumed` | Matched a claim with assumed status. |
251
+ | `stale` | Matched a claim that is stale. |
252
+ | `disputed` | Matched a claim with conflicting evidence. |
253
+ | `rejected` | Matched a claim that was rejected. |
254
+ | `unsupported` | No matching claim in the trust bundle. |
119
255
 
120
- Exit codes: `0` = pass, `1` = survey unavailable, `2` = strict mode with concerning badges, `3` = usage error.
256
+ Exit codes: `0` = pass (also returned when the anthropic extractor is `not_configured`, which fails open), `1` = survey unavailable, `2` = strict mode with concerning badges, `3` = usage error.
121
257
 
122
- When `@kontourai/survey` is not installed, the CLI outputs `status: "not_configured"` and exits `1`. The hook treats `not_configured` as a silent pass-through.
258
+ ---
123
259
 
124
- ## Registering the Hook
260
+ ## Installing dependencies
125
261
 
126
- Add the utterance check to a Claude Code session via `.claude/settings.json`:
127
-
128
- ```json
129
- {
130
- "hooks": {
131
- "PostToolUse": [
132
- {
133
- "matcher": ".*",
134
- "hooks": [
135
- {
136
- "type": "command",
137
- "command": "node scripts/hooks/claude-hook-adapter.js PostToolUse post:utterance-check utterance-check.js standard,strict"
138
- }
139
- ]
140
- }
141
- ]
142
- }
143
- }
144
- ```
145
-
146
- Or run the hook directly (Kiro/Codex convention, exit 2 blocks):
262
+ The CLI adapter uses dynamic imports so flow-agents itself does not list `@kontourai/survey` as a dependency. Install in the target workspace:
147
263
 
148
264
  ```bash
149
- node scripts/hooks/run-hook.js post:utterance-check utterance-check.js standard,strict
150
- ```
265
+ # Reference extractor only (default)
266
+ npm install @kontourai/survey
151
267
 
152
- ## Installing @kontourai/survey
268
+ # Anthropic extractor (model-backed)
269
+ npm install @kontourai/survey @anthropic-ai/sdk
270
+ ```
153
271
 
154
- The CLI adapter uses a dynamic import so flow-agents itself does not list `@kontourai/survey` as a dependency. Install it in the target workspace:
272
+ ---
155
273
 
156
- ```bash
157
- npm install @kontourai/survey
158
- ```
274
+ ## Ownership split
159
275
 
160
- For model-backed extraction (production-quality, requires `@anthropic-ai/sdk`):
276
+ | Area | Flow Agents owns | Survey owns |
277
+ |------|-----------------|-------------|
278
+ | Hook wiring | PostToolUse/Stop hook, badge guidance format, config loading | None |
279
+ | Extraction | Invoking the CLI, extractor selection, fail-open handling | Statement extraction, extractor interface, anthropic integration |
280
+ | Resolution | Passing the trust bundle path | Inquiry pipeline, claim resolution |
281
+ | Output | Guidance text injected into agent context | UtteranceTrustReport with per-statement badges |
282
+ | Config | Per-repo `flow-agents-settings.json`, env var overrides | None |
161
283
 
162
- ```bash
163
- npm install @kontourai/survey @anthropic-ai/sdk
164
- ```
284
+ Flow Agents does not own trust claim models, inquiry semantics, or extractor implementations.
165
285
 
166
- Then inject the Anthropic extractor by extending the CLI adapter or creating a wrapper script that calls `surveyAgentUtterance` with `createAnthropicUtteranceExtractor`.
286
+ ---
167
287
 
168
- ## Non-Goals
288
+ ## Non-goals
169
289
 
170
290
  - Do not make `@kontourai/survey` a mandatory dependency of flow-agents.
171
291
  - Do not copy Survey's extraction or inquiry schemas into flow-agents.
172
292
  - Do not auto-register the hook in the default pack; it is opt-in only.
173
- - Do not make the hook blocking without explicit `--strict` / `FLOW_AGENTS_UTTERANCE_CHECK_STRICT=true`.
293
+ - Do not make the hook blocking without explicit `mode: "strict"` or the env override.
174
294
  - Do not silently decide anything. The hook injects guidance; the agent decides next steps.
175
295
 
176
- ## Current Integration Shape
296
+ ---
177
297
 
178
- The integration delivers:
298
+ ## Current integration shape
179
299
 
180
- 1. `src/cli/utterance-check.ts` — TypeScript CLI adapter. Accepts utterance text, optional bundle path, and agent ID. Dynamically imports `@kontourai/survey`. Outputs a JSON badge report to stdout and human-readable guidance to stderr. Mirrors the `veritas-governance` adapter pattern.
300
+ The integration delivers:
181
301
 
182
- 2. `scripts/hooks/utterance-check.js` — CJS hook script. PostToolUse/Stop, non-blocking by default. Reads agent output text from the hook event, invokes the CLI adapter when `FLOW_AGENTS_UTTERANCE_CHECK_ENABLED=true`, and injects badge guidance into the agent context. Always fails open.
302
+ 1. `src/cli/utterance-check.ts` — TypeScript CLI adapter. Accepts utterance text, optional bundle path, agent ID, extractor name, and model. Dynamically imports `@kontourai/survey` (and optionally `@kontourai/survey/anthropic`). Outputs a JSON badge report to stdout and human-readable guidance to stderr.
183
303
 
184
- The forward path (out of scope for this slice):
304
+ 2. `scripts/hooks/utterance-check.js` — CJS hook script. PostToolUse/Stop, non-blocking in report mode. Reads per-repo policy from `context/settings/flow-agents-settings.json`, uses env vars as overrides. Resolves repo root from hook event `cwd`. Always fails open.
185
305
 
186
- - Register the hook in a dedicated `survey` pack for opt-in activation.
187
- - Support injecting the Anthropic extractor via `FLOW_AGENTS_UTTERANCE_CHECK_EXTRACTOR=anthropic`.
188
- - Surface badge results as evidence sidecar entries (linking utterance coverage to workflow evidence).
189
- - Auto-propose new claim mappings from unsupported statements via the Survey mapping proposer.
306
+ 3. `schemas/flow-agents-settings.schema.json` — JSON Schema for the per-repo settings file.
190
307
 
191
308
  Survey source and API details: https://github.com/kontourai/survey
package/docs/vision.md ADDED
@@ -0,0 +1,45 @@
1
+ ---
2
+ title: Flow Agents Vision and Direction
3
+ ---
4
+
5
+ # Vision and Direction
6
+
7
+ This page captures where Flow Agents is headed, clearly labeled as direction rather than shipped capability. Shipped artifacts are documented in the [Runtime Hook Surface spec](spec/runtime-hook-surface.html) and the [Runtime and support matrix](index.html#runtime-and-support-matrix) on the overview page.
8
+
9
+ ---
10
+
11
+ ## What ships today
12
+
13
+ Flow Agents currently ships as a harness adapter layer: six core harness runtimes (base, Claude Code, Codex, Kiro, opencode, pi) receive bundled agents, skills, context, scripts, and hook wiring through the `npx @kontourai/flow-agents init` installer. The four canonical policy classes — workflow steering, quality gate, stop-goal-fit, and config protection — are implemented as canonical scripts under `scripts/hooks/` and wired to each host's native event surface at conformance levels L0, L1, or L2.
14
+
15
+ One official framework adapter spike exists: `integrations/strands/` is a Python `HookProvider` for AWS Strands that emits the canonical telemetry taxonomy and enforces config protection via tool-call cancellation. It is preview-status with documented limitations.
16
+
17
+ ---
18
+
19
+ ## Direction
20
+
21
+ The items below are direction, not committed delivery dates. They record the intended shape of where this work goes.
22
+
23
+ ### Kits beyond coding
24
+
25
+ The process-discipline layer is not coding-specific. The canonical policies, sidecar state model, and evidence taxonomy are defined without reference to source code, build systems, or CI. The direction is deployable agentic workflows — Flow Kits for domains beyond software delivery: knowledge work, research, operations, sales contexts, and personal productivity. The [North Star](north-star.html) records the broader scope.
26
+
27
+ ### TypeScript framework adapters
28
+
29
+ The Strands Python spike proves the thesis: the policy engine is not harness-specific. The direction is TypeScript framework adapters that consume the canonical policy engine natively via the published `@kontourai/flow-agents` npm package, rather than shelling out to bash scripts. Candidate frameworks include LangGraph, VoltAgent, and the OpenAI Agents SDK. The [Runtime Hook Surface spec](spec/runtime-hook-surface.html) documents the adapter contract and the framework event mapping tables for each.
30
+
31
+ ### Kontour Console as the unifying telemetry surface
32
+
33
+ Today, telemetry writes to local JSONL files by default, with optional sinks to a local or hosted Kontour Console. The direction is Kontour Console as the unifying surface that spans both harness sessions (Claude Code, Codex, Kiro, opencode, pi) and deployed framework agents (Strands, LangGraph, etc.) — so the same workflow state, evidence, and hook telemetry are visible regardless of which runtime executed the work.
34
+
35
+ ### Conformance kit for community adapters
36
+
37
+ The runtime matrix includes a "conformance-certified" tier for community and third-party adapters that self-certify at a declared L0, L1, or L2 level. A conformance kit — a test suite and declaration format — is in development. It does not yet ship.
38
+
39
+ ---
40
+
41
+ ## What this is not
42
+
43
+ Flow Agents is not building another agent runtime, coding assistant, workflow engine, or orchestration control plane. The model, the runtime, the IDE, the agent UI, the workflow engine, and the repo governance engine are all deliberately out of scope. Flow Agents owns the glue: discovery, just-in-time guidance, scoped delegation, Flow-backed workflow state inside agent runtimes, evidence-backed completion, and feedback loops.
44
+
45
+ See the [North Star](north-star.html) for the full design principles and the [Developer Architecture](developer-architecture.html) for the product boundary map.
@@ -11,19 +11,30 @@ run_one() {
11
11
  bash "$ACCEPT_DIR/test_${name}_harness.sh"
12
12
  }
13
13
 
14
+ run_knowledge_kit_live() {
15
+ echo ""
16
+ bash "$ACCEPT_DIR/test_knowledge_kit_live.sh"
17
+ }
18
+
14
19
  case "$TARGET" in
15
- kiro|claude|codex)
20
+ kiro|claude|codex|opencode|pi)
16
21
  run_one "$TARGET"
17
22
  ;;
23
+ knowledge-kit-live)
24
+ run_knowledge_kit_live
25
+ ;;
18
26
  all)
19
27
  status=0
20
28
  run_one kiro || status=1
21
29
  run_one claude || status=1
22
30
  run_one codex || status=1
31
+ run_one opencode || status=1
32
+ run_one pi || status=1
33
+ run_knowledge_kit_live || status=1
23
34
  exit "$status"
24
35
  ;;
25
36
  *)
26
- echo "Usage: bash evals/acceptance/run.sh [all|kiro|claude|codex]"
37
+ echo "Usage: bash evals/acceptance/run.sh [all|kiro|claude|codex|opencode|pi|knowledge-kit-live]"
27
38
  exit 1
28
39
  ;;
29
40
  esac