@kontourai/flow-agents 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +23 -0
- package/.github/workflows/release-please.yml +31 -0
- package/.github/workflows/runtime-compat.yml +118 -0
- package/CHANGELOG.md +23 -0
- package/CONTRIBUTING.md +4 -0
- package/README.md +53 -10
- package/build/src/cli/init.js +215 -5
- package/build/src/cli/utterance-check.js +65 -1
- package/build/src/tools/build-universal-bundles.js +268 -0
- package/build/src/tools/filter-installed-packs.js +3 -0
- package/build/src/tools/validate-source-tree.js +5 -1
- package/context/scripts/telemetry/lib/config.sh +5 -1
- package/context/settings/flow-agents-settings.json +7 -0
- package/docs/context-map.md +1 -0
- package/docs/index.md +45 -4
- package/docs/integrations/conformance.md +246 -0
- package/docs/integrations/framework-adapter.md +275 -0
- package/docs/integrations/harness-install.md +213 -0
- package/docs/integrations/index.md +54 -0
- package/docs/north-star.md +2 -2
- package/docs/spec/runtime-hook-surface.md +472 -0
- package/docs/survey-utterance-check.md +211 -94
- package/docs/vision.md +45 -0
- package/evals/acceptance/run.sh +4 -2
- package/evals/acceptance/test_opencode_harness.sh +121 -0
- package/evals/acceptance/test_pi_harness.sh +98 -0
- package/evals/integration/test_bundle_install.sh +226 -1
- package/evals/integration/test_bundle_lifecycle.sh +641 -0
- package/evals/integration/test_utterance_check.sh +291 -44
- package/evals/run.sh +2 -0
- package/evals/static/test_universal_bundles.sh +137 -2
- package/integrations/strands/README.md +256 -0
- package/integrations/strands/example.py +74 -0
- package/integrations/strands/flow_agents_strands/__init__.py +27 -0
- package/integrations/strands/flow_agents_strands/hooks.py +194 -0
- package/integrations/strands/flow_agents_strands/policy.py +348 -0
- package/integrations/strands/flow_agents_strands/steering.py +172 -0
- package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
- package/integrations/strands/pyproject.toml +38 -0
- package/integrations/strands/tests/__init__.py +0 -0
- package/integrations/strands/tests/test_hooks.py +304 -0
- package/integrations/strands/tests/test_policy.py +315 -0
- package/integrations/strands/tests/test_telemetry.py +184 -0
- package/integrations/strands-ts/README.md +224 -0
- package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
- package/integrations/strands-ts/package.json +53 -0
- package/integrations/strands-ts/src/hooks.ts +208 -0
- package/integrations/strands-ts/src/index.ts +22 -0
- package/integrations/strands-ts/src/policy.ts +345 -0
- package/integrations/strands-ts/src/telemetry.ts +251 -0
- package/integrations/strands-ts/test/test-policy.ts +322 -0
- package/integrations/strands-ts/test/test-telemetry.ts +226 -0
- package/integrations/strands-ts/tsconfig.json +20 -0
- package/package.json +7 -2
- package/packaging/conformance/README.md +142 -0
- package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
- package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
- package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
- package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
- package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
- package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
- package/packaging/conformance/package.json +4 -0
- package/packaging/conformance/run-conformance.js +322 -0
- package/packaging/manifest.json +59 -0
- package/schemas/flow-agents-settings.schema.json +48 -0
- package/scripts/README.md +4 -0
- package/scripts/dogfood.js +16 -0
- package/scripts/hooks/opencode-hook-adapter.js +123 -0
- package/scripts/hooks/opencode-telemetry-hook.js +101 -0
- package/scripts/hooks/pi-hook-adapter.js +123 -0
- package/scripts/hooks/pi-telemetry-hook.js +105 -0
- package/scripts/hooks/run-hook.js +8 -0
- package/scripts/hooks/utterance-check.js +124 -22
- package/scripts/telemetry/lib/config.sh +5 -1
- package/src/cli/init.ts +219 -6
- package/src/cli/utterance-check.ts +71 -1
- package/src/tools/build-universal-bundles.ts +266 -0
- package/src/tools/filter-installed-packs.ts +3 -0
- package/src/tools/validate-source-tree.ts +5 -1
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* test-telemetry.ts — Tests for telemetry module.
|
|
3
|
+
*
|
|
4
|
+
* Covers: event mapping, JSONL emission shape, normalizeToolName.
|
|
5
|
+
* Uses node:test only — no additional dependencies.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { test, describe } from "node:test";
|
|
9
|
+
import assert from "node:assert/strict";
|
|
10
|
+
import fs from "node:fs";
|
|
11
|
+
import os from "node:os";
|
|
12
|
+
import path from "node:path";
|
|
13
|
+
import { TelemetrySink, STRANDS_TO_CANONICAL, normalizeToolName, SCHEMA_VERSION } from "../src/telemetry.js";
|
|
14
|
+
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// STRANDS_TO_CANONICAL mapping table
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
describe("STRANDS_TO_CANONICAL mapping", () => {
|
|
20
|
+
test("contains all expected Strands TS event class names", () => {
|
|
21
|
+
const expected = new Set([
|
|
22
|
+
"BeforeInvocationEvent",
|
|
23
|
+
"AfterInvocationEvent",
|
|
24
|
+
"BeforeToolCallEvent",
|
|
25
|
+
"AfterToolCallEvent",
|
|
26
|
+
"AgentInitializedEvent",
|
|
27
|
+
"AfterModelCallEvent",
|
|
28
|
+
"MessageAddedEvent",
|
|
29
|
+
]);
|
|
30
|
+
assert.deepStrictEqual(new Set(Object.keys(STRANDS_TO_CANONICAL)), expected);
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
test("BeforeInvocationEvent → userPromptSubmit", () => {
|
|
34
|
+
assert.strictEqual(STRANDS_TO_CANONICAL.BeforeInvocationEvent, "userPromptSubmit");
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
test("AfterInvocationEvent → stop", () => {
|
|
38
|
+
assert.strictEqual(STRANDS_TO_CANONICAL.AfterInvocationEvent, "stop");
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
test("BeforeToolCallEvent → preToolUse", () => {
|
|
42
|
+
assert.strictEqual(STRANDS_TO_CANONICAL.BeforeToolCallEvent, "preToolUse");
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
test("AfterToolCallEvent → postToolUse", () => {
|
|
46
|
+
assert.strictEqual(STRANDS_TO_CANONICAL.AfterToolCallEvent, "postToolUse");
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
test("AgentInitializedEvent → agentSpawn", () => {
|
|
50
|
+
assert.strictEqual(STRANDS_TO_CANONICAL.AgentInitializedEvent, "agentSpawn");
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test("all values are non-empty strings", () => {
|
|
54
|
+
for (const [key, value] of Object.entries(STRANDS_TO_CANONICAL)) {
|
|
55
|
+
assert.strictEqual(typeof value, "string", `${key} value must be a string`);
|
|
56
|
+
assert.ok(value.length > 0, `${key} value must be non-empty`);
|
|
57
|
+
}
|
|
58
|
+
});
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// normalizeToolName
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
describe("normalizeToolName", () => {
|
|
66
|
+
test("bash → execute_bash", () => {
|
|
67
|
+
assert.strictEqual(normalizeToolName("bash"), "execute_bash");
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
test("edit → fs_write", () => {
|
|
71
|
+
assert.strictEqual(normalizeToolName("edit"), "fs_write");
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
test("write → fs_write", () => {
|
|
75
|
+
assert.strictEqual(normalizeToolName("write"), "fs_write");
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
test("read → fs_read", () => {
|
|
79
|
+
assert.strictEqual(normalizeToolName("read"), "fs_read");
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
test("task → use_subagent", () => {
|
|
83
|
+
assert.strictEqual(normalizeToolName("task"), "use_subagent");
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
test("unknown passthrough", () => {
|
|
87
|
+
assert.strictEqual(normalizeToolName("my_custom_tool"), "my_custom_tool");
|
|
88
|
+
});
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
// TelemetrySink emission shape
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
function makeTempSink(agentName = "test-agent", runtime = "strands-test"): { sink: TelemetrySink; dir: string } {
|
|
96
|
+
const dir = fs.mkdtempSync(path.join(os.tmpdir(), "fa-ts-telemetry-"));
|
|
97
|
+
const sink = new TelemetrySink({ sinkPath: dir, agentName, runtime });
|
|
98
|
+
return { sink, dir };
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
function readEvents(dir: string): Record<string, unknown>[] {
|
|
102
|
+
const logFile = path.join(dir, "full.jsonl");
|
|
103
|
+
if (!fs.existsSync(logFile)) return [];
|
|
104
|
+
const lines = fs.readFileSync(logFile, "utf8").trim().split("\n");
|
|
105
|
+
return lines.filter((l) => l.trim()).map((l) => JSON.parse(l) as Record<string, unknown>);
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
describe("TelemetrySink emission shape", () => {
|
|
109
|
+
test("session start event has correct schema_version and event_type", () => {
|
|
110
|
+
const { sink, dir } = makeTempSink();
|
|
111
|
+
const evt = sink.emitSessionStart();
|
|
112
|
+
assert.strictEqual(evt.schema_version, SCHEMA_VERSION);
|
|
113
|
+
assert.strictEqual(evt.event_type, "session.start");
|
|
114
|
+
// cleanup
|
|
115
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
test("session start written to JSONL", () => {
|
|
119
|
+
const { sink, dir } = makeTempSink();
|
|
120
|
+
sink.emitSessionStart();
|
|
121
|
+
const events = readEvents(dir);
|
|
122
|
+
assert.strictEqual(events.length, 1);
|
|
123
|
+
assert.strictEqual((events[0] as Record<string, unknown>).event_type, "session.start");
|
|
124
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
test("event has required top-level fields (timestamp, session_id, event_id)", () => {
|
|
128
|
+
const { sink, dir } = makeTempSink();
|
|
129
|
+
const evt = sink.emitSessionStart();
|
|
130
|
+
assert.ok("timestamp" in evt, "missing timestamp");
|
|
131
|
+
assert.ok("session_id" in evt, "missing session_id");
|
|
132
|
+
assert.ok("event_id" in evt, "missing event_id");
|
|
133
|
+
assert.ok("agent" in evt, "missing agent");
|
|
134
|
+
assert.ok("hook" in evt, "missing hook");
|
|
135
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
test("agent sub-object has name and runtime", () => {
|
|
139
|
+
const { sink, dir } = makeTempSink("my-agent", "strands-ts");
|
|
140
|
+
const evt = sink.emitSessionStart();
|
|
141
|
+
const agent = evt.agent as Record<string, unknown>;
|
|
142
|
+
assert.strictEqual(agent.name, "my-agent");
|
|
143
|
+
assert.strictEqual(agent.runtime, "strands-ts");
|
|
144
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
test("hook sub-object has event_name and source fields", () => {
|
|
148
|
+
const { sink, dir } = makeTempSink();
|
|
149
|
+
const evt = sink.emitSessionStart();
|
|
150
|
+
const hook = evt.hook as Record<string, unknown>;
|
|
151
|
+
assert.ok("event_name" in hook, "hook.event_name missing");
|
|
152
|
+
assert.ok("source" in hook, "hook.source missing");
|
|
153
|
+
assert.strictEqual(hook.source, "strands-ts");
|
|
154
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
test("tool invoke event has correct event_type and tool fields", () => {
|
|
158
|
+
const { sink, dir } = makeTempSink();
|
|
159
|
+
const evt = sink.emitToolInvoke("edit", { path: "foo.py" });
|
|
160
|
+
assert.strictEqual(evt.event_type, "tool.invoke");
|
|
161
|
+
const tool = evt.tool as Record<string, unknown>;
|
|
162
|
+
assert.strictEqual(tool.name, "edit");
|
|
163
|
+
assert.strictEqual(tool.normalized_name, "fs_write");
|
|
164
|
+
assert.deepStrictEqual(tool.input, { path: "foo.py" });
|
|
165
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
test("tool result event has correct event_type", () => {
|
|
169
|
+
const { sink, dir } = makeTempSink();
|
|
170
|
+
const evt = sink.emitToolResult("read", "file contents");
|
|
171
|
+
assert.strictEqual(evt.event_type, "tool.result");
|
|
172
|
+
const tool = evt.tool as Record<string, unknown>;
|
|
173
|
+
assert.strictEqual(tool.name, "read");
|
|
174
|
+
assert.strictEqual(tool.normalized_name, "fs_read");
|
|
175
|
+
assert.strictEqual(tool.output, "file contents");
|
|
176
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
test("session end has event_type session.end", () => {
|
|
180
|
+
const { sink, dir } = makeTempSink();
|
|
181
|
+
const evt = sink.emitSessionEnd(1000);
|
|
182
|
+
assert.strictEqual(evt.event_type, "session.end");
|
|
183
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
184
|
+
});
|
|
185
|
+
|
|
186
|
+
test("user prompt submit has event_type turn.user", () => {
|
|
187
|
+
const { sink, dir } = makeTempSink();
|
|
188
|
+
const evt = sink.emitUserPromptSubmit();
|
|
189
|
+
assert.strictEqual(evt.event_type, "turn.user");
|
|
190
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
test("multiple events share the same session_id", () => {
|
|
194
|
+
const { sink, dir } = makeTempSink();
|
|
195
|
+
sink.emitSessionStart();
|
|
196
|
+
sink.emitToolInvoke("read", {});
|
|
197
|
+
sink.emitSessionEnd();
|
|
198
|
+
const events = readEvents(dir);
|
|
199
|
+
const sessionIds = new Set(events.map((e) => (e as Record<string, unknown>).session_id));
|
|
200
|
+
assert.strictEqual(sessionIds.size, 1, "All events must share one session_id");
|
|
201
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
test("each JSONL line is valid JSON", () => {
|
|
205
|
+
const { sink, dir } = makeTempSink();
|
|
206
|
+
sink.emitSessionStart();
|
|
207
|
+
sink.emitToolInvoke("bash", { command: "ls" });
|
|
208
|
+
sink.emitSessionEnd(500);
|
|
209
|
+
const logFile = path.join(dir, "full.jsonl");
|
|
210
|
+
const lines = fs.readFileSync(logFile, "utf8").split("\n");
|
|
211
|
+
for (const line of lines) {
|
|
212
|
+
if (!line.trim()) continue;
|
|
213
|
+
const parsed = JSON.parse(line); // throws on invalid JSON
|
|
214
|
+
assert.strictEqual(typeof parsed, "object");
|
|
215
|
+
}
|
|
216
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
test("sinkPath directory creates full.jsonl", () => {
|
|
220
|
+
const dir = fs.mkdtempSync(path.join(os.tmpdir(), "fa-ts-sink-dir-"));
|
|
221
|
+
const sink = new TelemetrySink({ sinkPath: dir });
|
|
222
|
+
sink.emitSessionStart();
|
|
223
|
+
assert.ok(fs.existsSync(path.join(dir, "full.jsonl")));
|
|
224
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
225
|
+
});
|
|
226
|
+
});
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "Node16",
|
|
5
|
+
"moduleResolution": "Node16",
|
|
6
|
+
"ignoreDeprecations": "6.0",
|
|
7
|
+
"lib": ["ES2022"],
|
|
8
|
+
"types": ["node"],
|
|
9
|
+
"rootDir": ".",
|
|
10
|
+
"outDir": "dist",
|
|
11
|
+
"strict": true,
|
|
12
|
+
"noUnusedLocals": true,
|
|
13
|
+
"noUnusedParameters": true,
|
|
14
|
+
"esModuleInterop": true,
|
|
15
|
+
"forceConsistentCasingInFileNames": true,
|
|
16
|
+
"skipLibCheck": true
|
|
17
|
+
},
|
|
18
|
+
"include": ["src/**/*.ts", "test/**/*.ts"],
|
|
19
|
+
"exclude": ["node_modules", "dist"]
|
|
20
|
+
}
|
package/package.json
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kontourai/flow-agents",
|
|
3
|
-
"version": "0.1.2",
|
|
4
|
-
"description": "Flow Agents — a Kontour product that applies Flow and Veritas discipline inside the agent tools you already use: Claude Code, Codex, Kiro, and GitHub Actions.",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Flow Agents — a Kontour product that applies Flow and Veritas discipline as a portable process layer inside the agent tools you already use: Claude Code, Codex, Kiro, opencode, pi, and GitHub Actions — with framework adapters (AWS Strands preview) on the same policy-engine contract.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"agents",
|
|
7
7
|
"ai-agents",
|
|
8
8
|
"workflow",
|
|
9
9
|
"skills",
|
|
10
|
+
"hooks",
|
|
10
11
|
"claude-code",
|
|
11
12
|
"codex",
|
|
12
13
|
"kiro",
|
|
14
|
+
"opencode",
|
|
15
|
+
"pi",
|
|
16
|
+
"strands",
|
|
13
17
|
"evidence",
|
|
14
18
|
"process-transparency"
|
|
15
19
|
],
|
|
@@ -108,6 +112,7 @@
|
|
|
108
112
|
"validate:hook-influence": "npm run build --silent && node build/src/cli.js validate-hook-influence",
|
|
109
113
|
"workflow-artifact-cleanup-audit": "npm run build --silent && node build/src/cli.js workflow-artifact-cleanup-audit",
|
|
110
114
|
"fixture:retirement-audit": "npm run build --silent && node build/src/cli.js fixture-retirement-audit",
|
|
115
|
+
"dogfood": "npm run build --silent && node scripts/dogfood.js",
|
|
111
116
|
"setup:repo-hooks": "bash scripts/setup-repo-hooks.sh",
|
|
112
117
|
"validate:repo-hooks": "bash evals/static/test_repo_hooks.sh",
|
|
113
118
|
"eval": "bash evals/run.sh",
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# Flow Agents Conformance Kit
|
|
2
|
+
|
|
3
|
+
The conformance kit lets third-party adapter authors self-certify their implementation against the Flow Agents policy engine contract.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## What is the conformance kit?
|
|
8
|
+
|
|
9
|
+
Flow Agents ships four canonical policy classes (config-protection, quality-gate, stop-goal-fit, workflow-steering) that adapters must invoke via subprocess or native import. The conformance kit provides:
|
|
10
|
+
|
|
11
|
+
1. **Golden fixtures** (`fixtures/`) — payload→expected-decision JSON pairs, one per policy class × canonical event × case, extracted from real engine behavior.
|
|
12
|
+
2. **Conformance runner** (`run-conformance.js`) — a standalone Node.js script (no npm deps) that pipes each fixture through an adapter command and reports per-level verdict.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Conformance levels
|
|
17
|
+
|
|
18
|
+
| Level | What is required |
|
|
19
|
+
|-------|-----------------|
|
|
20
|
+
| **L0** | No policy fixtures required. Adapter wires telemetry only. |
|
|
21
|
+
| **L1** | Workflow steering (`userPromptSubmit`) and stop-goal-fit (`stop`) in warning mode must pass. |
|
|
22
|
+
| **L2** | All L1 requirements plus config-protection (`preToolUse`, blocking) and quality-gate (`postToolUse`, non-blocking). |
|
|
23
|
+
|
|
24
|
+
These levels match the definitions in `docs/spec/runtime-hook-surface.md` §4.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quick start
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# Self-test the canonical engine (must report L2):
|
|
32
|
+
node packaging/conformance/run-conformance.js --self
|
|
33
|
+
|
|
34
|
+
# Test a third-party adapter:
|
|
35
|
+
node packaging/conformance/run-conformance.js \
|
|
36
|
+
--adapter-cmd "node /path/to/your-adapter.js" \
|
|
37
|
+
--level L2
|
|
38
|
+
|
|
39
|
+
# Test at L1 only:
|
|
40
|
+
node packaging/conformance/run-conformance.js \
|
|
41
|
+
--adapter-cmd "node /path/to/your-adapter.js" \
|
|
42
|
+
--level L1
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Adapter contract
|
|
48
|
+
|
|
49
|
+
Your adapter command:
|
|
50
|
+
- Receives a canonical JSON payload on **stdin** (one JSON object, see §Payload schema).
|
|
51
|
+
- Writes the input JSON (or augmented form) to **stdout** on allow.
|
|
52
|
+
- On block, writes nothing meaningful to **stdout**: stdout is either empty or just an echo of the input.
|
|
53
|
+
- Exits **0** to allow or **2** to block; any other exit code signals an error and is treated as allow (fail-open).
|
|
54
|
+
|
|
55
|
+
The runner invokes your command exactly once per fixture via `sh -c "<your-cmd>"`.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Payload schema (contract_version "1.0")
|
|
60
|
+
|
|
61
|
+
All payloads are JSON objects with:
|
|
62
|
+
|
|
63
|
+
| Field | Type | Description |
|
|
64
|
+
|-------|------|-------------|
|
|
65
|
+
| `hook_event_name` | string | Canonical event name: `PreToolUse`, `PostToolUse`, `UserPromptSubmit`, `Stop` |
|
|
66
|
+
| `tool_name` | string? | Tool name (for tool-call events) |
|
|
67
|
+
| `tool_input` | object? | Tool input (for tool-call events); contains `path` or `file_path` for write tools |
|
|
68
|
+
| `cwd` | string? | Current working directory of the agent session |
|
|
69
|
+
|
|
70
|
+
Full payload/decision schema is documented in `docs/spec/runtime-hook-surface.md` §8 (Engine Contract).
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Fixture inventory
|
|
75
|
+
|
|
76
|
+
| Fixture | Policy class | Event | Level | What it tests |
|
|
77
|
+
|---------|-------------|-------|-------|---------------|
|
|
78
|
+
| `config-protection--block-eslintrc.json` | config-protection | preToolUse | L2 | Block write to `.eslintrc.json` |
|
|
79
|
+
| `config-protection--block-biome.json` | config-protection | preToolUse | L2 | Block edit to `biome.json` via `file_path` |
|
|
80
|
+
| `config-protection--allow-safe-file.json` | config-protection | preToolUse | L2 | Allow write to `src/main.ts` |
|
|
81
|
+
| `config-protection--allow-no-path.json` | config-protection | preToolUse | L2 | Allow when no path in tool_input |
|
|
82
|
+
| `quality-gate--allow-nonexistent-file.json` | quality-gate | postToolUse | L2 | Non-blocking for missing .ts file |
|
|
83
|
+
| `quality-gate--allow-no-path.json` | quality-gate | postToolUse | L2 | Non-blocking when no path in tool_input |
|
|
84
|
+
| `stop-goal-fit--allow-clean-cwd.json` | stop-goal-fit | stop | L1 | No warnings in clean workspace |
|
|
85
|
+
| `stop-goal-fit--warn-active-delivery.json` | stop-goal-fit | stop | L1 | Warnings for active delivery without DOD/GoalFit |
|
|
86
|
+
| `stop-goal-fit--block-strict-mode.json` | stop-goal-fit | stop | L2 | Exit 2 with FLOW_AGENTS_GOAL_FIT_STRICT=true |
|
|
87
|
+
| `workflow-steering--allow-no-state.json` | workflow-steering | userPromptSubmit | L1 | Pass-through when no active workflow state |
|
|
88
|
+
| `workflow-steering--inject-active-state.json` | workflow-steering | userPromptSubmit | L1 | Injects STATE hint for blocked task |
|
|
89
|
+
| `workflow-steering--inject-subagent-steering.json` | workflow-steering | postToolUse | L1 | Injects EXECUTION COMPLETE hint after tool-worker |
|
|
90
|
+
|
|
91
|
+
For fixtures that define `workspace_setup`, a temporary directory containing the listed files is created before the adapter is invoked and is cleaned up afterward. The `cwd` field in those payloads (the `__TEMP_WORKSPACE__` placeholder) is replaced with the temporary directory's path at runtime.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## How to declare conformance
|
|
96
|
+
|
|
97
|
+
After running the conformance kit, include a conformance declaration in your adapter documentation:
|
|
98
|
+
|
|
99
|
+
```yaml
|
|
100
|
+
conformance_level: L2 # or L0 / L1
|
|
101
|
+
engine_contract_version: "1.0"
|
|
102
|
+
runner_version: "run-conformance.js"
|
|
103
|
+
test_date: 2026-06-11
|
|
104
|
+
verdict: PASS
|
|
105
|
+
fixture_count: 12
|
|
106
|
+
fixtures_passed: 12
|
|
107
|
+
gaps: [] # List any declared gaps here
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
If any fixtures fail, list them under `gaps` with a description of the degradation behavior.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Declaring gaps
|
|
115
|
+
|
|
116
|
+
If your adapter legitimately cannot satisfy a fixture (e.g., the host runtime has no blocking `preToolUse` equivalent), declare it explicitly:
|
|
117
|
+
|
|
118
|
+
```yaml
|
|
119
|
+
gaps:
|
|
120
|
+
- fixture: config-protection--block-eslintrc.json
|
|
121
|
+
reason: "Host does not support blocking tool calls; config-protection fails open"
|
|
122
|
+
degradation: "Agent may modify linter configs without interception"
|
|
123
|
+
workaround: "Run config-protection as a linting step in CI instead"
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Declared gaps do not prevent an adapter from reaching a lower conformance level.
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## CLI reference
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
node packaging/conformance/run-conformance.js [options]
|
|
134
|
+
|
|
135
|
+
--self Run against the canonical engine (target L2)
|
|
136
|
+
--adapter-cmd CMD Shell command to pipe fixtures to (adapter under test)
|
|
137
|
+
--level L0|L1|L2 Minimum conformance level to enforce (default: L2 for --self, L0 for --adapter-cmd)
|
|
138
|
+
--fixtures DIR Override fixture directory (default: packaging/conformance/fixtures/)
|
|
139
|
+
--verbose Print fixture payloads and full output in per-fixture results
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Exit codes: `0` = target level reached, `1` = target level not reached, `2` = usage error.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "config-protection allows when no file path is present in tool_input",
|
|
3
|
+
"policy_class": "config-protection",
|
|
4
|
+
"canonical_event": "preToolUse",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "config-protection",
|
|
7
|
+
"hook_script": "config-protection.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "PreToolUse",
|
|
10
|
+
"tool_name": "write",
|
|
11
|
+
"tool_input": {}
|
|
12
|
+
},
|
|
13
|
+
"expected": {
|
|
14
|
+
"exit_code": 0,
|
|
15
|
+
"stderr_is_empty": true,
|
|
16
|
+
"stdout_echoes_input": true
|
|
17
|
+
}
|
|
18
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "config-protection allows a write to a regular source file",
|
|
3
|
+
"policy_class": "config-protection",
|
|
4
|
+
"canonical_event": "preToolUse",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "config-protection",
|
|
7
|
+
"hook_script": "config-protection.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "PreToolUse",
|
|
10
|
+
"tool_name": "write",
|
|
11
|
+
"tool_input": {
|
|
12
|
+
"path": "/repo/src/main.ts"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"expected": {
|
|
16
|
+
"exit_code": 0,
|
|
17
|
+
"stderr_is_empty": true,
|
|
18
|
+
"stdout_echoes_input": true
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "config-protection blocks an edit to biome.json (protected Biome config)",
|
|
3
|
+
"policy_class": "config-protection",
|
|
4
|
+
"canonical_event": "preToolUse",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "config-protection",
|
|
7
|
+
"hook_script": "config-protection.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "PreToolUse",
|
|
10
|
+
"tool_name": "edit",
|
|
11
|
+
"tool_input": {
|
|
12
|
+
"file_path": "biome.json"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"expected": {
|
|
16
|
+
"exit_code": 2,
|
|
17
|
+
"stderr_contains": ["BLOCKED", "biome.json"],
|
|
18
|
+
"stdout_is_empty": true
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "config-protection blocks a write to .eslintrc.json (protected ESLint config)",
|
|
3
|
+
"policy_class": "config-protection",
|
|
4
|
+
"canonical_event": "preToolUse",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "config-protection",
|
|
7
|
+
"hook_script": "config-protection.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "PreToolUse",
|
|
10
|
+
"tool_name": "write",
|
|
11
|
+
"tool_input": {
|
|
12
|
+
"path": "/repo/.eslintrc.json"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"expected": {
|
|
16
|
+
"exit_code": 2,
|
|
17
|
+
"stderr_contains": ["BLOCKED"],
|
|
18
|
+
"stdout_is_empty": true
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "quality-gate is non-blocking (exit 0) when no file path is in tool_input",
|
|
3
|
+
"policy_class": "quality-gate",
|
|
4
|
+
"canonical_event": "postToolUse",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "quality-gate",
|
|
7
|
+
"hook_script": "quality-gate.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "PostToolUse",
|
|
10
|
+
"tool_name": "write",
|
|
11
|
+
"tool_input": {}
|
|
12
|
+
},
|
|
13
|
+
"expected": {
|
|
14
|
+
"exit_code": 0,
|
|
15
|
+
"stdout_echoes_input": true
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "quality-gate is non-blocking (exit 0) even for a .ts file that does not exist",
|
|
3
|
+
"policy_class": "quality-gate",
|
|
4
|
+
"canonical_event": "postToolUse",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "quality-gate",
|
|
7
|
+
"hook_script": "quality-gate.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "PostToolUse",
|
|
10
|
+
"tool_name": "write",
|
|
11
|
+
"tool_input": {
|
|
12
|
+
"path": "/tmp/nonexistent-file-for-conformance-test.ts"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"expected": {
|
|
16
|
+
"exit_code": 0,
|
|
17
|
+
"stdout_echoes_input": true
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "stop-goal-fit passes (exit 0, no warnings) when cwd has no .flow-agents workflow artifacts",
|
|
3
|
+
"policy_class": "stop-goal-fit",
|
|
4
|
+
"canonical_event": "stop",
|
|
5
|
+
"conformance_level": "L1",
|
|
6
|
+
"hook_id": "stop-goal-fit",
|
|
7
|
+
"hook_script": "stop-goal-fit.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "Stop",
|
|
10
|
+
"cwd": "/tmp"
|
|
11
|
+
},
|
|
12
|
+
"expected": {
|
|
13
|
+
"exit_code": 0,
|
|
14
|
+
"stdout_echoes_input": true,
|
|
15
|
+
"stderr_is_empty": true
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "stop-goal-fit blocks (exit 2) in FLOW_AGENTS_GOAL_FIT_STRICT=true mode for an active delivery artifact missing DOD and Goal Fit Gate",
|
|
3
|
+
"policy_class": "stop-goal-fit",
|
|
4
|
+
"canonical_event": "stop",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "stop-goal-fit",
|
|
7
|
+
"hook_script": "stop-goal-fit.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "Stop",
|
|
10
|
+
"cwd": "__TEMP_WORKSPACE__"
|
|
11
|
+
},
|
|
12
|
+
"workspace_setup": {
|
|
13
|
+
"AGENTS.md": "# Test Repo\n",
|
|
14
|
+
".flow-agents/my-task/my-task--deliver.md": "# My Task\n\nbranch: main\nstatus: executing\ntype: deliver\n\n## Plan\n\nWork TBD.\n"
|
|
15
|
+
},
|
|
16
|
+
"env": {
|
|
17
|
+
"FLOW_AGENTS_GOAL_FIT_STRICT": "true"
|
|
18
|
+
},
|
|
19
|
+
"expected": {
|
|
20
|
+
"exit_code": 2,
|
|
21
|
+
"stderr_contains": ["status:executing"]
|
|
22
|
+
}
|
|
23
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "stop-goal-fit warns (exit 0, stderr has warnings) for an active delivery artifact missing DOD and Goal Fit Gate",
|
|
3
|
+
"policy_class": "stop-goal-fit",
|
|
4
|
+
"canonical_event": "stop",
|
|
5
|
+
"conformance_level": "L1",
|
|
6
|
+
"hook_id": "stop-goal-fit",
|
|
7
|
+
"hook_script": "stop-goal-fit.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "Stop",
|
|
10
|
+
"cwd": "__TEMP_WORKSPACE__"
|
|
11
|
+
},
|
|
12
|
+
"workspace_setup": {
|
|
13
|
+
"AGENTS.md": "# Test Repo\n",
|
|
14
|
+
".flow-agents/my-task/my-task--deliver.md": "# My Task\n\nbranch: main\nstatus: executing\ntype: deliver\n\n## Plan\n\nWork TBD.\n"
|
|
15
|
+
},
|
|
16
|
+
"expected": {
|
|
17
|
+
"exit_code": 0,
|
|
18
|
+
"stdout_echoes_input": true,
|
|
19
|
+
"stderr_contains": ["status:executing", "Definition Of Done", "Goal Fit Gate"]
|
|
20
|
+
}
|
|
21
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "workflow-steering passes through (exit 0, stdout echoes input) when cwd has no active workflow state",
|
|
3
|
+
"policy_class": "workflow-steering",
|
|
4
|
+
"canonical_event": "userPromptSubmit",
|
|
5
|
+
"conformance_level": "L1",
|
|
6
|
+
"hook_id": "workflow-steering",
|
|
7
|
+
"hook_script": "workflow-steering.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "UserPromptSubmit",
|
|
10
|
+
"cwd": "/tmp"
|
|
11
|
+
},
|
|
12
|
+
"expected": {
|
|
13
|
+
"exit_code": 0,
|
|
14
|
+
"stdout_echoes_input": true
|
|
15
|
+
}
|
|
16
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "workflow-steering injects STATE hint (exit 0, stdout contains STATE:) for blocked workflow state at UserPromptSubmit",
|
|
3
|
+
"policy_class": "workflow-steering",
|
|
4
|
+
"canonical_event": "userPromptSubmit",
|
|
5
|
+
"conformance_level": "L1",
|
|
6
|
+
"hook_id": "workflow-steering",
|
|
7
|
+
"hook_script": "workflow-steering.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "UserPromptSubmit",
|
|
10
|
+
"cwd": "__TEMP_WORKSPACE__"
|
|
11
|
+
},
|
|
12
|
+
"workspace_setup": {
|
|
13
|
+
"AGENTS.md": "# Test Repo\n",
|
|
14
|
+
".flow-agents/my-task/state.json": {
|
|
15
|
+
"task_slug": "my-task",
|
|
16
|
+
"status": "blocked",
|
|
17
|
+
"phase": "execute",
|
|
18
|
+
"next_action": {
|
|
19
|
+
"summary": "Needs decision from user",
|
|
20
|
+
"status": "needs_user",
|
|
21
|
+
"target_phase": "verify"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"expected": {
|
|
26
|
+
"exit_code": 0,
|
|
27
|
+
"stdout_contains": ["STATE:", "my-task", "blocked"]
|
|
28
|
+
}
|
|
29
|
+
}
|