@kontourai/flow-agents 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +23 -0
- package/.github/workflows/release-please.yml +31 -0
- package/.github/workflows/runtime-compat.yml +118 -0
- package/CHANGELOG.md +46 -0
- package/CONTRIBUTING.md +4 -0
- package/README.md +80 -18
- package/build/src/cli/flow-kit.js +9 -4
- package/build/src/cli/init.js +215 -5
- package/build/src/cli/runtime-adapter.js +9 -5
- package/build/src/cli/telemetry-doctor.js +4 -1
- package/build/src/cli/utterance-check.js +65 -1
- package/build/src/runtime-adapters.js +34 -0
- package/build/src/tools/build-universal-bundles.js +285 -0
- package/build/src/tools/filter-installed-packs.js +3 -0
- package/build/src/tools/validate-source-tree.js +5 -1
- package/console.telemetry.json +115 -20
- package/context/scripts/telemetry/lib/config.sh +5 -1
- package/context/settings/flow-agents-settings.json +7 -0
- package/docs/_layouts/default.html +2 -0
- package/docs/context-map.md +1 -0
- package/docs/index.md +53 -4
- package/docs/integrations/conformance.md +246 -0
- package/docs/integrations/framework-adapter.md +275 -0
- package/docs/integrations/harness-install.md +213 -0
- package/docs/integrations/index.md +58 -0
- package/docs/integrations/knowledge-kit-live.md +211 -0
- package/docs/kit-authoring-guide.md +169 -0
- package/docs/north-star.md +2 -2
- package/docs/spec/runtime-hook-surface.md +525 -0
- package/docs/survey-utterance-check.md +211 -94
- package/docs/vision.md +45 -0
- package/evals/acceptance/run.sh +13 -2
- package/evals/acceptance/test_knowledge_kit_live.sh +221 -0
- package/evals/acceptance/test_opencode_harness.sh +121 -0
- package/evals/acceptance/test_pi_harness.sh +113 -0
- package/evals/integration/test_bundle_install.sh +226 -1
- package/evals/integration/test_bundle_lifecycle.sh +641 -0
- package/evals/integration/test_runtime_adapter_activation.sh +113 -1
- package/evals/integration/test_utterance_check.sh +291 -44
- package/evals/run.sh +2 -0
- package/evals/static/test_universal_bundles.sh +137 -2
- package/integrations/strands/README.md +256 -0
- package/integrations/strands/example.py +74 -0
- package/integrations/strands/examples/knowledge_kit_live.py +461 -0
- package/integrations/strands/flow_agents_strands/__init__.py +27 -0
- package/integrations/strands/flow_agents_strands/hooks.py +194 -0
- package/integrations/strands/flow_agents_strands/policy.py +348 -0
- package/integrations/strands/flow_agents_strands/steering.py +225 -0
- package/integrations/strands/flow_agents_strands/telemetry.py +238 -0
- package/integrations/strands/pyproject.toml +38 -0
- package/integrations/strands/tests/__init__.py +0 -0
- package/integrations/strands/tests/test_hooks.py +392 -0
- package/integrations/strands/tests/test_policy.py +315 -0
- package/integrations/strands/tests/test_telemetry.py +184 -0
- package/integrations/strands-ts/README.md +224 -0
- package/integrations/strands-ts/bin/conformance-shim.mjs +257 -0
- package/integrations/strands-ts/package.json +53 -0
- package/integrations/strands-ts/src/hooks.ts +312 -0
- package/integrations/strands-ts/src/index.ts +22 -0
- package/integrations/strands-ts/src/policy.ts +345 -0
- package/integrations/strands-ts/src/telemetry.ts +251 -0
- package/integrations/strands-ts/test/test-policy.ts +322 -0
- package/integrations/strands-ts/test/test-steering.ts +159 -0
- package/integrations/strands-ts/test/test-telemetry.ts +226 -0
- package/integrations/strands-ts/tsconfig.json +20 -0
- package/kits/catalog.json +6 -0
- package/kits/knowledge/adapters/default-store/index.js +821 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1179 -0
- package/kits/knowledge/adapters/flow-runner/telemetry.js +174 -0
- package/kits/knowledge/docs/README.md +135 -0
- package/kits/knowledge/docs/store-contract.md +526 -0
- package/kits/knowledge/evals/consolidation/suite.test.js +1234 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +670 -0
- package/kits/knowledge/evals/ingest-compile/suite.test.js +574 -0
- package/kits/knowledge/evals/synthesis/suite.test.js +909 -0
- package/kits/knowledge/flows/compile.flow.json +60 -0
- package/kits/knowledge/flows/consolidate.flow.json +77 -0
- package/kits/knowledge/flows/ingest.flow.json +60 -0
- package/kits/knowledge/flows/store-contract.flow.json +48 -0
- package/kits/knowledge/flows/synthesize.flow.json +77 -0
- package/kits/knowledge/kit.json +78 -0
- package/package.json +7 -2
- package/packaging/conformance/README.md +142 -0
- package/packaging/conformance/fixtures/config-protection--allow-no-path.json +18 -0
- package/packaging/conformance/fixtures/config-protection--allow-safe-file.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-biome.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-eslintrc.json +20 -0
- package/packaging/conformance/fixtures/quality-gate--allow-no-path.json +17 -0
- package/packaging/conformance/fixtures/quality-gate--allow-nonexistent-file.json +19 -0
- package/packaging/conformance/fixtures/stop-goal-fit--allow-clean-cwd.json +17 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-strict-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +21 -0
- package/packaging/conformance/fixtures/workflow-steering--allow-no-state.json +16 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-active-state.json +29 -0
- package/packaging/conformance/fixtures/workflow-steering--inject-subagent-steering.json +25 -0
- package/packaging/conformance/package.json +4 -0
- package/packaging/conformance/run-conformance.js +322 -0
- package/packaging/manifest.json +59 -0
- package/schemas/flow-agents-settings.schema.json +48 -0
- package/scripts/README.md +4 -0
- package/scripts/dogfood.js +16 -0
- package/scripts/hooks/opencode-hook-adapter.js +123 -0
- package/scripts/hooks/opencode-telemetry-hook.js +101 -0
- package/scripts/hooks/pi-hook-adapter.js +123 -0
- package/scripts/hooks/pi-telemetry-hook.js +105 -0
- package/scripts/hooks/run-hook.js +8 -0
- package/scripts/hooks/utterance-check.js +124 -22
- package/scripts/telemetry/lib/config.sh +5 -1
- package/src/cli/flow-kit.ts +10 -4
- package/src/cli/init.ts +219 -6
- package/src/cli/runtime-adapter.ts +10 -5
- package/src/cli/telemetry-doctor.ts +4 -1
- package/src/cli/utterance-check.ts +71 -1
- package/src/runtime-adapters.ts +35 -0
- package/src/tools/build-universal-bundles.ts +283 -0
- package/src/tools/filter-installed-packs.ts +3 -0
- package/src/tools/validate-source-tree.ts +5 -1
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "quality-gate is non-blocking (exit 0) when no file path is in tool_input",
|
|
3
|
+
"policy_class": "quality-gate",
|
|
4
|
+
"canonical_event": "postToolUse",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "quality-gate",
|
|
7
|
+
"hook_script": "quality-gate.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "PostToolUse",
|
|
10
|
+
"tool_name": "write",
|
|
11
|
+
"tool_input": {}
|
|
12
|
+
},
|
|
13
|
+
"expected": {
|
|
14
|
+
"exit_code": 0,
|
|
15
|
+
"stdout_echoes_input": true
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "quality-gate is non-blocking (exit 0) even for a .ts file that does not exist",
|
|
3
|
+
"policy_class": "quality-gate",
|
|
4
|
+
"canonical_event": "postToolUse",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "quality-gate",
|
|
7
|
+
"hook_script": "quality-gate.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "PostToolUse",
|
|
10
|
+
"tool_name": "write",
|
|
11
|
+
"tool_input": {
|
|
12
|
+
"path": "/tmp/nonexistent-file-for-conformance-test.ts"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"expected": {
|
|
16
|
+
"exit_code": 0,
|
|
17
|
+
"stdout_echoes_input": true
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "stop-goal-fit passes (exit 0, no warnings) when cwd has no .flow-agents workflow artifacts",
|
|
3
|
+
"policy_class": "stop-goal-fit",
|
|
4
|
+
"canonical_event": "stop",
|
|
5
|
+
"conformance_level": "L1",
|
|
6
|
+
"hook_id": "stop-goal-fit",
|
|
7
|
+
"hook_script": "stop-goal-fit.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "Stop",
|
|
10
|
+
"cwd": "/tmp"
|
|
11
|
+
},
|
|
12
|
+
"expected": {
|
|
13
|
+
"exit_code": 0,
|
|
14
|
+
"stdout_echoes_input": true,
|
|
15
|
+
"stderr_is_empty": true
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "stop-goal-fit blocks (exit 2) in FLOW_AGENTS_GOAL_FIT_STRICT=true mode for an active delivery artifact missing DOD and Goal Fit Gate",
|
|
3
|
+
"policy_class": "stop-goal-fit",
|
|
4
|
+
"canonical_event": "stop",
|
|
5
|
+
"conformance_level": "L2",
|
|
6
|
+
"hook_id": "stop-goal-fit",
|
|
7
|
+
"hook_script": "stop-goal-fit.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "Stop",
|
|
10
|
+
"cwd": "__TEMP_WORKSPACE__"
|
|
11
|
+
},
|
|
12
|
+
"workspace_setup": {
|
|
13
|
+
"AGENTS.md": "# Test Repo\n",
|
|
14
|
+
".flow-agents/my-task/my-task--deliver.md": "# My Task\n\nbranch: main\nstatus: executing\ntype: deliver\n\n## Plan\n\nWork TBD.\n"
|
|
15
|
+
},
|
|
16
|
+
"env": {
|
|
17
|
+
"FLOW_AGENTS_GOAL_FIT_STRICT": "true"
|
|
18
|
+
},
|
|
19
|
+
"expected": {
|
|
20
|
+
"exit_code": 2,
|
|
21
|
+
"stderr_contains": ["status:executing"]
|
|
22
|
+
}
|
|
23
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "stop-goal-fit warns (exit 0, stderr has warnings) for an active delivery artifact missing DOD and Goal Fit Gate",
|
|
3
|
+
"policy_class": "stop-goal-fit",
|
|
4
|
+
"canonical_event": "stop",
|
|
5
|
+
"conformance_level": "L1",
|
|
6
|
+
"hook_id": "stop-goal-fit",
|
|
7
|
+
"hook_script": "stop-goal-fit.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "Stop",
|
|
10
|
+
"cwd": "__TEMP_WORKSPACE__"
|
|
11
|
+
},
|
|
12
|
+
"workspace_setup": {
|
|
13
|
+
"AGENTS.md": "# Test Repo\n",
|
|
14
|
+
".flow-agents/my-task/my-task--deliver.md": "# My Task\n\nbranch: main\nstatus: executing\ntype: deliver\n\n## Plan\n\nWork TBD.\n"
|
|
15
|
+
},
|
|
16
|
+
"expected": {
|
|
17
|
+
"exit_code": 0,
|
|
18
|
+
"stdout_echoes_input": true,
|
|
19
|
+
"stderr_contains": ["status:executing", "Definition Of Done", "Goal Fit Gate"]
|
|
20
|
+
}
|
|
21
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "workflow-steering passes through (exit 0, stdout echoes input) when cwd has no active workflow state",
|
|
3
|
+
"policy_class": "workflow-steering",
|
|
4
|
+
"canonical_event": "userPromptSubmit",
|
|
5
|
+
"conformance_level": "L1",
|
|
6
|
+
"hook_id": "workflow-steering",
|
|
7
|
+
"hook_script": "workflow-steering.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "UserPromptSubmit",
|
|
10
|
+
"cwd": "/tmp"
|
|
11
|
+
},
|
|
12
|
+
"expected": {
|
|
13
|
+
"exit_code": 0,
|
|
14
|
+
"stdout_echoes_input": true
|
|
15
|
+
}
|
|
16
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "workflow-steering injects STATE hint (exit 0, stdout contains STATE:) for blocked workflow state at UserPromptSubmit",
|
|
3
|
+
"policy_class": "workflow-steering",
|
|
4
|
+
"canonical_event": "userPromptSubmit",
|
|
5
|
+
"conformance_level": "L1",
|
|
6
|
+
"hook_id": "workflow-steering",
|
|
7
|
+
"hook_script": "workflow-steering.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "UserPromptSubmit",
|
|
10
|
+
"cwd": "__TEMP_WORKSPACE__"
|
|
11
|
+
},
|
|
12
|
+
"workspace_setup": {
|
|
13
|
+
"AGENTS.md": "# Test Repo\n",
|
|
14
|
+
".flow-agents/my-task/state.json": {
|
|
15
|
+
"task_slug": "my-task",
|
|
16
|
+
"status": "blocked",
|
|
17
|
+
"phase": "execute",
|
|
18
|
+
"next_action": {
|
|
19
|
+
"summary": "Needs decision from user",
|
|
20
|
+
"status": "needs_user",
|
|
21
|
+
"target_phase": "verify"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"expected": {
|
|
26
|
+
"exit_code": 0,
|
|
27
|
+
"stdout_contains": ["STATE:", "my-task", "blocked"]
|
|
28
|
+
}
|
|
29
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"description": "workflow-steering injects EXECUTION COMPLETE hint after tool-worker subagent call completes",
|
|
3
|
+
"policy_class": "workflow-steering",
|
|
4
|
+
"canonical_event": "postToolUse",
|
|
5
|
+
"conformance_level": "L1",
|
|
6
|
+
"hook_id": "workflow-steering",
|
|
7
|
+
"hook_script": "workflow-steering.js",
|
|
8
|
+
"payload": {
|
|
9
|
+
"hook_event_name": "PostToolUse",
|
|
10
|
+
"tool_name": "mcp",
|
|
11
|
+
"tool_input": {
|
|
12
|
+
"command": "InvokeSubagents",
|
|
13
|
+
"content": {
|
|
14
|
+
"subagents": [
|
|
15
|
+
{ "agent_name": "tool-worker" }
|
|
16
|
+
]
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"cwd": "/tmp"
|
|
20
|
+
},
|
|
21
|
+
"expected": {
|
|
22
|
+
"exit_code": 0,
|
|
23
|
+
"stdout_contains": ["EXECUTION COMPLETE"]
|
|
24
|
+
}
|
|
25
|
+
}
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* run-conformance.js — Flow Agents policy engine conformance test runner.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* node packaging/conformance/run-conformance.js --self
|
|
7
|
+
* node packaging/conformance/run-conformance.js --adapter-cmd "node my-adapter.js"
|
|
8
|
+
* node packaging/conformance/run-conformance.js --adapter-cmd "..." --level L1
|
|
9
|
+
*
|
|
10
|
+
* Options:
|
|
11
|
+
* --self Run against the canonical engine (must reach L2).
|
|
12
|
+
* --adapter-cmd CMD Shell command to test. Receives JSON payload on stdin,
|
|
13
|
+
* must produce exit code 0 (allow) or 2 (block).
|
|
14
|
+
* --level L0|L1|L2 Minimum level to enforce. Default: L2 for --self, L0 otherwise.
|
|
15
|
+
* --fixtures DIR Override fixture directory (default: the fixtures/ directory next to this script).
|
|
16
|
+
* --verbose Print per-fixture payloads.
|
|
17
|
+
*
|
|
18
|
+
* No external npm dependencies — pure Node.js stdlib.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
'use strict';
|
|
22
|
+
|
|
23
|
+
const fs = require('fs');
|
|
24
|
+
const path = require('path');
|
|
25
|
+
const os = require('os');
|
|
26
|
+
const { spawnSync } = require('child_process');
|
|
27
|
+
|
|
28
|
+
// Filesystem anchors, all derived from the directory that holds this script.
const CONFORMANCE_DIR = __dirname;
const FIXTURES_DIR = path.resolve(CONFORMANCE_DIR, 'fixtures');
const HOOKS_DIR = path.join(CONFORMANCE_DIR, '..', '..', 'scripts', 'hooks');
const RUN_HOOK = path.resolve(HOOKS_DIR, 'run-hook.js');

// Conformance levels in ascending order: reaching L2 implies L1, which implies L0.
const LEVEL_ORDER = ['L0', 'L1', 'L2'];

// Policy classes whose fixtures count toward each level's verdict.
const LEVEL_POLICY_CLASSES = {
  // L0: telemetry only — no policy fixtures required
  L0: new Set(),
  L1: new Set(['workflow-steering', 'stop-goal-fit']),
  L2: new Set([
    'workflow-steering',
    'stop-goal-fit',
    'quality-gate',
    'config-protection',
  ]),
};
|
|
40
|
+
|
|
41
|
+
// -----------------------------------------------------------------------
|
|
42
|
+
// CLI parsing
|
|
43
|
+
// -----------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
/**
 * Parse the runner's command-line flags.
 *
 * Recognised flags: --self, --adapter-cmd CMD, --level LEVEL, --fixtures DIR,
 * --verbose. Flags that take a value consume the next argv entry; any other
 * argument is ignored.
 *
 * @param {string[]} argv - Arguments after the node binary and script path.
 * @returns {{self: boolean, adapterCmd: (string|null), level: (string|null), fixturesDir: string, verbose: boolean}}
 */
function parseArgs(argv) {
  const parsed = {
    self: false,
    adapterCmd: null,
    level: null,
    fixturesDir: FIXTURES_DIR,
    verbose: false,
  };

  let index = 0;
  while (index < argv.length) {
    switch (argv[index]) {
      case '--self':
        parsed.self = true;
        break;
      case '--adapter-cmd':
        index += 1;
        parsed.adapterCmd = argv[index];
        break;
      case '--level':
        index += 1;
        parsed.level = argv[index];
        break;
      case '--fixtures':
        index += 1;
        parsed.fixturesDir = argv[index];
        break;
      case '--verbose':
        parsed.verbose = true;
        break;
      default:
        break;
    }
    index += 1;
  }

  return parsed;
}
|
|
57
|
+
|
|
58
|
+
// -----------------------------------------------------------------------
|
|
59
|
+
// Workspace setup helpers
|
|
60
|
+
// -----------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
/**
 * Materialise a fixture's `workspace_setup` map as files in a fresh temp directory.
 *
 * @param {Object<string, (string|object)>} setup - Relative file path -> content.
 *   String content is written verbatim; anything else is written as
 *   2-space-indented JSON.
 * @returns {string} Absolute path of the new directory (prefix `fa-conformance-`).
 */
function createTempWorkspace(setup) {
  const workspaceRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'fa-conformance-'));

  Object.keys(setup).forEach((relPath) => {
    const content = setup[relPath];
    const target = path.join(workspaceRoot, relPath);

    // Fixture paths may be nested, so create the parent directories first.
    fs.mkdirSync(path.dirname(target), { recursive: true });

    let body;
    if (typeof content === 'string') {
      body = content;
    } else {
      body = JSON.stringify(content, null, 2);
    }
    fs.writeFileSync(target, body, 'utf8');
  });

  return workspaceRoot;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Recursively delete a temp workspace. Errors are deliberately swallowed:
 * cleanup is best-effort and must not affect the fixture's result.
 *
 * @param {string} tmpDir - Directory to remove.
 */
function cleanupWorkspace(tmpDir) {
  const removalOptions = { recursive: true, force: true };
  try {
    fs.rmSync(tmpDir, removalOptions);
  } catch (_ignored) {
    /* ignore */
  }
}
|
|
76
|
+
|
|
77
|
+
// -----------------------------------------------------------------------
|
|
78
|
+
// Fixture loading
|
|
79
|
+
// -----------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
/**
 * Read every `*.json` file in a directory as a fixture.
 *
 * Files are processed in sorted filename order. Each result is the parsed JSON
 * with a `name` property holding the filename (a `name` key inside the JSON
 * itself takes precedence, because the parsed data is spread last).
 *
 * @param {string} fixturesDir - Directory containing fixture files.
 * @returns {object[]} Loaded fixtures.
 * @throws {Error} If the directory is missing or a file cannot be read/parsed.
 */
function loadFixtures(fixturesDir) {
  if (!fs.existsSync(fixturesDir)) {
    throw new Error(`Fixtures directory not found: ${fixturesDir}`);
  }

  const jsonNames = fs
    .readdirSync(fixturesDir)
    .filter((entry) => entry.endsWith('.json'))
    .sort();

  const loaded = [];
  for (const name of jsonNames) {
    let data;
    try {
      const raw = fs.readFileSync(path.join(fixturesDir, name), 'utf8');
      data = JSON.parse(raw);
    } catch (e) {
      throw new Error(`Failed to load fixture ${name}: ${e.message}`);
    }
    loaded.push({ name, ...data });
  }
  return loaded;
}
|
|
98
|
+
|
|
99
|
+
// -----------------------------------------------------------------------
|
|
100
|
+
// Self-invocation (canonical engine)
|
|
101
|
+
// -----------------------------------------------------------------------
|
|
102
|
+
|
|
103
|
+
/**
 * Run one fixture against the canonical engine by spawning `run-hook.js`
 * (RUN_HOOK) with the current Node binary.
 *
 * The fixture payload is sent as JSON on stdin. A payload `cwd` equal to the
 * `__TEMP_WORKSPACE__` placeholder is replaced with `tmpWorkspace` when one
 * was created. `fixture.env`, if present, is layered over `process.env`.
 *
 * @param {object} fixture - Loaded fixture (`payload`, `hook_id`, `hook_script`, optional `env`).
 * @param {(string|null)} tmpWorkspace - Temp workspace path, or null.
 * @returns {{exit_code: (number|null), stdout: string, stderr: string, input: string}}
 *   `exit_code` is null when the child did not exit normally (e.g. timeout);
 *   in that case the spawn error is appended to `stderr`.
 */
function invokeSelf(fixture, tmpWorkspace) {
  // Deep-copy so the placeholder substitution never mutates the loaded fixture.
  const payload = JSON.parse(JSON.stringify(fixture.payload));
  if (tmpWorkspace && payload.cwd === '__TEMP_WORKSPACE__') {
    payload.cwd = tmpWorkspace;
  }
  const input = JSON.stringify(payload);
  const env = Object.assign({}, process.env, fixture.env || {});
  const result = spawnSync(
    process.execPath,
    [RUN_HOOK, fixture.hook_id, fixture.hook_script],
    { input, encoding: 'utf8', env, timeout: 15000, cwd: process.cwd() }
  );

  // spawnSync reports spawn failures, timeouts and buffer overflows through
  // result.error while leaving status null. Surface that reason in stderr so a
  // "got null" exit-code failure can be diagnosed.
  let stderr = result.stderr || '';
  if (result.error) {
    const note = `[conformance] hook process failed: ${result.error.message}`;
    stderr = [stderr.trimEnd(), note].filter(Boolean).join('\n');
  }

  return {
    exit_code: result.status,
    stdout: result.stdout || '',
    stderr,
    input,
  };
}
|
|
122
|
+
|
|
123
|
+
// -----------------------------------------------------------------------
|
|
124
|
+
// Adapter invocation (third-party command)
|
|
125
|
+
// -----------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
/**
 * Run one fixture against a third-party adapter command via `sh -c`.
 *
 * The fixture payload is sent as JSON on stdin. A payload `cwd` equal to the
 * `__TEMP_WORKSPACE__` placeholder is replaced with `tmpWorkspace` when one
 * was created. `fixture.env`, if present, is layered over `process.env`.
 *
 * @param {string} adapterCmd - Shell command line to execute.
 * @param {object} fixture - Loaded fixture (`payload`, optional `env`).
 * @param {(string|null)} tmpWorkspace - Temp workspace path, or null.
 * @returns {{exit_code: (number|null), stdout: string, stderr: string, input: string}}
 *   `exit_code` is null when the child did not exit normally (e.g. timeout);
 *   in that case the spawn error is appended to `stderr`.
 */
function invokeAdapter(adapterCmd, fixture, tmpWorkspace) {
  // Deep-copy so the placeholder substitution never mutates the loaded fixture.
  const payload = JSON.parse(JSON.stringify(fixture.payload));
  if (tmpWorkspace && payload.cwd === '__TEMP_WORKSPACE__') {
    payload.cwd = tmpWorkspace;
  }
  const input = JSON.stringify(payload);
  const env = Object.assign({}, process.env, fixture.env || {});
  const result = spawnSync('sh', ['-c', adapterCmd], {
    input,
    encoding: 'utf8',
    env,
    timeout: 15000,
    cwd: process.cwd(),
  });

  // spawnSync reports spawn failures, timeouts and buffer overflows through
  // result.error while leaving status null. Surface that reason in stderr so a
  // "got null" exit-code failure can be diagnosed.
  let stderr = result.stderr || '';
  if (result.error) {
    const note = `[conformance] adapter process failed: ${result.error.message}`;
    stderr = [stderr.trimEnd(), note].filter(Boolean).join('\n');
  }

  return {
    exit_code: result.status,
    stdout: result.stdout || '',
    stderr,
    input,
  };
}
|
|
148
|
+
|
|
149
|
+
// -----------------------------------------------------------------------
|
|
150
|
+
// Assertion evaluation
|
|
151
|
+
// -----------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
/**
 * Compare a hook invocation's observed result with a fixture's expectations.
 *
 * Supported `fixture.expected` keys: `exit_code` (number),
 * `stdout_echoes_input`, `stdout_is_empty`, `stderr_is_empty` (truthy flags),
 * `stdout_contains` and `stderr_contains` (arrays of substrings).
 *
 * @param {object} fixture - Loaded fixture; should carry an `expected` object.
 * @param {{exit_code: (number|null), stdout: string, stderr: string, input: string}} actual
 *   Result returned by the invoke helpers.
 * @returns {string[]} Human-readable failure messages; empty when the fixture passes.
 */
function evaluate(fixture, actual) {
  const failures = [];
  const expected = fixture.expected;

  // A fixture without an `expected` object cannot verify anything. Report it
  // as a failure of that fixture instead of crashing the whole run.
  if (expected === null || typeof expected !== 'object') {
    failures.push('expected: fixture has no "expected" object, so nothing can be verified');
    return failures;
  }

  if (typeof expected.exit_code === 'number' && actual.exit_code !== expected.exit_code) {
    failures.push(`exit_code: expected ${expected.exit_code}, got ${actual.exit_code}`);
  }

  if (expected.stdout_echoes_input) {
    // Passes when stdout starts with the input JSON, or — more leniently —
    // when it contains the first 40 characters of the input anywhere.
    const normalized = actual.stdout.trim();
    const inputNorm = actual.input.trim();
    if (!normalized.startsWith(inputNorm) && !normalized.includes(inputNorm.slice(0, 40))) {
      failures.push(`stdout_echoes_input: stdout did not echo input payload`);
    }
  }

  if (expected.stdout_is_empty && actual.stdout.trim()) {
    failures.push(`stdout_is_empty: expected empty stdout, got: ${actual.stdout.slice(0, 60)}`);
  }

  if (expected.stderr_is_empty && actual.stderr.trim()) {
    failures.push(`stderr_is_empty: expected empty stderr, got: ${actual.stderr.slice(0, 60)}`);
  }

  if (Array.isArray(expected.stdout_contains)) {
    for (const needle of expected.stdout_contains) {
      if (!actual.stdout.includes(needle)) {
        failures.push(`stdout_contains: missing "${needle}"`);
      }
    }
  }

  if (Array.isArray(expected.stderr_contains)) {
    for (const needle of expected.stderr_contains) {
      if (!actual.stderr.includes(needle)) {
        failures.push(`stderr_contains: missing "${needle}"`);
      }
    }
  }

  return failures;
}
|
|
196
|
+
|
|
197
|
+
// -----------------------------------------------------------------------
|
|
198
|
+
// Main runner
|
|
199
|
+
// -----------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
/**
 * CLI entry point: run every fixture, print per-fixture and per-level
 * results, and exit the process.
 *
 * Exit codes: 0 when the target level is reached, 1 when it is not, 2 for
 * usage errors (no mode selected, or an unknown level).
 *
 * @param {string[]} argv - Arguments after the node binary and script path.
 */
function run(argv) {
  const args = parseArgs(argv);

  const hasMode = args.self || args.adapterCmd;
  if (!hasMode) {
    console.error('Usage: node run-conformance.js --self');
    console.error(' node run-conformance.js --adapter-cmd "<command>"');
    process.exit(2);
  }

  // Explicit --level wins; otherwise the self-test targets L2 and adapters L0.
  const targetLevel = args.level || (args.self ? 'L2' : 'L0');
  if (!LEVEL_ORDER.includes(targetLevel)) {
    console.error(`Unknown conformance level: ${targetLevel}. Use L0, L1, or L2.`);
    process.exit(2);
  }

  const fixtures = loadFixtures(args.fixturesDir);
  const modeLabel = args.self ? 'self (canonical engine)' : `adapter: ${args.adapterCmd}`;
  console.log(`\nFlow Agents Conformance Test Runner`);
  console.log(`====================================`);
  console.log(`Mode: ${modeLabel}`);
  console.log(`Target: ${targetLevel}`);
  console.log(`Fixtures: ${fixtures.length} loaded from ${args.fixturesDir}`);
  console.log('');

  // ---- Execute each fixture ------------------------------------------------
  const results = [];

  for (const fixture of fixtures) {
    const tmpWorkspace = fixture.workspace_setup
      ? createTempWorkspace(fixture.workspace_setup)
      : null;

    let actual;
    try {
      if (args.self) {
        actual = invokeSelf(fixture, tmpWorkspace);
      } else {
        actual = invokeAdapter(args.adapterCmd, fixture, tmpWorkspace);
      }
    } finally {
      // Always remove the temp workspace, even if the invocation threw.
      if (tmpWorkspace) cleanupWorkspace(tmpWorkspace);
    }

    const failures = evaluate(fixture, actual);
    const passed = failures.length === 0;
    results.push({ fixture, actual, failures, passed });

    const icon = passed ? ' PASS' : ' FAIL';
    console.log(`${icon} [${fixture.conformance_level}] ${fixture.name}`);

    if (passed && !args.verbose) continue;

    console.log(` ${fixture.description}`);
    if (!passed) {
      failures.forEach((f) => {
        console.log(` * ${f}`);
      });
    }
    if (args.verbose) {
      console.log(` exit_code: ${actual.exit_code}`);
      const trimmedStderr = actual.stderr.trim();
      if (trimmedStderr) console.log(` stderr: ${trimmedStderr.slice(0, 120)}`);
    }
  }

  // ---- Per-level verdict ---------------------------------------------------
  console.log('');
  console.log('--- Per-level verdict ---');

  const levelPassed = {};
  LEVEL_ORDER.forEach((level) => {
    const ceiling = LEVEL_ORDER.indexOf(level);
    const requiredPolicies = LEVEL_POLICY_CLASSES[level];

    // L0 counts only fixtures tagged L0. Higher levels count fixtures at or
    // below the level whose policy class is required, plus any L0 fixtures.
    const requiredResults = results.filter(({ fixture }) => {
      if (LEVEL_ORDER.indexOf(fixture.conformance_level) > ceiling) return false;
      if (fixture.conformance_level === 'L0') return true;
      return level !== 'L0' && requiredPolicies.has(fixture.policy_class);
    });

    const failing = requiredResults.filter((r) => !r.passed);
    const allPass = failing.length === 0;
    const passCount = requiredResults.length - failing.length;
    levelPassed[level] = allPass;

    const icon = allPass ? 'PASS' : 'FAIL';
    console.log(` ${icon} ${level} (${passCount}/${requiredResults.length} required fixtures pass)`);

    // List at most five failing fixtures, each with its first failure message.
    failing.slice(0, 5).forEach((r) => {
      console.log(` - ${r.fixture.name}: ${r.failures[0]}`);
    });
  });

  console.log('');

  // Highest level reached: walk upward and stop at the first level that failed.
  let highestLevel = null;
  for (let i = 0; i < LEVEL_ORDER.length && levelPassed[LEVEL_ORDER[i]]; i += 1) {
    highestLevel = LEVEL_ORDER[i];
  }

  const totalPass = results.filter((r) => r.passed).length;
  const totalFail = results.length - totalPass;
  console.log(`Total: ${totalPass} passed, ${totalFail} failed`);
  console.log(`Highest conformance level achieved: ${highestLevel || 'none'}`);

  // Exit code: 0 if target level reached, 1 otherwise
  const targetReached =
    highestLevel !== null &&
    LEVEL_ORDER.indexOf(highestLevel) >= LEVEL_ORDER.indexOf(targetLevel);
  console.log(
    targetReached
      ? `\nSELF-TEST VERDICT: ${targetLevel} PASS — adapter satisfies ${targetLevel} conformance.`
      : `\nSELF-TEST VERDICT: ${targetLevel} FAIL — adapter does not satisfy ${targetLevel} conformance.`
  );
  process.exit(targetReached ? 0 : 1);
}
|
|
321
|
+
|
|
322
|
+
// Script entry point: pass along the CLI arguments (argv without the node binary and script path).
run(process.argv.slice(2));
|
package/packaging/manifest.json
CHANGED
|
@@ -168,6 +168,65 @@
|
|
|
168
168
|
"from": "write files",
|
|
169
169
|
"to": "write files"
|
|
170
170
|
}
|
|
171
|
+
],
|
|
172
|
+
"opencode": [
|
|
173
|
+
{
|
|
174
|
+
"from": "Claude Code",
|
|
175
|
+
"to": "opencode"
|
|
176
|
+
},
|
|
177
|
+
{
|
|
178
|
+
"from": "todo tool",
|
|
179
|
+
"to": "todo tool"
|
|
180
|
+
},
|
|
181
|
+
{
|
|
182
|
+
"from": "delegate to a specialist agent",
|
|
183
|
+
"to": "delegate to a subagent"
|
|
184
|
+
},
|
|
185
|
+
{
|
|
186
|
+
"from": "run shell commands",
|
|
187
|
+
"to": "run shell commands"
|
|
188
|
+
},
|
|
189
|
+
{
|
|
190
|
+
"from": "read files",
|
|
191
|
+
"to": "read files"
|
|
192
|
+
},
|
|
193
|
+
{
|
|
194
|
+
"from": "write files",
|
|
195
|
+
"to": "write files"
|
|
196
|
+
}
|
|
197
|
+
],
|
|
198
|
+
"pi": [
|
|
199
|
+
{
|
|
200
|
+
"from": "Claude Code",
|
|
201
|
+
"to": "pi"
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
"from": "todo tool",
|
|
205
|
+
"to": "todo tool"
|
|
206
|
+
},
|
|
207
|
+
{
|
|
208
|
+
"from": "delegate to a specialist agent",
|
|
209
|
+
"to": "delegate to a subagent"
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
"from": "run shell commands",
|
|
213
|
+
"to": "run shell commands"
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
"from": "read files",
|
|
217
|
+
"to": "read files"
|
|
218
|
+
},
|
|
219
|
+
{
|
|
220
|
+
"from": "write files",
|
|
221
|
+
"to": "write files"
|
|
222
|
+
}
|
|
171
223
|
]
|
|
224
|
+
},
|
|
225
|
+
"opencode": {
|
|
226
|
+
"task_dir": ".flow-agents",
|
|
227
|
+
"default_agent_mode": "subagent"
|
|
228
|
+
},
|
|
229
|
+
"pi": {
|
|
230
|
+
"task_dir": ".flow-agents"
|
|
172
231
|
}
|
|
173
232
|
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://flow-agents.dev/schemas/flow-agents-settings.schema.json",
|
|
4
|
+
"title": "Flow Agents Settings",
|
|
5
|
+
"description": "Per-repo policy configuration for Flow Agents features.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": ["schema_version"],
|
|
9
|
+
"properties": {
|
|
10
|
+
"$schema": { "type": "string" },
|
|
11
|
+
"schema_version": { "const": "1.0" },
|
|
12
|
+
"utteranceCheck": { "$ref": "#/$defs/utteranceCheckPolicy" }
|
|
13
|
+
},
|
|
14
|
+
"$defs": {
|
|
15
|
+
"utteranceCheckPolicy": {
|
|
16
|
+
"type": "object",
|
|
17
|
+
"additionalProperties": false,
|
|
18
|
+
"required": ["enabled"],
|
|
19
|
+
"description": "Policy for utterance evidence coverage checking via @kontourai/survey.",
|
|
20
|
+
"properties": {
|
|
21
|
+
"enabled": {
|
|
22
|
+
"type": "boolean",
|
|
23
|
+
"description": "Whether utterance checking is active for this repo. Default: false."
|
|
24
|
+
},
|
|
25
|
+
"mode": {
|
|
26
|
+
"enum": ["report", "strict"],
|
|
27
|
+
"description": "report = badges only, never blocks. strict = exit 2 on disputed/rejected/unsupported (routes hook back). Default when enabled: report."
|
|
28
|
+
},
|
|
29
|
+
"extractor": {
|
|
30
|
+
"enum": ["reference", "anthropic"],
|
|
31
|
+
"description": "Extractor used to split utterances into factual statements. reference = pattern-based (no API key needed). anthropic = model-backed via @kontourai/survey/anthropic (requires ANTHROPIC_API_KEY). Default: reference."
|
|
32
|
+
},
|
|
33
|
+
"bundlePath": {
|
|
34
|
+
"type": "string",
|
|
35
|
+
"description": "Path to a trust bundle JSON file for claim resolution. Relative paths are resolved from the repo root. Omit to use an empty bundle (all statements resolve as unsupported)."
|
|
36
|
+
},
|
|
37
|
+
"model": {
|
|
38
|
+
"type": "string",
|
|
39
|
+
"description": "Model identifier for anthropic extractor (e.g. claude-haiku-4-5). Only used when extractor is anthropic."
|
|
40
|
+
},
|
|
41
|
+
"agentId": {
|
|
42
|
+
"type": "string",
|
|
43
|
+
"description": "Agent identifier for provenance in the trust report. Default: flow-agents-hook."
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
package/scripts/README.md
CHANGED
|
@@ -54,6 +54,10 @@ renamed, or changes category, update the table and the validator together.
|
|
|
54
54
|
| `run-hook.js` | hook runner | `evals/integration/test_hook_category_behaviors.sh`, `evals/integration/test_goal_fit_hook.sh`, `evals/integration/test_workflow_steering_hook.sh` | Applies profile/disable flags, traversal checks, and hook execution. |
|
|
55
55
|
| `config-protection.js` | policy hook | `evals/integration/test_hook_category_behaviors.sh` | Blocks unsafe runtime config edits. |
|
|
56
56
|
| `governance-audit.sh` | policy hook | `evals/integration/test_hook_category_behaviors.sh`, `evals/integration/test_telemetry.sh` | Emits governance/Veritas audit context when configured. |
|
|
57
|
+
| `opencode-hook-adapter.js` | runtime adapter | `evals/integration/test_bundle_install.sh` | Translates opencode plugin events into the shared hook runner contract. |
|
|
58
|
+
| `opencode-telemetry-hook.js` | telemetry shim | `evals/integration/test_bundle_install.sh` | Captures opencode plugin telemetry and fails open. |
|
|
59
|
+
| `pi-hook-adapter.js` | runtime adapter | `evals/integration/test_bundle_install.sh` | Translates pi extension events into the shared hook runner contract. |
|
|
60
|
+
| `pi-telemetry-hook.js` | telemetry shim | `evals/integration/test_bundle_install.sh` | Captures pi extension telemetry and fails open. |
|
|
57
61
|
| `post-edit-accumulator.js` | policy hook | `evals/integration/test_hook_category_behaviors.sh` | Tracks edited files across a turn for later quality hooks. |
|
|
58
62
|
| `quality-gate.js` | policy hook | `evals/integration/test_hook_category_behaviors.sh` | Runs configured quality checks as hook policy. |
|
|
59
63
|
| `report-only-guard.js` | policy hook | `evals/integration/test_hook_category_behaviors.sh` | Protects report-only specialist roles from production edits. |
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* flow-agents dogfood wrapper.
|
|
4
|
+
*
|
|
5
|
+
* Invokes the dogfood subcommand from init.ts to write hook-wiring artifacts
|
|
6
|
+
* for the specified runtime into the current or target directory.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* node scripts/dogfood.js --runtime claude-code [--dest PATH]
|
|
10
|
+
* npm run dogfood -- --runtime claude-code [--dest PATH]
|
|
11
|
+
*
|
|
12
|
+
* This script is intentionally thin: it imports the built mainDogfood export
|
|
13
|
+
* from build/src/cli/init.js so all logic stays in one place and cannot drift.
|
|
14
|
+
* Run `npm run build` or `npm run build:bundles` first if the build is stale.
|
|
15
|
+
*/
|
|
16
|
+
// Load the built CLI module, run the dogfood subcommand with the user's
// arguments, and exit with the code it resolves to.
import("../build/src/cli/init.js")
  .then(({ mainDogfood }) => mainDogfood(process.argv.slice(2)))
  .then((rc) => process.exit(rc))
  .catch((err) => {
    // Without a handler, a missing or stale build surfaces as a raw
    // unhandled-rejection trace. Report it with the remedy and exit non-zero.
    console.error(`dogfood: ${err && err.message ? err.message : err}`);
    console.error("dogfood: if the build is missing or stale, run `npm run build` first.");
    process.exit(1);
  });
|