@fusionkit/ensemble 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +21 -0
- package/dist/agent.js +186 -0
- package/dist/artifacts.d.ts +21 -0
- package/dist/artifacts.js +36 -0
- package/dist/claude-code.d.ts +25 -0
- package/dist/claude-code.js +398 -0
- package/dist/codex.d.ts +69 -0
- package/dist/codex.js +467 -0
- package/dist/command.d.ts +15 -0
- package/dist/command.js +82 -0
- package/dist/dashboard.d.ts +62 -0
- package/dist/dashboard.js +788 -0
- package/dist/external-executor.d.ts +56 -0
- package/dist/external-executor.js +288 -0
- package/dist/harness.d.ts +337 -0
- package/dist/harness.js +1 -0
- package/dist/index.d.ts +30 -0
- package/dist/index.js +15 -0
- package/dist/isolation.d.ts +25 -0
- package/dist/isolation.js +509 -0
- package/dist/judge.d.ts +77 -0
- package/dist/judge.js +16 -0
- package/dist/mock.d.ts +20 -0
- package/dist/mock.js +56 -0
- package/dist/run.d.ts +5 -0
- package/dist/run.js +520 -0
- package/dist/synthesis.d.ts +25 -0
- package/dist/synthesis.js +221 -0
- package/dist/test/codex.test.d.ts +1 -0
- package/dist/test/codex.test.js +237 -0
- package/dist/test/dashboard.test.d.ts +1 -0
- package/dist/test/dashboard.test.js +214 -0
- package/dist/test/ensemble.test.d.ts +1 -0
- package/dist/test/ensemble.test.js +780 -0
- package/dist/test/external-executor.test.d.ts +1 -0
- package/dist/test/external-executor.test.js +273 -0
- package/dist/test/isolation.test.d.ts +1 -0
- package/dist/test/isolation.test.js +359 -0
- package/dist/test/tool-executor.test.d.ts +1 -0
- package/dist/test/tool-executor.test.js +113 -0
- package/dist/test/unified.test.d.ts +1 -0
- package/dist/test/unified.test.js +150 -0
- package/dist/tool-executor.d.ts +14 -0
- package/dist/tool-executor.js +156 -0
- package/dist/trace.d.ts +8 -0
- package/dist/trace.js +7 -0
- package/dist/unified.d.ts +101 -0
- package/dist/unified.js +422 -0
- package/dist/worktree.d.ts +25 -0
- package/dist/worktree.js +75 -0
- package/package.json +35 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { test } from "node:test";
|
|
6
|
+
import { assertToolExecutionRecordV1 } from "@fusionkit/protocol";
|
|
7
|
+
import { createToolExecutor, registerDemoTools } from "../tool-executor.js";
|
|
8
|
+
import { createMockHarness } from "../mock.js";
|
|
9
|
+
import { runEnsemble } from "../run.js";
|
|
10
|
+
function contract(overrides = {}) {
|
|
11
|
+
return {
|
|
12
|
+
executor_id: "exec_demo",
|
|
13
|
+
mode: "demo_safe",
|
|
14
|
+
environment_id: "env_a",
|
|
15
|
+
tool_policy_id: "policy_read",
|
|
16
|
+
allowed_tools: ["read_file", "echo"],
|
|
17
|
+
side_effects: ["none", "read"],
|
|
18
|
+
limits: { timeoutMs: 1000 },
|
|
19
|
+
audit_sink: "memory",
|
|
20
|
+
...overrides
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
/**
 * Creates a throwaway `<tmpdir>/tool-executor-XXXXXX/repo` directory that
 * contains a single README.md with the text "hello tools\n".
 *
 * @returns The repo path and a `cleanup` callback that recursively removes
 *   the whole temporary root (not just the repo directory).
 */
function repoFixture() {
    const tempRoot = mkdtempSync(join(tmpdir(), "tool-executor-"));
    const repo = join(tempRoot, "repo");
    mkdirSync(repo);
    const readmePath = join(repo, "README.md");
    writeFileSync(readmePath, "hello tools\n");
    const cleanup = () => rmSync(tempRoot, { recursive: true, force: true });
    return { repo, cleanup };
}
|
|
30
|
+
// Repeating an identical read request on one executor must be served from the
// dedupe cache, while an executor built for a different environment must not
// share that execution id.
test("read-only duplicate calls dedupe only under matching policy and environment", async () => {
    const { repo, cleanup } = repoFixture();
    try {
        const readReadme = {
            tool_name: "read_file",
            arguments: { path: "README.md" },
            side_effects: "read"
        };
        const envA = createToolExecutor(contract());
        registerDemoTools(envA, repo);
        const first = await envA.execute(readReadme);
        const second = await envA.execute(readReadme);
        assert.equal(first.deduped, false);
        assert.equal(second.deduped, true);
        assert.equal(second.record.execution_id, first.record.execution_id);
        // Same request, different environment_id in the contract.
        const envB = createToolExecutor(contract({ environment_id: "env_b" }));
        registerDemoTools(envB, repo);
        const third = await envB.execute(readReadme);
        assert.notEqual(third.record.execution_id, first.record.execution_id);
    }
    finally {
        cleanup();
    }
});
|
|
54
|
+
// Even when the tool names are allow-listed, the default contract only permits
// the "none" and "read" side-effect classes, so write/external calls must be
// rejected with a `tool_denied` error record.
test("write and external calls are denied by default", async () => {
    const executor = createToolExecutor(contract({ allowed_tools: ["write_file", "fetch"] }));
    const writeRequest = {
        tool_name: "write_file",
        arguments: { path: "README.md" },
        side_effects: "write"
    };
    const externalRequest = {
        tool_name: "fetch",
        arguments: { url: "https://example.com" },
        side_effects: "external"
    };
    const write = await executor.execute(writeRequest);
    const external = await executor.execute(externalRequest);
    for (const { record } of [write, external]) {
        assert.equal(record.status, "failed");
        assert.equal(record.error?.kind, "tool_denied");
    }
});
|
|
71
|
+
// A permitted read must yield a record that passes the protocol validator,
// reports success, and carries a sha256-prefixed output hash.
test("allowed read-only tools emit valid tool-execution-record.v1", async () => {
    const fixture = repoFixture();
    try {
        const executor = createToolExecutor(contract());
        registerDemoTools(executor, fixture.repo);
        const readRequest = {
            tool_name: "read_file",
            arguments: { path: "README.md" },
            side_effects: "read"
        };
        const { record } = await executor.execute(readRequest);
        assertToolExecutionRecordV1(record);
        assert.equal(record.status, "succeeded");
        assert.ok(record.output_hash?.startsWith("sha256:"));
    }
    finally {
        fixture.cleanup();
    }
});
|
|
89
|
+
// Tool records reported by a mock candidate must surface both on the run
// result and in the per-candidate summary's `toolExecutionIds`.
test("candidate summaries include tool execution ids", async () => {
    const executionId = "exec_candidate_read";
    const toolRecord = {
        execution_id: executionId,
        plan_id: "plan_candidate_read",
        status: "succeeded",
        output_hash: `sha256:${"a".repeat(64)}`
    };
    const harness = createMockHarness({
        candidates: {
            fast: { toolRecords: [toolRecord] }
        }
    });
    const result = await runEnsemble({
        id: "tool_summary",
        harness,
        models: [{ id: "fast", model: "fake-fast" }],
        runtime: { id: "local" },
        judge: { id: "none" },
        policy: { id: "policy", allowedTools: ["read_file"], sideEffects: "read_only" },
        prompt: "tool summary",
        sourceRepo: "handoffkit",
        baseGitSha: "a".repeat(40)
    });
    const [firstToolRecord] = result.toolRecords;
    assert.equal(firstToolRecord?.execution_id, "exec_candidate_read");
    assert.deepEqual(result.summary?.candidates[0]?.toolExecutionIds, ["exec_candidate_read"]);
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { createServer } from "node:http";
|
|
3
|
+
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
import { spawnSync } from "node:child_process";
|
|
7
|
+
import { test } from "node:test";
|
|
8
|
+
import { runUnifiedHarnessE2E } from "../unified.js";
|
|
9
|
+
async function readBody(req) {
|
|
10
|
+
const chunks = [];
|
|
11
|
+
for await (const chunk of req)
|
|
12
|
+
chunks.push(chunk);
|
|
13
|
+
return Buffer.concat(chunks);
|
|
14
|
+
}
|
|
15
|
+
async function closeServer(server) {
|
|
16
|
+
await new Promise((resolve, reject) => {
|
|
17
|
+
server.close((error) => (error ? reject(error) : resolve()));
|
|
18
|
+
});
|
|
19
|
+
}
|
|
20
|
+
/**
 * Starts an in-process stub of a chat-completions backend on 127.0.0.1 using
 * an OS-assigned port.
 *
 * Behaviour of the stub:
 * - anything other than `POST /v1/chat/completions` gets an empty 404;
 * - each accepted request has its `model` (or "unknown") appended to `models`;
 * - the reply content is `JUDGE_FINAL:<model>` when the system message contains
 *   "synthesize coding harness candidate evidence", else `MODEL_REPLY:<model>`;
 * - any failure while handling (e.g. invalid JSON) is answered with a 500 JSON
 *   body `{ error }`.
 *
 * @returns `url` of the stub, the live `models` array, and an async `close`.
 */
async function startFusionBackend() {
    const models = [];
    const respondJson = (res, statusCode, payload) => {
        res.writeHead(statusCode, { "content-type": "application/json" });
        res.end(JSON.stringify(payload));
    };
    const handleRequest = async (req, res) => {
        const isCompletionCall = req.method === "POST" && req.url === "/v1/chat/completions";
        if (!isCompletionCall) {
            res.writeHead(404).end();
            return;
        }
        const rawBody = await readBody(req);
        const body = JSON.parse(rawBody.toString("utf8"));
        const model = body.model ?? "unknown";
        models.push(model);
        const systemMessage = body.messages?.find((message) => message.role === "system");
        const system = systemMessage?.content ?? "";
        let content;
        if (system.includes("synthesize coding harness candidate evidence")) {
            content = `JUDGE_FINAL:${model}`;
        }
        else {
            content = `MODEL_REPLY:${model}`;
        }
        respondJson(res, 200, {
            choices: [{ message: { role: "assistant", content } }]
        });
    };
    const server = createServer((req, res) => {
        // Fire-and-forget: failures are reported to the client as a 500.
        void handleRequest(req, res).catch((error) => {
            respondJson(res, 500, { error: String(error) });
        });
    });
    await new Promise((resolve, reject) => {
        const onListening = () => {
            // Listening succeeded; stop treating later errors as startup failures.
            server.off("error", reject);
            resolve(undefined);
        };
        server.once("error", reject);
        server.listen(0, "127.0.0.1", onListening);
    });
    const address = server.address();
    assert.ok(address !== null && typeof address === "object");
    const url = `http://127.0.0.1:${address.port}`;
    return {
        url,
        models,
        close: () => closeServer(server)
    };
}
|
|
59
|
+
/**
 * Creates a temporary git repository fixture for the unified-harness tests.
 *
 * Layout: `<tmpdir>/unified-harness-e2e-XXXXXX/{repo,out}`. The repo is
 * initialised on branch `main`, given a local user identity, and receives one
 * commit containing README.md and candidate.js. candidate.js posts a probe to
 * `FUSIONKIT_CHAT_COMPLETIONS_URL`, writes the reply to
 * `result-<HARNESS_MODEL_ID>.txt` and prints `MODEL_OK:<HARNESS_MODEL_ID>`.
 * `out` is only a path; this function does not create it.
 *
 * Every git invocation is checked: previously a missing or outdated git binary
 * or a failed commit was silently ignored and only surfaced later as an
 * unrelated-looking test failure.
 *
 * @returns `root`, `repo`, `outputRoot` paths and a `cleanup` callback that
 *   recursively removes `root`.
 * @throws Error if git cannot be spawned or any git command exits non-zero;
 *   the temporary root is removed before the error propagates.
 */
function makeRepo() {
    const root = mkdtempSync(join(tmpdir(), "unified-harness-e2e-"));
    const repo = join(root, "repo");
    const outputRoot = join(root, "out");
    const cleanup = () => rmSync(root, { recursive: true, force: true });
    // Runs one git command inside the fixture repo and fails loudly on error.
    const git = (...args) => {
        const outcome = spawnSync("git", args, { cwd: repo, encoding: "utf8" });
        if (outcome.error) {
            throw outcome.error;
        }
        if (outcome.status !== 0) {
            throw new Error(`git ${args.join(" ")} failed (exit ${outcome.status}): ${outcome.stderr}`);
        }
    };
    try {
        mkdirSync(repo);
        git("init", "--quiet", "--initial-branch=main");
        git("config", "user.email", "unified@warrant.local");
        git("config", "user.name", "unified");
        writeFileSync(join(repo, "README.md"), "# unified fixture\n");
        writeFileSync(join(repo, "candidate.js"), [
            "const fs = require('node:fs');",
            "(async () => {",
            "  const response = await fetch(process.env.FUSIONKIT_CHAT_COMPLETIONS_URL, {",
            "    method: 'POST',",
            "    headers: { 'content-type': 'application/json' },",
            "    body: JSON.stringify({",
            "      model: process.env.FUSIONKIT_MODEL,",
            "      messages: [{ role: 'user', content: 'candidate probe' }]",
            "    })",
            "  });",
            "  const body = await response.json();",
            "  fs.writeFileSync(`result-${process.env.HARNESS_MODEL_ID}.txt`, body.choices[0].message.content);",
            "  console.log(`MODEL_OK:${process.env.HARNESS_MODEL_ID}`);",
            "})().catch((error) => { console.error(error); process.exit(1); });",
            ""
        ].join("\n"));
        git("add", "-A");
        git("commit", "--quiet", "-m", "init");
    }
    catch (error) {
        // Do not leak the temp directory when fixture setup itself fails.
        cleanup();
        throw error;
    }
    return { root, repo, outputRoot, cleanup };
}
|
|
89
|
+
// End-to-end check of the "command" harness: both candidate models and the
// judge must reach the stub backend, and the run must produce a patch artifact
// and a report file.
//
// Cleanup is nested so that (a) a failure to start the backend still removes
// the temp repo and (b) a rejecting `backend.close()` cannot skip
// `fixture.cleanup()`.
test("unified runner routes each command candidate through FusionKit and synthesizes", async () => {
    const fixture = makeRepo();
    try {
        const backend = await startFusionBackend();
        try {
            const result = await runUnifiedHarnessE2E({
                id: "unified_test",
                fusionBackendUrl: backend.url,
                repo: fixture.repo,
                outputRoot: fixture.outputRoot,
                prompt: "Run the candidate script and synthesize the result.",
                harnesses: ["command"],
                models: [
                    { id: "alpha", model: "fusion-alpha" },
                    { id: "beta", model: "fusion-beta" }
                ],
                judgeModel: "fusion-judge",
                command: "node candidate.js",
                timeoutMs: 10_000
            });
            const row = result.results[0];
            assert.equal(row?.status, "succeeded");
            assert.equal(row?.ensemble?.candidates.length, 2);
            assert.equal(row?.ensemble?.judgeSynthesisRecord?.final_output, "JUDGE_FINAL:fusion-judge");
            assert.ok(row?.ensemble?.artifacts.some((artifact) => artifact.kind === "patch"));
            // Sort a copy: arrival order is not asserted, and the backend's
            // recorder array should not be reordered as a side effect.
            assert.deepEqual([...backend.models].sort(), ["fusion-alpha", "fusion-beta", "fusion-judge"]);
            assert.ok(result.reportPath?.endsWith("unified-e2e-report.json"));
        }
        finally {
            await backend.close();
        }
    }
    finally {
        fixture.cleanup();
    }
});
|
|
121
|
+
// With an injected `cursorRunner`, both Cursor harness kinds must produce a
// result row, in the order requested, and the runner must be called once per
// kind. The backend URL is a fixed placeholder; no stub server is started here.
test("unified runner includes Cursor ACP and desktop adapter results", async () => {
    const fixture = makeRepo();
    try {
        const seen = [];
        const recordingRunner = async (input) => {
            seen.push(input);
            return {
                status: "succeeded",
                message: `${input.kind} ok`,
                artifacts: { report: join(input.outDir, "report.json") },
                details: { model: input.model.id }
            };
        };
        const result = await runUnifiedHarnessE2E({
            id: "unified_cursor_test",
            fusionBackendUrl: "http://127.0.0.1:9999",
            repo: fixture.repo,
            outputRoot: fixture.outputRoot,
            prompt: "Run Cursor probes.",
            harnesses: ["cursor-acp", "cursor-desktop"],
            models: [{ id: "cursor-local", model: "local-model" }],
            cursorRunner: recordingRunner
        });
        const expectedKinds = ["cursor-acp", "cursor-desktop"];
        const harnessColumn = result.results.map((row) => row.harness);
        const statusColumn = result.results.map((row) => row.status);
        const seenKinds = seen.map((input) => input.kind);
        assert.deepEqual(harnessColumn, expectedKinds);
        assert.deepEqual(statusColumn, ["succeeded", "succeeded"]);
        assert.deepEqual(seenKinds, expectedKinds);
    }
    finally {
        fixture.cleanup();
    }
});
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { modelFusionSideEffects } from "@fusionkit/protocol";
|
|
2
|
+
import type { JsonValue, ToolDefinition, ToolExecutionRequest, ToolExecutionResult, ToolExecutorContract, ToolSideEffectClass } from "@fusionkit/protocol";
|
|
3
|
+
/**
 * A tool that can be registered on a {@link ToolExecutor}: its protocol-level
 * definition plus the function that performs it.
 */
export type ToolImplementation = {
    /** Tool descriptor; the executor registers the tool under `definition.tool_name`. */
    definition: ToolDefinition;
    /** Runs the tool with the request's arguments; may answer synchronously or via a promise. */
    execute(args: JsonValue): JsonValue | Promise<JsonValue>;
};
|
|
7
|
+
/**
 * Executor object returned by `createToolExecutor`.
 */
export type ToolExecutor = {
    /** The contract this executor was created with. */
    contract: ToolExecutorContract;
    /** Adds a tool implementation to this executor. */
    register(tool: ToolImplementation): void;
    /** Handles one tool request and resolves with its execution result. */
    execute(request: ToolExecutionRequest): Promise<ToolExecutionResult>;
};
|
|
12
|
+
/**
 * Creates a tool executor bound to `contract`.
 *
 * @param contract - Executor contract the returned executor evaluates requests against.
 * @returns An executor with an initially empty tool registry.
 */
export declare function createToolExecutor(contract: ToolExecutorContract): ToolExecutor;
|
|
13
|
+
/**
 * Registers the demo tools (`read_file` and `echo`) on `executor`.
 *
 * @param executor - Executor that receives the registrations.
 * @param workspace - Directory against which `read_file` paths are resolved.
 */
export declare function registerDemoTools(executor: ToolExecutor, workspace: string): void;
|
|
14
|
+
/**
 * Converts a tool side-effect class with `modelFusionSideEffects` from
 * `@fusionkit/protocol`; the return type is whatever that function returns.
 */
export declare function sideEffectsForTool(sideEffects: ToolSideEffectClass): ReturnType<typeof modelFusionSideEffects>;
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { artifactHash, assertToolExecutionRecordV1, evaluateToolPolicy, MODEL_FUSION_SCHEMA_BUNDLE_HASH, modelFusionSideEffects, toolArgumentsHash, toolCallKey } from "@fusionkit/protocol";
|
|
3
|
+
import { resolveInsideWorkspace } from "@fusionkit/workspace";
|
|
4
|
+
/**
 * Builds the fixed schema/producer header placed at the start of every
 * tool-execution record.
 *
 * @param createdAt - Timestamp string stored verbatim as `created_at`.
 * @returns Header fields; `producer_git_sha` is a 40-character all-zero placeholder.
 */
function metadata(createdAt) {
    const placeholderGitSha = "0".repeat(40);
    const header = {
        schema: "tool-execution-record.v1",
        schema_version: "v1",
        schema_bundle_hash: MODEL_FUSION_SCHEMA_BUNDLE_HASH,
        producer: "handoffkit-ensemble",
        producer_version: "0.1.0",
        producer_git_sha: placeholderGitSha,
        created_at: createdAt
    };
    return header;
}
|
|
15
|
+
/**
 * Narrows an arbitrary value to a plain-object view.
 *
 * @param value - Any value.
 * @returns `value` itself when it is a non-null, non-array object; otherwise
 *   a new empty object.
 */
function asObject(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
        return {};
    }
    return value;
}
|
|
20
|
+
/**
 * Assembles and validates one tool-execution record.
 *
 * @param input - `{ contract, request, status, decision, createdAt, output? }`.
 *   - `execution_id` is `tool_exec_` + the first 16 characters of
 *     `toolCallKey({ contract, request })`.
 *   - `plan_id` is the request's own `plan_id`, or `tool_plan_` + 16 characters
 *     of the arguments hash taken after its "sha256:" prefix.
 *   - `output_hash` is added only when `output` is not undefined.
 *   - `error` is added for every status other than "succeeded"; its kind is the
 *     decision's `errorKind` for deny decisions and "internal_error" otherwise.
 * @returns The record, after `assertToolExecutionRecordV1` has accepted it.
 */
function executionRecord(input) {
    const { contract, request, status, output, decision, createdAt } = input;
    const argumentsHash = toolArgumentsHash(request.arguments);
    const header = metadata(createdAt);
    const callKey = toolCallKey({ contract, request });
    const executionId = `tool_exec_${callKey.slice(0, 16)}`;
    const hashPrefix = "sha256:";
    const planId = request.plan_id ??
        `tool_plan_${argumentsHash.slice(hashPrefix.length, hashPrefix.length + 16)}`;
    const outputFields = output === undefined
        ? {}
        : { output_hash: artifactHash(JSON.stringify(output)) };
    let errorFields = {};
    if (status !== "succeeded") {
        let kind;
        if (decision.decision === "deny") {
            kind = decision.errorKind;
        }
        else {
            kind = "internal_error";
        }
        errorFields = {
            error: {
                kind,
                message: decision.reason,
                retryable: false
            }
        };
    }
    const record = {
        ...header,
        execution_id: executionId,
        plan_id: planId,
        status,
        ...outputFields,
        ...errorFields
    };
    assertToolExecutionRecordV1(record);
    return record;
}
|
|
45
|
+
/**
 * Creates a tool executor bound to `contract`.
 *
 * `execute(request)` proceeds in this order:
 * 1. `evaluateToolPolicy(contract, request)`; a "deny" decision yields a
 *    "failed" record without touching the registry.
 * 2. If the decision carries a `dedupeKey` with a stored result, a fresh record
 *    is built from the stored status/output and returned with `deduped: true`.
 * 3. An unregistered tool yields an "unsupported" record with a synthetic deny
 *    decision (`capability_missing`).
 * 4. Otherwise the tool runs; the result is stored under the `dedupeKey` (when
 *    present) and returned. An exception thrown by the tool propagates to the
 *    caller and nothing is stored.
 *
 * @param contract - Executor contract passed to the policy check and record builder.
 * @returns `{ contract, register, execute }`.
 */
export function createToolExecutor(contract) {
    const registry = new Map();
    const completed = new Map();
    const register = (tool) => {
        registry.set(tool.definition.tool_name, tool);
    };
    const execute = async (request) => {
        const decision = evaluateToolPolicy(contract, request);
        const createdAt = new Date().toISOString();
        // All records of this call share contract, request and timestamp.
        const recordWith = (status, recordDecision, output) => executionRecord({
            contract,
            request,
            status,
            output,
            decision: recordDecision,
            createdAt
        });
        if (decision.decision === "deny") {
            return {
                record: recordWith("failed", decision),
                deduped: false,
                decision
            };
        }
        const dedupeKey = decision.dedupeKey;
        const cached = dedupeKey !== undefined ? completed.get(dedupeKey) : undefined;
        if (cached !== undefined) {
            const replayedOutput = cached.output;
            return {
                record: recordWith(cached.record.status, decision, replayedOutput),
                ...(replayedOutput !== undefined ? { output: replayedOutput } : {}),
                deduped: true,
                decision
            };
        }
        const tool = registry.get(request.tool_name);
        if (tool === undefined) {
            const denied = {
                decision: "deny",
                reason: `tool ${request.tool_name} is not registered`,
                errorKind: "capability_missing"
            };
            return {
                record: recordWith("unsupported", denied),
                deduped: false,
                decision: denied
            };
        }
        const output = await tool.execute(request.arguments);
        const result = {
            record: recordWith("succeeded", decision, output),
            output,
            deduped: false,
            decision
        };
        if (dedupeKey !== undefined) {
            completed.set(dedupeKey, result);
        }
        return result;
    };
    return { contract, register, execute };
}
|
|
126
|
+
/**
 * Registers two demo tools on `executor`, in this order:
 * - `read_file` (side effects "read"): requires a string `path` argument,
 *   resolves it with `resolveInsideWorkspace(workspace, path)` and returns
 *   `{ path, content }` with the file read as UTF-8;
 * - `echo` (side effects "none"): returns its arguments unchanged.
 *
 * @param executor - Executor whose `register` method receives the tools.
 * @param workspace - Directory passed to `resolveInsideWorkspace` for `read_file`.
 */
export function registerDemoTools(executor, workspace) {
    const readFileTool = {
        definition: {
            tool_name: "read_file",
            side_effects: "read",
            description: "Read a workspace-relative file."
        },
        execute(args) {
            const { path } = asObject(args);
            if (typeof path !== "string") {
                throw new Error("read_file requires path");
            }
            const resolvedPath = resolveInsideWorkspace(workspace, path);
            return { path, content: readFileSync(resolvedPath, "utf8") };
        }
    };
    executor.register(readFileTool);
    const echoTool = {
        definition: {
            tool_name: "echo",
            side_effects: "none",
            description: "Echo a JSON-safe value."
        },
        execute(args) {
            return args;
        }
    };
    executor.register(echoTool);
}
|
|
154
|
+
/**
 * Thin wrapper: forwards a tool side-effect class to `modelFusionSideEffects`
 * and returns its result unchanged.
 */
export function sideEffectsForTool(sideEffects) {
    const mapped = modelFusionSideEffects(sideEffects);
    return mapped;
}
|
package/dist/trace.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ensemble-facing surface for the fusion-trace emitter. The canonical
|
|
3
|
+
* implementation lives in `@fusionkit/protocol` (a dependency-free leaf) so the
|
|
4
|
+
* gateway, the AI SDK worktree agent, and the CLI can share it without import
|
|
5
|
+
* cycles; this module simply re-exports it for ensemble call sites.
|
|
6
|
+
*/
|
|
7
|
+
export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "@fusionkit/protocol";
|
|
8
|
+
export type { EmitInput, FusionTraceComponent, FusionTraceEvent, FusionTraceEventType } from "@fusionkit/protocol";
|
package/dist/trace.js
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ensemble-facing surface for the fusion-trace emitter. The canonical
|
|
3
|
+
* implementation lives in `@fusionkit/protocol` (a dependency-free leaf) so the
|
|
4
|
+
* gateway, the AI SDK worktree agent, and the CLI can share it without import
|
|
5
|
+
* cycles; this module simply re-exports it for ensemble call sites.
|
|
6
|
+
*/
|
|
7
|
+
export { ambientTraceId, emitTrace, getTraceEmitter, newSpanId, newTraceId, TRACE_CANDIDATE_HEADER, TRACE_ID_HEADER, TRACE_PARENT_SPAN_HEADER, TRACE_SPAN_HEADER, TraceEmitter } from "@fusionkit/protocol";
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import type { JsonValue, ModelFusionStatus } from "@fusionkit/protocol";
|
|
2
|
+
import type { EnsembleModel, EnsembleRunResult } from "./harness.js";
|
|
3
|
+
import type { JudgeSynthesizer } from "./judge.js";
|
|
4
|
+
/** Harness identifiers accepted in `UnifiedHarnessE2EOptions.harnesses` and reported on each matrix result row. */
export type UnifiedHarnessKind = "mock" | "command" | "agent" | "codex" | "claude-code" | "cursor-acp" | "cursor-desktop";
|
|
5
|
+
/**
 * One entry of `UnifiedHarnessE2EResult.results`, identified by its harness kind.
 */
export type UnifiedHarnessMatrixResult = {
    harness: UnifiedHarnessKind;
    modelIds: Array<string>;
    status: ModelFusionStatus;
    message: string;
    /** Ensemble run result, when the row has one. */
    ensemble?: EnsembleRunResult;
    artifacts: Record<string, string>;
    details: Record<string, JsonValue>;
};
|
|
14
|
+
/**
 * Value resolved by `runUnifiedHarnessE2E`.
 */
export type UnifiedHarnessE2EResult = {
    id: string;
    generatedAt: string;
    fusionBackendUrl: string;
    repo: string;
    results: Array<UnifiedHarnessMatrixResult>;
    /** Report file path, when present. */
    reportPath?: string;
};
|
|
22
|
+
/**
 * Argument passed to `UnifiedHarnessE2EOptions.cursorRunner`; `kind` is
 * restricted to the two Cursor harness kinds.
 */
export type CursorHarnessRunnerInput = {
    kind: Extract<UnifiedHarnessKind, "cursor-acp" | "cursor-desktop">;
    model: EnsembleModel;
    fusionBackendUrl: string;
    repo: string;
    outDir: string;
    timeoutMs?: number;
    cursorKitDir?: string;
};
|
|
31
|
+
/**
 * Value a `cursorRunner` resolves with for one Cursor harness invocation.
 */
export type CursorHarnessRunnerResult = {
    status: ModelFusionStatus;
    message: string;
    artifacts?: Record<string, string>;
    details?: Record<string, JsonValue>;
};
|
|
37
|
+
/**
 * Options accepted by `runUnifiedHarnessE2E`.
 */
export type UnifiedHarnessE2EOptions = {
    id?: string;
    fusionBackendUrl: string;
    fusionApiKey?: string;
    repo: string;
    outputRoot: string;
    prompt: string;
    harnesses: Array<UnifiedHarnessKind>;
    models: Array<EnsembleModel>;
    command?: string;
    timeoutMs?: number;
    judgeModel?: string;
    cursorKitDir?: string;
    /** Optional runner for the Cursor harness kinds (`cursor-acp`, `cursor-desktop`). */
    cursorRunner?: (input: CursorHarnessRunnerInput) => Promise<CursorHarnessRunnerResult>;
    /**
     * Per-candidate model backend URLs keyed by `EnsembleModel.id`. When a
     * candidate's model id is present, its command harness is pointed at that
     * endpoint instead of the shared `fusionBackendUrl`, so each panel model can
     * back its own real candidate (e.g. a local MLX trio).
     */
    modelEndpoints?: Record<string, string>;
    /**
     * Observability correlation id. When set, the agent harness, panel-model
     * calls, and the FusionKit trajectory synthesis are all tagged with this
     * trace so the companion app can reconstruct one session.
     */
    traceId?: string;
    /** Session root span; panel candidate spans parent under it. */
    parentSpanId?: string;
    /** User-turn index this panel run belongs to (stamped on candidate events). */
    turn?: number;
};
|
|
69
|
+
/**
 * Declares a factory for a `JudgeSynthesizer` configured with a backend URL
 * and judge model. The implementation is not part of this declaration file.
 *
 * @param input - Backend URL, model name, required `responseShape`, and
 *   optional `apiKey` / `traceId`.
 */
export declare function createFusionKitJudgeSynthesizer(input: {
    fusionBackendUrl: string;
    model: string;
    apiKey?: string;
    responseShape: string;
    traceId?: string;
}): JudgeSynthesizer;
|
|
76
|
+
/**
 * Options accepted by `runFusionPanels`.
 */
export type FusionPanelOptions = {
    id?: string;
    repo: string;
    outputRoot: string;
    prompt: string;
    models: Array<EnsembleModel>;
    modelEndpoints?: Record<string, string>;
    /** Fallback agent backend URL for models without a dedicated endpoint. */
    fusionBackendUrl: string;
    fusionApiKey?: string;
    timeoutMs?: number;
    traceId?: string;
    /** Session root span so panel candidate spans nest under the session. */
    parentSpanId?: string;
    /** User-turn index this panel run belongs to (for per-turn grouping). */
    turn?: number;
};
|
|
93
|
+
/**
 * Runs the panel once. Each panel model executes the task as a real coding
 * agent in its own git worktree, and the resulting trajectories are captured;
 * these are the candidate reference solutions the judge later fuses.
 *
 * The full agent harness is reused through `runEnsemble` with a capturing
 * judge, so no fusion/synthesis call happens here: the trajectories themselves
 * are the product.
 *
 * @param options - Panel configuration, see {@link FusionPanelOptions}.
 * @returns A promise of the captured trajectories as loosely typed records.
 */
export declare function runFusionPanels(options: FusionPanelOptions): Promise<Array<Record<string, unknown>>>;
|
|
101
|
+
/**
 * Runs the unified harness end-to-end flow described by `options` and resolves
 * with a {@link UnifiedHarnessE2EResult}. The implementation is not part of
 * this declaration file.
 */
export declare function runUnifiedHarnessE2E(options: UnifiedHarnessE2EOptions): Promise<UnifiedHarnessE2EResult>;
|