@fusionkit/adapter-ai-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +23 -0
- package/dist/index.js +17 -0
- package/dist/managed-server.d.ts +102 -0
- package/dist/managed-server.js +348 -0
- package/dist/mlx-env.d.ts +178 -0
- package/dist/mlx-env.js +371 -0
- package/dist/model.d.ts +88 -0
- package/dist/model.js +149 -0
- package/dist/remote-tools.d.ts +56 -0
- package/dist/remote-tools.js +57 -0
- package/dist/routed-model.d.ts +88 -0
- package/dist/routed-model.js +218 -0
- package/dist/swarm-tools.d.ts +149 -0
- package/dist/swarm-tools.js +324 -0
- package/dist/test/golden.test.d.ts +1 -0
- package/dist/test/golden.test.js +129 -0
- package/dist/test/managed-server.test.d.ts +1 -0
- package/dist/test/managed-server.test.js +198 -0
- package/dist/test/mlx-env.test.d.ts +1 -0
- package/dist/test/mlx-env.test.js +351 -0
- package/dist/test/model.test.d.ts +1 -0
- package/dist/test/model.test.js +110 -0
- package/dist/test/remote-tools.test.d.ts +1 -0
- package/dist/test/remote-tools.test.js +151 -0
- package/dist/test/routed-model.test.d.ts +1 -0
- package/dist/test/routed-model.test.js +223 -0
- package/dist/test/swarm-tools.test.d.ts +1 -0
- package/dist/test/swarm-tools.test.js +157 -0
- package/dist/worktree-agent.d.ts +53 -0
- package/dist/worktree-agent.js +303 -0
- package/package.json +39 -0
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { test } from "node:test";
|
|
3
|
+
import { generateText } from "ai";
|
|
4
|
+
import { MockLanguageModelV3 } from "ai/test";
|
|
5
|
+
import { loadRouterCard, routedModel, withRoutedModel } from "../routed-model.js";
|
|
6
|
+
// Token-usage payload that textModel's mock generations report back.
const usage = {
    inputTokens: { total: 1, noCache: 1, cacheRead: undefined, cacheWrite: undefined },
    outputTokens: { total: 1, text: 1, reasoning: undefined }
};
|
|
15
|
+
/**
 * Builds a mock language model with the given model id whose `doGenerate`
 * always resolves with one text part containing `text`, a "stop" finish
 * reason, the shared `usage` payload and no warnings.
 */
function textModel(id, text) {
    const doGenerate = async () => {
        const content = [{ type: "text", text }];
        const finishReason = { unified: "stop", raw: "stop" };
        return { content, finishReason, usage, warnings: [] };
    };
    return new MockLanguageModelV3({ modelId: id, doGenerate });
}
|
|
26
|
+
/**
 * Builds a mock language model with the given model id whose `doGenerate`
 * always rejects with `new Error(message)`.
 */
function failingModel(id, message) {
    const alwaysFail = async () => {
        throw new Error(message);
    };
    return new MockLanguageModelV3({ modelId: id, doGenerate: alwaysFail });
}
|
|
34
|
+
// Fixture router card: two clusters in a 2-d embedding space, where
// axis 0 = "math" and axis 1 = "code". math-llm aces cluster 0 and fails
// cluster 1; code-llm mirrors it; generalist is mediocre everywhere but cheap.
const CARD = loadRouterCard({
    version: "uniroute.router.v1",
    embedder: { model: "fake-embedder", dims: 2 },
    lambda: 0,
    assignment: {
        type: "centroids",
        centroids: [[1, 0], [0, 1]]
    },
    models: [
        { id: "math-llm", psi: [0.05, 0.9], cost: 2 },
        { id: "code-llm", psi: [0.9, 0.05], cost: 2 },
        { id: "generalist", psi: [0.4, 0.4], cost: 0.1 }
    ]
});
|
|
54
|
+
/**
 * Fake embedder the fixture card was "fitted" with. Each axis is a keyword
 * flag: 1 when the text contains "math" / "code" respectively, otherwise 0.
 */
async function embed(text) {
    const keywords = ["math", "code"];
    return keywords.map((keyword) => (text.includes(keyword) ? 1 : 0));
}
|
|
58
|
+
test("routes each prompt to its cluster specialist", async () => {
    // Every routing decision the model reports is collected here.
    const seen = [];
    const candidates = {
        "math-llm": textModel("math-llm", "from math"),
        "code-llm": textModel("code-llm", "from code"),
        generalist: textModel("generalist", "from generalist")
    };
    const model = routedModel({
        card: CARD,
        candidates,
        embed,
        onDecision: (decision) => seen.push(decision)
    });
    const mathResult = await generateText({ model, prompt: "math: integrate x^2" });
    assert.equal(mathResult.text, "from math");
    const codeResult = await generateText({ model, prompt: "code: write a loop" });
    assert.equal(codeResult.text, "from code");
    // One decision per call, each naming the cluster specialist.
    assert.equal(seen.length, 2);
    const chosenIds = seen.map((decision) => decision.model);
    assert.deepEqual(chosenIds, ["math-llm", "code-llm"]);
    const [firstDecision] = seen;
    assert.equal(firstDecision?.fallback, false);
    // The math prompt's predicted error must equal math-llm's psi for cluster 0.
    const errorGap = Math.abs((firstDecision?.predictedError ?? 0) - 0.05);
    assert.ok(errorGap < 1e-9);
});
|
|
79
|
+
test("lambda trades quality for cost: large lambda picks the cheap generalist", async () => {
    const candidates = {
        "math-llm": textModel("math-llm", "from math"),
        "code-llm": textModel("code-llm", "from code"),
        generalist: textModel("generalist", "from generalist")
    };
    // lambda = 1: 1 * cost(2) overwhelms any gamma difference in [0,1].
    const model = routedModel({ card: CARD, candidates, embed, lambda: 1 });
    const { text } = await generateText({ model, prompt: "math: integrate x^2" });
    assert.equal(text, "from generalist");
});
|
|
93
|
+
test("falls back to the next-best candidate when the chosen one fails", async () => {
    const seen = [];
    const candidates = {
        "math-llm": failingModel("math-llm", "server crashed"),
        "code-llm": textModel("code-llm", "from code"),
        generalist: textModel("generalist", "from generalist")
    };
    const model = routedModel({
        card: CARD,
        candidates,
        embed,
        onDecision: (decision) => seen.push(decision)
    });
    const { text } = await generateText({ model, prompt: "math: integrate x^2" });
    // Next-best for the math cluster at lambda 0 is the generalist (0.4 < 0.9).
    assert.equal(text, "from generalist");
    // Two decisions are expected: the failed pick, then the fallback.
    assert.equal(seen.length, 2);
    const [failedPick, fallbackPick] = seen;
    assert.match(failedPick?.reason ?? "", /call failed: server crashed/);
    assert.equal(fallbackPick?.fallback, true);
    assert.match(fallbackPick?.reason ?? "", /fallback/);
});
|
|
113
|
+
test("fallback: false surfaces the failure", async () => {
    const candidates = {
        "math-llm": failingModel("math-llm", "server crashed"),
        "code-llm": textModel("code-llm", "x"),
        generalist: textModel("generalist", "x")
    };
    const model = routedModel({ card: CARD, candidates, embed, fallback: false });
    // With fallback disabled, the chosen model's error must reach the caller.
    const attempt = generateText({ model, prompt: "math: integrate x^2" });
    await assert.rejects(attempt, /server crashed/);
});
|
|
126
|
+
test("softmax assignment cards route through the learned map", async () => {
    // theta rows (with bias column) reproduce the keyword axes sharply.
    const theta = [
        [10, -10, 0],
        [-10, 10, 0]
    ];
    const card = loadRouterCard({
        version: "uniroute.router.v1",
        embedder: { model: "fake-embedder", dims: 2 },
        lambda: 0,
        assignment: { type: "softmax", theta },
        models: [
            { id: "math-llm", psi: [0.05, 0.9], cost: 1 },
            { id: "code-llm", psi: [0.9, 0.05], cost: 1 }
        ]
    });
    const candidates = {
        "math-llm": textModel("math-llm", "from math"),
        "code-llm": textModel("code-llm", "from code")
    };
    const model = routedModel({ card, candidates, embed });
    const { text } = await generateText({ model, prompt: "code: refactor this" });
    assert.equal(text, "from code");
});
|
|
155
|
+
test("card validation fails closed", () => {
    // Each entry pairs an invalid card with the pattern its rejection must match.
    const invalidCards = [
        {
            // Unsupported card version.
            card: {
                version: "uniroute.router.v2",
                embedder: { model: "e", dims: 2 },
                lambda: 0,
                assignment: { type: "centroids", centroids: [[0, 0]] },
                models: [{ id: "a", psi: [0], cost: 1 }]
            },
            expected: /version/
        },
        {
            // Embedder declares 3 dims but the centroids are 2-wide.
            card: {
                version: "uniroute.router.v1",
                embedder: { model: "e", dims: 3 },
                lambda: 0,
                assignment: { type: "centroids", centroids: [[0, 0]] },
                models: [{ id: "a", psi: [0], cost: 1 }]
            },
            expected: /dims/
        },
        {
            // psi length != clusters.
            card: {
                version: "uniroute.router.v1",
                embedder: { model: "e", dims: 2 },
                lambda: 0,
                assignment: { type: "centroids", centroids: [[0, 0]] },
                models: [{ id: "a", psi: [0, 0], cost: 1 }]
            },
            expected: /psi length/
        }
    ];
    for (const { card, expected } of invalidCards) {
        assert.throws(() => loadRouterCard(card), expected);
    }
});
|
|
178
|
+
test("a candidate missing for a card model is rejected at construction", () => {
    // CARD names three models; only one candidate is supplied.
    const buildIncomplete = () => routedModel({
        card: CARD,
        candidates: { "math-llm": textModel("math-llm", "x") },
        embed
    });
    assert.throws(buildIncomplete, /without candidates: code-llm, generalist/);
});
|
|
185
|
+
test("embedding dimension mismatches are rejected per call", async () => {
    // 3 dims against a 2-dim card.
    const wideEmbed = async () => [1, 0, 0];
    const candidates = {
        "math-llm": textModel("math-llm", "x"),
        "code-llm": textModel("code-llm", "x"),
        generalist: textModel("generalist", "x")
    };
    const model = routedModel({ card: CARD, candidates, embed: wideEmbed });
    const attempt = generateText({ model, prompt: "math" });
    await assert.rejects(attempt, /dims/);
});
|
|
197
|
+
test("withRoutedModel reports decisions into the handoff trace shape", async () => {
    const noted = [];
    // Only noteModelDecision is exercised by the wiring under test.
    const stubHandoff = {
        noteModelDecision: (decision) => noted.push(decision)
    };
    const candidates = {
        "math-llm": failingModel("math-llm", "boom"),
        "code-llm": textModel("code-llm", "x"),
        generalist: textModel("generalist", "from generalist")
    };
    const h = withRoutedModel(stubHandoff, {
        card: CARD,
        candidates,
        embed,
        localModels: ["math-llm", "generalist"]
    });
    const { text } = await generateText({ model: h.model, prompt: "math: 1+1" });
    assert.equal(text, "from generalist");
    // Reduce each noted decision to the fields being asserted.
    const summary = noted.map((decision) => [decision.model, decision.route, decision.escalated]);
    assert.deepEqual(summary, [
        ["math-llm", "local", false],
        ["generalist", "local", true]
    ]);
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { existsSync, rmSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { after, before, test } from "node:test";
|
|
5
|
+
import { agents, localFirst } from "@fusionkit/handoff";
|
|
6
|
+
import { makeRepo, startStack } from "@fusionkit/testkit";
|
|
7
|
+
import { swarmTools } from "../swarm-tools.js";
|
|
8
|
+
// Pool name used for the test stack and for both the worker and cloud pools.
const POOL = "swarm-pool";
// Context object passed as the second argument to every tool `execute` call.
const TOOL_CTX = { toolCallId: "call", messages: [] };
// Shared test stack; assigned in the `before` hook and stopped in `after`.
let stack;
|
|
11
|
+
before(async () => {
    // The runner is concurrent so a worker fan-out actually runs in parallel,
    // while mock workers keep the test deterministic and key-free. Worker and
    // cloud (escalation) runs both use the mock agent on the process tier
    // here — the real swarm uses pi workers and a claude-code cloud target.
    const allowOnlyMockAgent = (policy) => {
        policy.agents.allow = ["mock"];
    };
    stack = await startStack({
        pool: POOL,
        startRunner: true,
        concurrency: 4,
        pollIntervalMs: 25,
        policy: allowOnlyMockAgent
    });
});
|
|
26
|
+
// Stops the shared stack once the tests in this file have finished.
after(async () => {
    // `stack` is still undefined if the `before` hook failed before assigning
    // it. Guarding here keeps teardown from throwing a TypeError that would
    // add a second, misleading failure on top of the startup error.
    await stack?.stop();
});
|
|
29
|
+
/**
 * Builds swarm tools over `repoDir` wired to the shared test stack.
 * Any key in `overrides` replaces the matching default below.
 */
function makeSwarm(repoDir, overrides = {}) {
    const defaults = {
        workspace: repoDir,
        plane: { url: stack.planeUrl, adminToken: stack.adminToken },
        workerPool: POOL,
        cloudPool: POOL,
        actor: { kind: "human", id: "orchestrator" },
        // Mock workers and a mock cloud agent on the process tier keep CI key-free.
        workerAgent: agents.mock(),
        workerSession: "process",
        cloudAgent: agents.mock(),
        cloudSession: "process"
    };
    return swarmTools({ ...defaults, ...overrides });
}
|
|
44
|
+
test("dispatch fans workers out and a completed worker is judged from evidence and pulled", async () => {
    const repoDir = makeRepo({ files: { "README.md": "# swarm fixture\n" } });
    try {
        const swarm = makeSwarm(repoDir);
        const dispatch = swarm.tools.dispatch_workers.execute;
        const pull = swarm.tools.pull_worker.execute;
        assert.ok(dispatch && pull);
        const tasks = [{ prompt: "improve the docs", fileScope: ["MOCK_AGENT.md"] }];
        const dispatched = await dispatch({ tasks }, TOOL_CTX);
        assert.equal(dispatched.budgetExceeded, false);
        assert.equal(dispatched.dispatched.length, 1);
        const runId = dispatched.dispatched[0]?.runId;
        assert.ok(runId);
        const pulled = await pull({ runId }, TOOL_CTX);
        assert.equal(pulled.verdict, "accepted");
        assert.equal(pulled.status, "completed");
        assert.ok(pulled.filesChanged.includes("MOCK_AGENT.md"));
        const { scorecard, receipt } = pulled;
        assert.ok(scorecard, "an accepted worker carries a deterministic scorecard");
        assert.equal(scorecard?.status, "completed");
        assert.equal(scorecard?.exitCode, 0);
        assert.ok(receipt?.verified, "the receipt must verify offline");
        assert.match(receipt?.contractHash ?? "", /^[0-9a-f]{64}$/);
        // The worker's change landed on the workspace of record.
        assert.ok(existsSync(join(repoDir, "MOCK_AGENT.md")));
        // The evidence record reflects the accepted verdict.
        const isAcceptedPull = (record) => record.tool === "pull_worker" && record.verdict === "accepted" && record.runId === runId;
        assert.ok(swarm.calls().some(isAcceptedPull));
    }
    finally {
        // Always remove the fixture repo, even when an assertion failed.
        rmSync(repoDir, { recursive: true, force: true });
    }
});
|
|
75
|
+
test("a worker overlapping already-pulled files is downgraded to escalate, not pulled", async () => {
    const repoDir = makeRepo({ files: { "README.md": "# overlap fixture\n" } });
    try {
        const swarm = makeSwarm(repoDir);
        const dispatch = swarm.tools.dispatch_workers.execute;
        const pull = swarm.tools.pull_worker.execute;
        assert.ok(dispatch && pull);
        // Two mock workers both write MOCK_AGENT.md, so they necessarily collide.
        const tasks = [{ prompt: "task one" }, { prompt: "task two" }];
        const dispatched = await dispatch({ tasks }, TOOL_CTX);
        assert.equal(dispatched.dispatched.length, 2);
        const [firstRun, secondRun] = dispatched.dispatched;
        assert.ok(firstRun && secondRun);
        const firstPull = await pull({ runId: firstRun.runId }, TOOL_CTX);
        assert.equal(firstPull.verdict, "accepted");
        const secondPull = await pull({ runId: secondRun.runId }, TOOL_CTX);
        assert.equal(secondPull.verdict, "escalate");
        assert.ok(secondPull.conflictingPaths?.includes("MOCK_AGENT.md"));
        assert.match(secondPull.reason, /overlaps already-pulled/);
        // Evidence is still attached to the refused pull.
        assert.ok(secondPull.receipt?.verified);
    }
    finally {
        // Always remove the fixture repo, even when an assertion failed.
        rmSync(repoDir, { recursive: true, force: true });
    }
});
|
|
100
|
+
test("worker_status reports without blocking and flags unknown ids", async () => {
    const repoDir = makeRepo({ files: { "README.md": "# status fixture\n" } });
    try {
        const swarm = makeSwarm(repoDir);
        const dispatch = swarm.tools.dispatch_workers.execute;
        const status = swarm.tools.worker_status.execute;
        assert.ok(dispatch && status);
        const dispatched = await dispatch({ tasks: [{ prompt: "do work" }] }, TOOL_CTX);
        const runId = dispatched.dispatched[0]?.runId;
        assert.ok(runId);
        // Ask about one real run and one id that was never dispatched.
        const reported = await status({ runIds: [runId, "run_does_not_exist"] }, TOOL_CTX);
        assert.equal(reported.statuses.length, 2);
        const knownFlagFor = (id) => reported.statuses.find((entry) => entry.runId === id)?.known;
        assert.equal(knownFlagFor(runId), true);
        assert.equal(knownFlagFor("run_does_not_exist"), false);
    }
    finally {
        // Always remove the fixture repo, even when an assertion failed.
        rmSync(repoDir, { recursive: true, force: true });
    }
});
|
|
119
|
+
test("dispatch beyond the fan-out ceiling is refused with budgetExceeded", async () => {
    const repoDir = makeRepo({ files: { "README.md": "# budget fixture\n" } });
    try {
        // The policy allows one parallel run; the dispatch below asks for two.
        const policy = localFirst({ allowPools: [POOL], maxParallelRuns: 1 });
        const swarm = makeSwarm(repoDir, { policy });
        const dispatch = swarm.tools.dispatch_workers.execute;
        assert.ok(dispatch);
        const tasks = [{ prompt: "one" }, { prompt: "two" }];
        const outcome = await dispatch({ tasks }, TOOL_CTX);
        assert.equal(outcome.budgetExceeded, true);
        assert.equal(outcome.dispatched.length, 0);
        assert.match(outcome.reason, /exceeds policy ceiling/);
        assert.equal(swarm.calls().length, 0, "a refused dispatch produces no records");
    }
    finally {
        // Always remove the fixture repo, even when an assertion failed.
        rmSync(repoDir, { recursive: true, force: true });
    }
});
|
|
137
|
+
test("escalate_task runs the cloud agent as a governed run and is budget-bounded", async () => {
    const repoDir = makeRepo({ files: { "README.md": "# escalate fixture\n" } });
    try {
        // One escalation is allowed; the second call must be refused.
        const swarm = makeSwarm(repoDir, { maxEscalations: 1 });
        const escalate = swarm.tools.escalate_task.execute;
        assert.ok(escalate);
        const allowed = await escalate({ task: "fix it properly" }, TOOL_CTX);
        assert.equal(allowed.budgetExceeded, false);
        assert.equal(allowed.status, "completed");
        assert.ok(allowed.receipt?.verified);
        assert.match(allowed.receipt?.contractHash ?? "", /^[0-9a-f]{64}$/);
        const refused = await escalate({ task: "and again" }, TOOL_CTX);
        assert.equal(refused.budgetExceeded, true);
        assert.match(refused.reason, /budget exhausted/);
        // Only the escalation that actually ran leaves a record.
        const escalationRecords = swarm.calls().filter((record) => record.tool === "escalate_task");
        assert.equal(escalationRecords.length, 1);
    }
    finally {
        // Always remove the fixture repo, even when an assertion failed.
        rmSync(repoDir, { recursive: true, force: true });
    }
});
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A uniform, real model-driven agent loop for trajectory-level fusion. One
|
|
3
|
+
* panel model drives an AI SDK tool loop over a real git worktree (read/list/
|
|
4
|
+
* grep/write/run), and the full reasoning/tool-call/observation/output sequence
|
|
5
|
+
* is captured as a normalized trajectory (`harness-trajectory.v1` shaped). No
|
|
6
|
+
* virtual filesystem and no mocks: the worktree is the isolation boundary and
|
|
7
|
+
* the tools touch it directly.
|
|
8
|
+
*/
|
|
9
|
+
/** The kinds of step a captured trajectory can contain. */
export type TrajectoryStepType =
    | "reasoning"
    | "tool_call"
    | "observation"
    | "output";
|
|
10
|
+
/**
 * One entry in a captured trajectory. Only `index` and `type` are required;
 * the remaining fields are optional.
 */
export type TrajectoryStep = {
    /** Numeric index of the step (presumably its position in the trajectory — confirm against the implementation). */
    index: number;
    /** Which kind of step this is. */
    type: TrajectoryStepType;
    /** Text attached to the step, when present. */
    text?: string;
    /** Name of the tool involved, when present. */
    tool_name?: string;
    /** Id of the tool call involved, when present. */
    tool_call_id?: string;
    /** Tool input as a string (serialization format not visible here). */
    tool_input?: string;
    /** Error flag for the step, when present. */
    is_error?: boolean;
};
|
|
19
|
+
/** What `runWorktreeAgent` resolves with: the run's outcome plus its captured trajectory. */
export type WorktreeAgentResult = {
    /** Overall outcome of the run. */
    status: "succeeded" | "failed";
    /** The captured trajectory steps. */
    steps: Array<TrajectoryStep>;
    /** The agent's final output text. */
    finalOutput: string;
    /** Finish reason as a string (the set of possible values is not visible here). */
    finishReason: string;
    /** Number of tool calls counted for the run. */
    toolCallCount: number;
};
|
|
26
|
+
/** Options accepted by `runWorktreeAgent`. */
export type WorktreeAgentInput = {
    /** Absolute path to the candidate's git worktree; every tool is scoped to it. */
    worktree: string;
    /** The user's task/prompt for this turn. */
    prompt: string;
    /** OpenAI-compatible base URL for this candidate's model, given without `/v1`. */
    baseUrl: string;
    /** Name of the model to request from the endpoint. */
    model: string;
    /** Optional API key (presumably for the endpoint — confirm against the implementation). */
    apiKey?: string;
    /** Maximum agent steps (tool round-trips) before stopping. Defaults to 12. */
    maxSteps?: number;
    /** Timeout in ms for each `run` command. Defaults to 120000. */
    commandTimeoutMs?: number;
    /** Optional abort signal for the run. */
    abortSignal?: AbortSignal;
    /** Observability correlation id; when set, steps and model calls are traced. */
    traceId?: string;
    /** Id of the candidate this agent run belongs to (for trace correlation). */
    candidateId?: string;
    /** Parent span (e.g. the ensemble candidate span) for waterfall linking. */
    parentSpanId?: string;
    /** Index of the user turn this run belongs to (stamped on model.call events). */
    turn?: number;
};
|
|
50
|
+
/**
 * Runs one panel model as a real agent over the worktree and captures its
 * trajectory.
 *
 * @param input - Worktree location, prompt, model endpoint and optional limits/tracing fields.
 * @returns A promise for the run's status, captured steps and final output.
 */
export declare function runWorktreeAgent(input: WorktreeAgentInput): Promise<WorktreeAgentResult>;
|
|
52
|
+
/**
 * Computes the worktree's staged diff against a base ref, for use as patch
 * evidence.
 *
 * @param root - The worktree to diff (presumably its root directory path — confirm against the implementation).
 * @param baseGitSha - The base ref the diff is taken against.
 * @returns The diff as a string.
 */
export declare function worktreeDiff(root: string, baseGitSha: string): string;
|