@fusionkit/adapter-ai-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,324 @@
1
+ import { jsonSchema, tool } from "ai";
2
+ import { agents, handoff, scorecardFor, targets } from "@fusionkit/handoff";
3
+ import { PolicyDeniedError, verifyReceiptBundle } from "@fusionkit/protocol";
4
+ import { PlaneClient } from "@fusionkit/sdk";
5
/** Fallback for `config.timeoutMs`: how long a run is awaited, in ms (ten minutes). */
const DEFAULT_TIMEOUT_MS = 600_000;
/** Fallback for `config.diffExcerptBytes`: cap on the returned diff excerpt (4 KiB). */
const DEFAULT_DIFF_EXCERPT_BYTES = 4_096;
/** Fallback for `config.workerSession`: session passed when dispatching workers. */
const DEFAULT_WORKER_SESSION = "hermetic";
/** Fallback for `config.cloudSession`: session passed when escalating to the cloud target. */
const DEFAULT_CLOUD_SESSION = "process";
9
/**
 * Lists the workspace paths a run changed, according to its receipt bundle.
 *
 * Walks `bundle.events` and keeps the `event.path` of every entry whose
 * `event.type` is "file.changed". Each path appears once, in first-seen order.
 */
function changedPaths(bundle) {
    const seen = new Set();
    for (const { event } of bundle.events) {
        if (event.type !== "file.changed") {
            continue;
        }
        seen.add(event.path);
    }
    return Array.from(seen);
}
18
/**
 * Turns a worker task into the prompt text that is dispatched.
 *
 * A task without a `fileScope` (missing or empty) yields its `prompt`
 * unchanged; otherwise a scope instruction naming the files is appended.
 */
function withScope(task) {
    const { prompt, fileScope } = task;
    if (!fileScope || fileScope.length === 0) {
        return prompt;
    }
    const files = fileScope.join(", ");
    return `${prompt}\n\nScope: confine your changes to these files: ${files}. Do not modify files outside this set.`;
}
24
/**
 * Summarises a receipt bundle as the evidence handed back to the orchestrator:
 * the receipt's `contractHash` and `eventsHead`, plus `verified`, which is the
 * `ok` flag reported by `verifyReceiptBundle` for the bundle.
 */
function receiptEvidence(bundle) {
    const { contractHash, eventsHead } = bundle.receipt;
    const { ok: verified } = verifyReceiptBundle(bundle);
    return { contractHash, eventsHead, verified };
}
31
/**
 * Builds the swarm orchestration tools (`dispatch_workers`, `worker_status`,
 * `pull_worker`, `escalate_task`) for an AI SDK agent loop.
 *
 * @param config Either `{ context, ... }` to reuse an existing handoff
 *   context, or `{ workspace, plane, ... }` to create one here. Also read:
 *   `workerPool`, `cloudPool`, and the optional `workerAgent`, `cloudAgent`,
 *   `workerSession`, `cloudSession`, `timeoutMs`, `diffExcerptBytes`,
 *   `maxEscalations`, `actor`, `policy`, `secrets`, `allowHosts` and
 *   `allowUntracked`.
 * @returns `{ tools, calls, context }`: the four tools, a function returning a
 *   copy of the per-call records, and the handoff context in use.
 */
export function swarmTools(config) {
    const hasContext = "context" in config;
    // Resolved once so the context default and the per-dispatch override are
    // the same agent.
    const workerAgent = config.workerAgent ?? agents.pi();
    const context = hasContext
        ? config.context
        : handoff({
            workspace: config.workspace,
            plane: config.plane,
            // The default agent is pi: dispatched workers are pi runs unless a
            // call overrides. Escalations pass the cloud agent explicitly.
            agent: workerAgent,
            ...(config.actor ? { actor: config.actor } : {}),
            ...(config.policy ? { policy: config.policy } : {}),
            ...(config.secrets ? { secrets: config.secrets } : {}),
            ...(config.allowHosts ? { allowHosts: config.allowHosts } : {}),
            ...(config.allowUntracked ? { allowUntracked: config.allowUntracked } : {})
        });
    const workerTarget = targets.pool(config.workerPool);
    const cloudTarget = targets.pool(config.cloudPool);
    const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const diffExcerptBytes = config.diffExcerptBytes ?? DEFAULT_DIFF_EXCERPT_BYTES;
    const workerSession = config.workerSession ?? DEFAULT_WORKER_SESSION;
    const cloudSession = config.cloudSession ?? DEFAULT_CLOUD_SESSION;
    const cloudAgent = config.cloudAgent ?? agents.claudeCode();
    const maxEscalations = config.maxEscalations;
    // State across tool calls within one orchestrator session.
    const runsById = new Map();
    const pulledPaths = new Set();
    // Paths claimed by pulls that are still in flight. Checked together with
    // pulledPaths so concurrent pull_worker calls cannot both accept a file.
    const reservedPaths = new Set();
    // runId -> promise of the accepted pull_worker result, so a repeated pull
    // of the same run returns that result instead of conflicting with itself.
    const acceptedPulls = new Map();
    const records = [];
    let escalations = 0;
    // With a caller-supplied context there is no plane client here, so diff
    // blobs are not fetched (diff size 0, empty excerpt).
    const client = hasContext
        ? undefined
        : config.plane instanceof PlaneClient
            ? config.plane
            : new PlaneClient(config.plane.url, config.plane.adminToken);
    /** Decodes a diff blob as UTF-8 and truncates it to the excerpt limit. */
    function excerptOf(blob) {
        const text = blob.toString("utf8");
        return text.length > diffExcerptBytes ? text.slice(0, diffExcerptBytes) : text;
    }
    const dispatch_workers = tool({
        description: "Fan a set of independent worker tasks out across cheap local Pi agents, " +
            "each a governed run with a signed receipt. Returns one run id per task. " +
            "Make the tasks independent and their file scopes disjoint; overlap is " +
            "detected later from receipts and downgraded to escalation. Refused with " +
            "budgetExceeded=true if the fan-out exceeds the continuation policy ceiling.",
        inputSchema: jsonSchema({
            type: "object",
            properties: {
                tasks: {
                    type: "array",
                    items: {
                        type: "object",
                        properties: {
                            prompt: { type: "string", description: "What this worker should do." },
                            fileScope: {
                                type: "array",
                                items: { type: "string" },
                                description: "Files this worker is meant to touch (kept disjoint across workers)."
                            }
                        },
                        required: ["prompt"],
                        additionalProperties: false
                    }
                }
            },
            required: ["tasks"],
            additionalProperties: false
        }),
        execute: async ({ tasks }) => {
            if (tasks.length === 0) {
                return { dispatched: [], budgetExceeded: false, reason: "no tasks supplied" };
            }
            const prompts = tasks.map(withScope);
            try {
                const runs = await context.parallel(prompts, workerTarget, {
                    agent: workerAgent,
                    session: workerSession,
                    reason: `swarm fan-out of ${tasks.length} worker(s)`
                });
                const dispatched = runs.map((run, i) => {
                    const prompt = prompts[i] ?? "";
                    runsById.set(run.runId, { run, prompt });
                    records.push({ tool: "dispatch_workers", runId: run.runId, status: "created" });
                    return { runId: run.runId, prompt };
                });
                return {
                    dispatched,
                    budgetExceeded: false,
                    reason: `dispatched ${dispatched.length} worker(s) to pool "${config.workerPool}"`
                };
            }
            catch (error) {
                if (error instanceof PolicyDeniedError) {
                    // Mirror Codex's budget_limited semantics with Warrant's own policy
                    // ceiling: the orchestrator sees the refusal as a tool result and
                    // can dispatch a smaller batch or escalate instead.
                    return {
                        dispatched: [],
                        budgetExceeded: true,
                        reason: error.reasons.join("; ")
                    };
                }
                throw error;
            }
        }
    });
    const worker_status = tool({
        description: "Report the current status of dispatched workers without blocking, so the " +
            "orchestrator can interleave its own work while the swarm runs.",
        inputSchema: jsonSchema({
            type: "object",
            properties: {
                runIds: { type: "array", items: { type: "string" } }
            },
            required: ["runIds"],
            additionalProperties: false
        }),
        execute: async ({ runIds }) => {
            const statuses = await Promise.all(runIds.map(async (runId) => {
                const entry = runsById.get(runId);
                if (!entry)
                    return { runId, status: "created", known: false };
                return { runId, status: await entry.run.status(), known: true };
            }));
            return { statuses };
        }
    });
    const pull_worker = tool({
        description: "Wait for one worker to finish, then judge it from evidence. A failed worker " +
            "or one whose files overlap already-pulled work is returned with verdict " +
            "'escalate' and is NOT pulled. A clean, disjoint, completed worker is pulled " +
            "onto the workspace of record and returned with verdict 'accepted', its " +
            "deterministic scorecard, a diff excerpt, and its receipt.",
        inputSchema: jsonSchema({
            type: "object",
            properties: { runId: { type: "string" } },
            required: ["runId"],
            additionalProperties: false
        }),
        execute: async ({ runId }) => {
            const entry = runsById.get(runId);
            if (!entry) {
                return {
                    runId,
                    status: "created",
                    verdict: "escalate",
                    reason: "unknown run id; dispatch it before pulling",
                    filesChanged: []
                };
            }
            const outcome = await entry.run.wait({ timeoutMs });
            if (outcome.status !== "completed") {
                records.push({
                    tool: "pull_worker",
                    runId,
                    status: outcome.status,
                    verdict: "escalate"
                });
                return {
                    runId,
                    status: outcome.status,
                    verdict: "escalate",
                    reason: outcome.status === "awaiting_approval"
                        ? `blocked on consent: ${outcome.consentRequirements.join("; ")}`
                        : `worker did not complete (status ${outcome.status})`,
                    filesChanged: []
                };
            }
            const bundle = await entry.run.receipt();
            // A run that was already accepted (or is being accepted by a
            // concurrent call) keeps its verdict: without this a repeated pull
            // would see its own files in pulledPaths and report a conflict.
            const earlierPull = acceptedPulls.get(runId);
            if (earlierPull) {
                const earlier = await earlierPull;
                records.push({
                    tool: "pull_worker",
                    runId,
                    status: earlier.status,
                    verdict: earlier.verdict,
                    contractHash: earlier.receipt.contractHash,
                    receiptVerified: earlier.receipt.verified
                });
                return earlier;
            }
            // No await between the overlap check and the reservation below, so
            // concurrent calls cannot interleave inside this section.
            const paths = changedPaths(bundle);
            const conflicting = paths.filter((path) => pulledPaths.has(path) || reservedPaths.has(path));
            const evidence = receiptEvidence(bundle);
            if (conflicting.length > 0) {
                // Deterministic overlap, computed from receipts — never asked of a
                // model. Refuse the pull so two workers never both write a file; the
                // orchestrator escalates this task to start from the updated tree.
                records.push({
                    tool: "pull_worker",
                    runId,
                    status: outcome.status,
                    verdict: "escalate",
                    contractHash: evidence.contractHash,
                    receiptVerified: evidence.verified
                });
                return {
                    runId,
                    status: outcome.status,
                    verdict: "escalate",
                    reason: `output overlaps already-pulled files: ${conflicting.join(", ")}`,
                    filesChanged: paths,
                    conflictingPaths: conflicting,
                    receipt: evidence
                };
            }
            for (const path of paths)
                reservedPaths.add(path);
            const accepting = (async () => {
                const diffHash = bundle.receipt.workspaceOut.diffHash;
                // One fetch serves both the scorecard's byte count and the excerpt.
                const diffBlob = diffHash && client ? await client.getBlob(diffHash) : undefined;
                const scorecard = scorecardFor(bundle, diffBlob ? diffBlob.length : 0);
                const diffExcerpt = diffBlob ? excerptOf(diffBlob) : "";
                await entry.run.pull();
                return {
                    runId,
                    status: outcome.status,
                    verdict: "accepted",
                    reason: "completed, disjoint, and pulled onto the workspace of record",
                    filesChanged: paths,
                    scorecard,
                    diffExcerpt,
                    receipt: evidence
                };
            })();
            acceptedPulls.set(runId, accepting);
            let accepted;
            try {
                accepted = await accepting;
                // Only a pull that actually happened marks its paths as pulled.
                for (const path of paths)
                    pulledPaths.add(path);
            }
            catch (error) {
                // Nothing was pulled: forget the attempt so it can be retried.
                acceptedPulls.delete(runId);
                throw error;
            }
            finally {
                for (const path of paths)
                    reservedPaths.delete(path);
            }
            records.push({
                tool: "pull_worker",
                runId,
                status: outcome.status,
                verdict: "accepted",
                contractHash: evidence.contractHash,
                receiptVerified: evidence.verified
            });
            return accepted;
        }
    });
    const escalate_task = tool({
        description: "Re-run one task on the cloud target (a capable agent on a real-OS tier) " +
            "as a governed run, then pull its result. Use for tasks a local worker " +
            "failed or whose output collided. Bounded: refused with budgetExceeded=true " +
            "once the escalation budget is exhausted.",
        inputSchema: jsonSchema({
            type: "object",
            properties: {
                task: { type: "string" },
                reason: { type: "string" }
            },
            required: ["task"],
            additionalProperties: false
        }),
        execute: async ({ task, reason }) => {
            // Check and increment happen with no await in between, so concurrent
            // escalations cannot exceed the budget.
            if (maxEscalations !== undefined && escalations >= maxEscalations) {
                return {
                    budgetExceeded: true,
                    reason: `escalation budget exhausted (${maxEscalations})`
                };
            }
            escalations += 1;
            const run = await context.continueIn(cloudTarget, {
                task,
                agent: cloudAgent,
                session: cloudSession,
                reason: reason ?? "swarm escalation to cloud target"
            });
            const outcome = await run.wait({ timeoutMs });
            const status = outcome.status;
            if (status !== "completed") {
                records.push({ tool: "escalate_task", runId: run.runId, status });
                return {
                    budgetExceeded: false,
                    runId: run.runId,
                    status,
                    reason: status === "awaiting_approval"
                        ? `blocked on consent: ${outcome.consentRequirements.join("; ")}`
                        : `escalation did not complete (status ${status})`
                };
            }
            const bundle = await run.receipt();
            const paths = changedPaths(bundle);
            const evidence = receiptEvidence(bundle);
            await run.pull();
            for (const path of paths)
                pulledPaths.add(path);
            records.push({
                tool: "escalate_task",
                runId: run.runId,
                status,
                contractHash: evidence.contractHash,
                receiptVerified: evidence.verified
            });
            return {
                budgetExceeded: false,
                runId: run.runId,
                status,
                reason: "escalated, completed, and pulled onto the workspace of record",
                filesChanged: paths,
                receipt: evidence
            };
        }
    });
    return {
        tools: { dispatch_workers, worker_status, pull_worker, escalate_task },
        calls: () => [...records],
        context
    };
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,129 @@
1
+ import assert from "node:assert/strict";
2
+ import { rmSync } from "node:fs";
3
+ import { after, before, test } from "node:test";
4
+ import { generateText, jsonSchema, stepCountIs, tool } from "ai";
5
+ import { MockLanguageModelV3 } from "ai/test";
6
+ import { agents, handoff, localFirst, targets } from "@fusionkit/handoff";
7
+ import { makeRepo, startStack } from "@fusionkit/testkit";
8
// Pool the test stack is started with; also the only pool the handoff policy allowlists.
const POOL = "eng-prod";
// Shared fixtures: assigned in `before`, released in `after`.
let stack;
let repoDir;
let h;
// Minimal token usage attached to every scripted model response.
const usage = {
    inputTokens: { total: 1, noCache: 1, cacheRead: undefined, cacheWrite: undefined },
    outputTokens: { total: 1, text: 1, reasoning: undefined }
};
before(async () => {
    // The stack policy is edited in place to admit the agents these tests use.
    const allowTestAgents = (policy) => {
        policy.agents.allow = ["mock", "command"];
    };
    stack = await startStack({ pool: POOL, startRunner: true, policy: allowTestAgents });
    repoDir = makeRepo({ files: { "README.md": "# golden fixture\n" } });
    const plane = { url: stack.planeUrl, adminToken: stack.adminToken };
    h = handoff({
        workspace: repoDir,
        plane,
        actor: { kind: "human", id: "golden-tester" },
        agent: agents.mock(),
        policy: localFirst({ allowPools: [POOL] })
    });
});
after(async () => {
    // Stop the stack first, then remove the fixture repository.
    await stack.stop();
    rmSync(repoDir, { recursive: true, force: true });
});
42
test("the golden shape: generateText with h.tools, then h.needs/continueIn carrying the journal", async () => {
    // Local tools wrapped by the context: capture, not orchestration.
    const lookups = [];
    const lookup = tool({
        description: "look up a fact in the local knowledge base",
        inputSchema: jsonSchema({
            type: "object",
            properties: { key: { type: "string" } },
            required: ["key"]
        }),
        execute: async ({ key }) => {
            lookups.push(key);
            return { key, value: `fact-about-${key}` };
        }
    });
    const tools = h.tools({ lookup });
    // Scripted model: the first turn requests the tool, every later turn answers.
    const toolCallTurn = () => ({
        content: [
            {
                type: "tool-call",
                toolCallId: "call-1",
                toolName: "lookup",
                input: JSON.stringify({ key: "deploy-window" })
            }
        ],
        finishReason: { unified: "tool-calls", raw: "tool-calls" },
        usage,
        warnings: []
    });
    const answerTurn = () => ({
        content: [{ type: "text", text: "deploys are fine after 14:00" }],
        finishReason: { unified: "stop", raw: "stop" },
        usage,
        warnings: []
    });
    let modelCalls = 0;
    const model = new MockLanguageModelV3({
        doGenerate: async () => {
            modelCalls += 1;
            return modelCalls === 1 ? toolCallTurn() : answerTurn();
        }
    });
    const result = await generateText({
        model,
        tools,
        prompt: "when can we deploy?",
        stopWhen: stepCountIs(2)
    });
    assert.equal(result.text, "deploys are fine after 14:00");
    assert.deepEqual(lookups, ["deploy-window"], "the tool executed locally");
    // The journaled call is in the local trace, hashes only.
    const toolEvents = h.trace().filter(({ type }) => type === "tool.called");
    assert.equal(toolEvents.length, 1);
    // The golden gesture, guarded by the deterministic policy check.
    const allowedTarget = targets.pool(POOL);
    assert.equal(h.needs(allowedTarget), true);
    assert.equal(h.needs(targets.pool("not-allowlisted")), false);
    const run = await h.continueIn(targets.pool(POOL), {
        task: "apply the deploy-window fact to the rollout plan",
        reason: "loop established the fact; continue under governance"
    });
    const outcome = await run.wait({ timeoutMs: 60_000 });
    assert.equal(outcome.status, "completed");
    // The continuation carried the tool journal as content-addressed
    // semantic state, pinned via the envelope inside the signed contract.
    const journalHash = run.envelope.checkpoint.semantic?.toolJournalHash;
    assert.ok(journalHash, "checkpoint must reference the tool journal");
    const journalBlob = await stack.client.getBlob(journalHash);
    const journal = JSON.parse(journalBlob.toString("utf8"));
    assert.equal(journal.version, "warrant.tooljournal.v1");
    assert.equal(journal.entries.length, 1);
    const [entry] = journal.entries;
    assert.ok(entry);
    assert.equal(entry.toolName, "lookup");
    assert.deepEqual(entry.input, { key: "deploy-window" });
    assert.deepEqual(entry.output, { key: "deploy-window", value: "fact-about-deploy-window" });
    // And the summary recomputes the whole story.
    const summary = await h.summary();
    assert.equal(summary.toolCalls, 1);
    assert.equal(summary.checkpoints, 1);
    assert.equal(summary.continuations.planned >= 1, true);
    assert.equal(summary.runs.length, 1);
    const [summaryRun] = summary.runs;
    assert.ok(summaryRun);
    assert.equal(summaryRun.runId, run.runId);
    assert.equal(summaryRun.status, "completed");
    assert.equal(summaryRun.target, `pool:${POOL}`);
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,198 @@
1
+ import assert from "node:assert/strict";
2
+ import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { after, test } from "node:test";
6
+ import { generateText, streamText } from "ai";
7
+ import { MockLanguageModelV3 } from "ai/test";
8
+ import { handoffModel } from "../model.js";
9
+ import { managedModelServer } from "../managed-server.js";
10
+ import { MlxCapabilityError } from "../mlx-env.js";
11
/**
 * Lifecycle tests for the managed model server. They run against a fake
 * OpenAI-compatible server (a node child process fully under the test's
 * control) so they work on any host. Covered: lazy start, a start shared by
 * concurrent calls, idle scale-to-zero, transparent restart, stream leases,
 * and cloud escalation when the env cannot be prepared.
 */
// Source of the fake server. It answers /v1/models (health) and
// /v1/chat/completions (plain JSON, or SSE when the request sets `stream`).
// FAKE_STREAM_GAP_MS inserts a mid-stream pause to exercise stream leases.
const FAKE_SERVER_SOURCE = `
const http = require("node:http");
const port = Number(process.argv[2]);
const gapMs = Number(process.env.FAKE_STREAM_GAP_MS || "0");
const server = http.createServer((req, res) => {
  if (req.url === "/v1/models") {
    res.writeHead(200, { "content-type": "application/json" });
    res.end(JSON.stringify({ object: "list", data: [{ id: "fake", object: "model" }] }));
    return;
  }
  if (req.url === "/v1/chat/completions" && req.method === "POST") {
    let body = "";
    req.on("data", (c) => (body += c));
    req.on("end", () => {
      const parsed = JSON.parse(body);
      if (parsed.stream) {
        res.writeHead(200, { "content-type": "text/event-stream" });
        const chunk = (delta, finish) =>
          "data: " + JSON.stringify({
            id: "cmpl-1", object: "chat.completion.chunk", created: 1,
            model: parsed.model,
            choices: [{ index: 0, delta, finish_reason: finish }]
          }) + "\\n\\n";
        res.write(chunk({ role: "assistant", content: "hello " }, null));
        setTimeout(() => {
          res.write(chunk({ content: "world" }, null));
          res.write(chunk({}, "stop"));
          res.write("data: [DONE]\\n\\n");
          res.end();
        }, gapMs);
        return;
      }
      res.writeHead(200, { "content-type": "application/json" });
      res.end(JSON.stringify({
        id: "cmpl-1", object: "chat.completion", created: 1,
        model: parsed.model,
        choices: [{ index: 0, message: { role: "assistant", content: "hello from fake" }, finish_reason: "stop" }],
        usage: { prompt_tokens: 1, completion_tokens: 2, total_tokens: 3 }
      }));
    });
    return;
  }
  res.writeHead(404);
  res.end();
});
server.listen(port, "127.0.0.1");
`;
// The script is written to a scratch directory that is removed after the run.
const scratch = mkdtempSync(join(tmpdir(), "warrant-managed-"));
const serverScript = join(scratch, "fake-server.cjs");
writeFileSync(serverScript, FAKE_SERVER_SOURCE);
after(() => rmSync(scratch, { recursive: true, force: true }));
71
/**
 * Builds a `prepare` callback for the managed server that launches the fake
 * server script with the current node binary.
 *
 * @param env Environment entries handed through in the launch spec.
 * @returns A function taking the port and resolving to `{ cmd, args, env }`.
 */
function fakePrepare(env = {}) {
    return async (port) => {
        const args = [serverScript, String(port)];
        return { cmd: process.execPath, args, env };
    };
}
78
/**
 * Polls `predicate` every 20 ms until it yields a truthy value.
 *
 * The predicate may be synchronous or return a promise: its result is awaited
 * before being tested, so an async predicate is polled like a sync one rather
 * than passing at once because a Promise object is truthy.
 *
 * @param predicate Condition to poll; sync or async.
 * @param timeoutMs Time budget in milliseconds, 5 s by default.
 * @throws {Error} "waitFor timed out" when the predicate is still falsy after
 *   the deadline has passed.
 */
async function waitFor(predicate, timeoutMs = 5_000) {
    const deadline = Date.now() + timeoutMs;
    while (!(await predicate())) {
        if (Date.now() > deadline) {
            throw new Error("waitFor timed out");
        }
        await new Promise((resolve) => setTimeout(resolve, 20));
    }
}
86
test("lazy start, shared start across concurrent calls, and roundtrip", async () => {
    // Every lifecycle event the managed server reports, in order.
    const events = [];
    const model = managedModelServer({
        prepare: fakePrepare(),
        modelId: "fake-model",
        idleShutdownMs: 0,
        onEvent: (event) => events.push(event)
    });
    assert.equal(model.status(), "stopped", "nothing runs before the first call");
    try {
        // Three first calls issued together.
        const prompts = ["one", "two", "three"];
        const results = await Promise.all(prompts.map((prompt) => generateText({ model, prompt })));
        results.forEach((result) => {
            assert.equal(result.text, "hello from fake");
        });
        assert.equal(model.status(), "running");
        const readyCount = events.filter(({ type }) => type === "ready").length;
        assert.equal(readyCount, 1, "three concurrent first calls share one server start");
    }
    finally {
        await model.stop();
    }
    assert.equal(model.status(), "stopped");
    const stopped = events.find(({ type }) => type === "stopped");
    assert.ok(stopped && stopped.type === "stopped" && stopped.reason === "explicit");
});
113
test("scales to zero when idle and transparently restarts", async () => {
    const events = [];
    const ofType = (wanted) => events.filter(({ type }) => type === wanted);
    const model = managedModelServer({
        prepare: fakePrepare(),
        modelId: "fake-model",
        idleShutdownMs: 150,
        onEvent: (event) => events.push(event)
    });
    try {
        const { text: firstText } = await generateText({ model, prompt: "warm up" });
        assert.equal(firstText, "hello from fake");
        assert.equal(model.status(), "running");
        // No further calls: wait for the server to report itself stopped.
        await waitFor(() => model.status() === "stopped");
        const stopped = events.find(({ type }) => type === "stopped");
        assert.ok(stopped && stopped.type === "stopped" && stopped.reason === "idle", "the idle sweep stopped the server");
        // Next call cold-starts a fresh process without the caller noticing.
        const { text: secondText } = await generateText({ model, prompt: "wake up" });
        assert.equal(secondText, "hello from fake");
        assert.equal(ofType("ready").length, 2);
    }
    finally {
        await model.stop();
    }
});
137
test("a stream holds its lease: no idle shutdown mid-stream", async () => {
    const events = [];
    const stoppedEvents = () => events.filter(({ type }) => type === "stopped");
    // The mid-stream gap (400ms) far exceeds the idle window (120ms): only
    // the held lease keeps the server alive across it.
    const prepare = fakePrepare({ FAKE_STREAM_GAP_MS: "400" });
    const model = managedModelServer({
        prepare,
        modelId: "fake-model",
        idleShutdownMs: 120,
        onEvent: (event) => events.push(event)
    });
    try {
        const stream = streamText({ model, prompt: "stream it" });
        const text = await stream.text;
        assert.equal(text, "hello world");
        assert.equal(stoppedEvents().length, 0, "no shutdown while the stream was in flight");
        // With the stream finished, the idle window applies again.
        await waitFor(() => model.status() === "stopped");
        const [stopped] = stoppedEvents();
        assert.ok(stopped && stopped.type === "stopped" && stopped.reason === "idle");
    }
    finally {
        await model.stop();
    }
});
160
test("startup failure surfaces with server output in the message", async () => {
    // A "server" that prints an error to stderr and exits with code 3.
    const failingCommand = {
        cmd: process.execPath,
        args: ["-e", "console.error('model weights not found'); process.exit(3)"],
        env: {}
    };
    const model = managedModelServer({
        prepare: () => Promise.resolve(failingCommand),
        modelId: "fake-model",
        startupTimeoutMs: 5_000
    });
    const attempt = () => generateText({ model, prompt: "boom" });
    await assert.rejects(attempt, /exited during startup .*model weights not found/s);
    assert.equal(model.status(), "stopped");
});
173
test("under handoffModel, an unpreparable env escalates to cloud", async () => {
    // Local model whose environment preparation always rejects.
    const refusePrepare = () => Promise.reject(new MlxCapabilityError("MLX requires macOS on Apple Silicon"));
    const local = managedModelServer({
        prepare: refusePrepare,
        modelId: "mlx-community/some-model"
    });
    // Cloud model scripted to answer with a fixed text.
    const cloudUsage = () => ({
        inputTokens: {
            total: 1,
            noCache: 1,
            cacheRead: undefined,
            cacheWrite: undefined
        },
        outputTokens: { total: 1, text: 1, reasoning: undefined }
    });
    const cloud = new MockLanguageModelV3({
        modelId: "frontier-cloud",
        doGenerate: async () => ({
            content: [{ type: "text", text: "cloud handled it" }],
            finishReason: { unified: "stop", raw: "stop" },
            usage: cloudUsage(),
            warnings: []
        })
    });
    const model = handoffModel({ local, cloud });
    const { text } = await generateText({ model, prompt: "hello" });
    assert.equal(text, "cloud handled it");
});
@@ -0,0 +1 @@
1
+ export {};