@fusionkit/adapter-ai-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +23 -0
- package/dist/index.js +17 -0
- package/dist/managed-server.d.ts +102 -0
- package/dist/managed-server.js +348 -0
- package/dist/mlx-env.d.ts +178 -0
- package/dist/mlx-env.js +371 -0
- package/dist/model.d.ts +88 -0
- package/dist/model.js +149 -0
- package/dist/remote-tools.d.ts +56 -0
- package/dist/remote-tools.js +57 -0
- package/dist/routed-model.d.ts +88 -0
- package/dist/routed-model.js +218 -0
- package/dist/swarm-tools.d.ts +149 -0
- package/dist/swarm-tools.js +324 -0
- package/dist/test/golden.test.d.ts +1 -0
- package/dist/test/golden.test.js +129 -0
- package/dist/test/managed-server.test.d.ts +1 -0
- package/dist/test/managed-server.test.js +198 -0
- package/dist/test/mlx-env.test.d.ts +1 -0
- package/dist/test/mlx-env.test.js +351 -0
- package/dist/test/model.test.d.ts +1 -0
- package/dist/test/model.test.js +110 -0
- package/dist/test/remote-tools.test.d.ts +1 -0
- package/dist/test/remote-tools.test.js +151 -0
- package/dist/test/routed-model.test.d.ts +1 -0
- package/dist/test/routed-model.test.js +223 -0
- package/dist/test/swarm-tools.test.d.ts +1 -0
- package/dist/test/swarm-tools.test.js +157 -0
- package/dist/worktree-agent.d.ts +53 -0
- package/dist/worktree-agent.js +303 -0
- package/package.json +39 -0
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { spawnSync } from "node:child_process";
|
|
3
|
+
import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
import { after, test } from "node:test";
|
|
7
|
+
import { mlxServer } from "../managed-server.js";
|
|
8
|
+
import { MLX_LM_STRUCTURED_PIN, MlxCapabilityError, MlxEnv } from "../mlx-env.js";
|
|
9
|
+
/**
|
|
10
|
+
* Exercises real environment ownership without MLX or macOS: the package
|
|
11
|
+
* spec, import name, and install step are injectable, so these tests
|
|
12
|
+
* provision a genuine venv into a temp dir and "install" a stub module
|
|
13
|
+
* directly into its site-packages — the full own-the-env chain (venv
|
|
14
|
+
* creation, import verification, manifest, repair, destroy) minus the
|
|
15
|
+
* network-dependent pip download.
|
|
16
|
+
*/
|
|
17
|
+
// Host capability probes, evaluated once at module load.
/** True when `command` can be spawned with `args` and exits with status 0. */
const exitsCleanly = (command, args) => spawnSync(command, args, { encoding: "utf8" }).status === 0;
const pythonAvailable = exitsCleanly("python3", ["-c", "import ensurepip"]);
// Used as the `skip` option of the tests below: `false` when the tool is
// present, otherwise the reason string.
const skip = !pythonAvailable && "python3 with venv support is not available on this host";
const uvAvailable = exitsCleanly("uv", ["--version"]);
const skipUv = !uvAvailable && "uv is not available on this host";
|
|
24
|
+
// Every directory handed out by tempDir(); swept by the after() hook below.
const tempDirs = [];
/**
 * Create a fresh "warrant-mlxenv-*" directory under the OS temp dir and
 * remember it for cleanup.
 *
 * @returns {string} Path of the newly created directory.
 */
function tempDir() {
    const created = mkdtempSync(join(tmpdir(), "warrant-mlxenv-"));
    tempDirs.push(created);
    return created;
}
after(() => {
    // `force: true` keeps this quiet for directories a test already removed.
    tempDirs.forEach((created) => {
        rmSync(created, { recursive: true, force: true });
    });
});
|
|
34
|
+
/**
 * Build a stand-in for the env's `install` step that writes stub Python
 * modules straight into the venv's site-packages. Dotted names create
 * package directories (e.g. "mlx_lm.structured.integration" becomes
 * mlx_lm/structured/integration.py with __init__.py files along the way).
 *
 * @param {{ installs: number, specs?: string[][] }} counter - Mutated on every
 *   install: `installs` is incremented and, when `specs` is present, the
 *   requested specs are appended as a single array.
 * @param {string[]} [moduleNames] - Module names to make importable.
 * @returns {(venvPython: string, packageSpec: string, extraPackageSpecs?: string[]) => void}
 *   The installer. It throws an Error when `venvPython` cannot report its
 *   purelib directory.
 */
function stubInstaller(counter, moduleNames = ["warrant_stub"]) {
    return (venvPython, packageSpec, extraPackageSpecs = []) => {
        counter.installs++;
        counter.specs?.push([packageSpec, ...extraPackageSpecs]);
        const probe = spawnSync(venvPython, ["-c", "import sysconfig; print(sysconfig.get_paths()['purelib'])"], { encoding: "utf8" });
        const purelib = probe.status === 0 ? probe.stdout.trim() : "";
        if (purelib === "") {
            // Fail loudly: with an empty base, join() would resolve the stub
            // paths relative to the current working directory.
            const reason = probe.error?.message ?? `exit status ${probe.status}`;
            throw new Error(`stubInstaller: could not resolve site-packages via ${venvPython} (${reason})`);
        }
        for (const name of moduleNames) {
            const parts = name.split(".");
            let dir = purelib;
            // Every segment but the last is a package directory.
            for (const pkg of parts.slice(0, -1)) {
                dir = join(dir, pkg);
                mkdirSync(dir, { recursive: true });
                writeFileSync(join(dir, "__init__.py"), "");
            }
            writeFileSync(join(dir, `${parts[parts.length - 1]}.py`), "VALUE = 1\n");
        }
    };
}
|
|
56
|
+
test("provisions an owned venv, writes the manifest, and is idempotent", { skip }, async () => {
    const counter = { installs: 0 };
    const env = new MlxEnv({
        dir: tempDir(),
        packageSpec: "warrant-stub==1.0.0",
        importName: "warrant_stub",
        requirePlatform: false,
        uv: false,
        install: stubInstaller(counter)
    });
    assert.equal(env.verify(), false, "nothing provisioned yet");
    // First provision: exactly one install, with interpreter and manifest on disk.
    const first = await env.ensureProvisioned();
    assert.equal(counter.installs, 1);
    assert.equal(first.packageSpec, "warrant-stub==1.0.0");
    assert.equal(first.interpreterPath, env.venvPython);
    assert.ok(existsSync(env.venvPython), "venv interpreter exists");
    assert.ok(existsSync(env.manifestPath), "manifest written");
    assert.equal(env.verify(), true);
    // Re-provisioning with a matching manifest is a no-op.
    const second = await env.ensureProvisioned();
    assert.equal(counter.installs, 1, "no second install");
    assert.equal(second.createdAt, first.createdAt);
    const report = env.info();
    assert.equal(report.provisioned, true);
    assert.ok(report.diskBytes > 0, "owned dir has a measurable footprint");
});
|
|
83
|
+
test("a pin change re-provisions the env in place", { skip }, async () => {
    const dir = tempDir();
    const counter = { installs: 0 };
    // Both envs share the directory and the install counter; only the pin differs.
    const pinnedTo = (packageSpec) => new MlxEnv({
        dir,
        packageSpec,
        importName: "warrant_stub",
        requirePlatform: false,
        uv: false,
        install: stubInstaller(counter)
    });
    const v1 = pinnedTo("warrant-stub==1.0.0");
    await v1.ensureProvisioned();
    assert.equal(counter.installs, 1);
    const v2 = pinnedTo("warrant-stub==2.0.0");
    assert.equal(v2.verify(), false, "old manifest does not satisfy the new pin");
    const rebuilt = await v2.ensureProvisioned();
    assert.equal(counter.installs, 2, "pin bump rebuilt the env");
    assert.equal(rebuilt.packageSpec, "warrant-stub==2.0.0");
    assert.equal(v2.verify(), true);
});
|
|
110
|
+
test("prepare() spawns from the owned env with contained caches", { skip }, async () => {
    const ownedDir = tempDir();
    const env = new MlxEnv({
        dir: ownedDir,
        packageSpec: "warrant-stub==1.0.0",
        importName: "warrant_stub",
        requirePlatform: false,
        uv: false,
        install: stubInstaller({ installs: 0 })
    });
    const launch = await env.prepare("mlx-community/test-model", 12345, ["--max-tokens", "64"]);
    // Command line: venv interpreter, stock module entry point, then the model, port and extra args.
    assert.equal(launch.cmd, env.venvPython, "always the venv interpreter, never PATH");
    assert.deepEqual(launch.args.slice(0, 3), ["-m", "mlx_lm", "server"]);
    assert.ok(launch.args.includes("mlx-community/test-model"));
    assert.ok(launch.args.includes("12345"));
    assert.ok(launch.args.includes("--max-tokens"));
    // Cache and log locations stay under the owned directory.
    assert.equal(launch.env.HF_HOME, env.hfCacheDir, "model cache lives in the owned dir");
    assert.ok(launch.env.HF_HOME.startsWith(ownedDir));
    assert.ok(launch.logFile?.startsWith(ownedDir), "server logs live in the owned dir");
});
|
|
130
|
+
test("extra package specs are installed, recorded, and verified", { skip }, async () => {
    const dir = tempDir();
    const counter = { installs: 0, specs: [] };
    // Same directory, counter and stubbed modules each time; only the extras vary.
    const withExtras = (extraPackageSpecs) => new MlxEnv({
        dir,
        packageSpec: "warrant-stub==1.0.0",
        extraPackageSpecs,
        importName: "warrant_stub",
        extraImportNames: ["warrant_overlay_stub"],
        requirePlatform: false,
        uv: false,
        install: stubInstaller(counter, ["warrant_stub", "warrant_overlay_stub"])
    });
    const env = withExtras(["outlines-core==0.0.0", "/path/to/overlay"]);
    const manifest = await env.ensureProvisioned();
    assert.deepEqual(counter.specs, [
        ["warrant-stub==1.0.0", "outlines-core==0.0.0", "/path/to/overlay"]
    ]);
    assert.deepEqual(manifest.extraPackageSpecs, [
        "outlines-core==0.0.0",
        "/path/to/overlay"
    ]);
    assert.equal(env.verify(), true);
    // Changing the extras invalidates the env: the next ensureProvisioned
    // rebuilds it.
    const changed = withExtras(["outlines-core==9.9.9", "/path/to/overlay"]);
    assert.equal(changed.verify(), false, "extras drift fails verification");
    await changed.ensureProvisioned();
    assert.equal(counter.installs, 2, "extras drift re-provisioned");
    assert.equal(changed.verify(), true);
});
|
|
169
|
+
test("a missing extra import fails verification", { skip }, async () => {
    // Installs only the primary stub: the overlay import must fail.
    const primaryOnly = stubInstaller({ installs: 0 }, ["warrant_stub"]);
    const env = new MlxEnv({
        dir: tempDir(),
        packageSpec: "warrant-stub==1.0.0",
        extraPackageSpecs: ["/path/to/overlay"],
        importName: "warrant_stub",
        extraImportNames: ["warrant_overlay_stub"],
        requirePlatform: false,
        uv: false,
        install: primaryOnly
    });
    const provision = () => env.ensureProvisioned();
    await assert.rejects(provision, /cannot import "warrant_stub, warrant_overlay_stub"/);
    assert.equal(env.verify(), false);
});
|
|
185
|
+
test("a manifest without extras does not satisfy options with extras", { skip }, async () => {
    const dir = tempDir();
    // Provision the directory with no extras at all.
    const plain = new MlxEnv({
        dir,
        packageSpec: "warrant-stub==1.0.0",
        importName: "warrant_stub",
        requirePlatform: false,
        uv: false,
        install: stubInstaller({ installs: 0 })
    });
    await plain.ensureProvisioned();
    assert.equal(plain.verify(), true);
    // The same directory viewed through options that add an extra spec.
    const withExtras = new MlxEnv({
        dir,
        packageSpec: "warrant-stub==1.0.0",
        extraPackageSpecs: ["/path/to/overlay"],
        importName: "warrant_stub",
        requirePlatform: false,
        uv: false,
        install: stubInstaller({ installs: 0 })
    });
    assert.equal(withExtras.verify(), false);
});
|
|
208
|
+
test("prepare() spawns an overridden server module when configured", { skip }, async () => {
    const env = new MlxEnv({
        dir: tempDir(),
        packageSpec: "warrant-stub==1.0.0",
        importName: "warrant_stub",
        serverModule: "my_custom.server",
        requirePlatform: false,
        uv: false,
        install: stubInstaller({ installs: 0 })
    });
    const launch = await env.prepare("mlx-community/test-model", 12345);
    const [flag, moduleName] = launch.args;
    assert.deepEqual([flag, moduleName], ["-m", "my_custom.server"]);
    // The stock `server` subcommand must not appear anywhere in the args.
    assert.ok(!launch.args.includes("server"), "no stray stock subcommand");
    assert.ok(launch.args.includes("mlx-community/test-model"));
});
|
|
224
|
+
test("destroy() removes the entire owned footprint", { skip }, async () => {
    const ownedDir = tempDir();
    const env = new MlxEnv({
        dir: ownedDir,
        packageSpec: "warrant-stub==1.0.0",
        importName: "warrant_stub",
        requirePlatform: false,
        uv: false,
        install: stubInstaller({ installs: 0 })
    });
    await env.ensureProvisioned();
    assert.equal(env.verify(), true);
    env.destroy();
    // The whole directory is gone, so verification fails again.
    assert.equal(existsSync(ownedDir), false, "env, manifest, caches, and logs are gone");
    assert.equal(env.verify(), false);
});
|
|
240
|
+
test("mlxServer with structured provisions the self-contained fork", { skip }, async () => {
    const counter = { installs: 0, specs: [] };
    const managed = mlxServer({
        model: "mlx-community/test-model",
        structured: true,
        env: {
            dir: tempDir(),
            requirePlatform: false,
            uv: false,
            install: stubInstaller(counter, ["mlx_lm", "mlx_lm.structured.integration"])
        }
    });
    // Drive the env directly (starting the real server needs a model).
    const launch = await managed.env.prepare("mlx-community/test-model", 12345);
    assert.equal(counter.installs, 1);
    const [requested] = counter.specs;
    assert.deepEqual(requested, [MLX_LM_STRUCTURED_PIN], "the fork with its [structured] extra is the only spec");
    // The fork keeps the stock entry point; the hooks activate because the
    // structured extra's dependencies import, not via a different module.
    assert.deepEqual(launch.args.slice(0, 3), ["-m", "mlx_lm", "server"]);
    assert.equal(managed.env.verify(), true);
});
|
|
262
|
+
test("structured verification requires the structured subpackage import", { skip }, async () => {
    // Installs mlx_lm without the structured subpackage: must fail.
    const stockOnly = stubInstaller({ installs: 0 }, ["mlx_lm"]);
    const managed = mlxServer({
        model: "mlx-community/test-model",
        structured: true,
        env: {
            dir: tempDir(),
            requirePlatform: false,
            uv: false,
            install: stockOnly
        }
    });
    const env = managed.env;
    const provision = () => env.ensureProvisioned();
    await assert.rejects(provision, /cannot import "mlx_lm, mlx_lm\.structured\.integration"/);
});
|
|
276
|
+
test("explicit env options win over structured defaults", { skip }, async () => {
    const counter = { installs: 0, specs: [] };
    const managed = mlxServer({
        model: "mlx-community/test-model",
        structured: true,
        env: {
            dir: tempDir(),
            packageSpec: "mlx-lm[structured] @ git+https://example.invalid/fork@my-rev",
            requirePlatform: false,
            uv: false,
            install: stubInstaller(counter, ["mlx_lm", "mlx_lm.structured.integration"])
        }
    });
    await managed.env.prepare("mlx-community/test-model", 12345);
    // The caller's packageSpec, not the structured default, is what reached the installer.
    const primarySpec = counter.specs[0]?.[0];
    assert.equal(primarySpec, "mlx-lm[structured] @ git+https://example.invalid/fork@my-rev");
});
|
|
292
|
+
test("mlxServer without structured keeps the stock entry point", { skip }, async () => {
    const managed = mlxServer({
        model: "mlx-community/test-model",
        env: {
            dir: tempDir(),
            packageSpec: "warrant-stub==1.0.0",
            importName: "warrant_stub",
            requirePlatform: false,
            uv: false,
            install: stubInstaller({ installs: 0 })
        }
    });
    const launch = await managed.env.prepare("mlx-community/test-model", 12345);
    const entryPoint = launch.args.slice(0, 3);
    assert.deepEqual(entryPoint, ["-m", "mlx_lm", "server"]);
});
|
|
307
|
+
test("structured cannot be combined with a pre-built MlxEnv", () => {
    const prebuilt = new MlxEnv({ dir: tempDir(), requirePlatform: false });
    // Passing an MlxEnv instance together with `structured: true` must throw.
    const combine = () => mlxServer({ model: "m", env: prebuilt, structured: true });
    assert.throws(combine, /configure extraPackageSpecs/);
});
|
|
311
|
+
test("a missing interpreter is a clear capability error", { skip }, async () => {
    const env = new MlxEnv({
        dir: tempDir(),
        packageSpec: "warrant-stub==1.0.0",
        importName: "warrant_stub",
        requirePlatform: false,
        python: "/definitely/not/a/python"
    });
    // Validator: the right error class and a message naming the problem.
    const isNoPythonError = (error) => error instanceof MlxCapabilityError && /no usable Python/.test(error.message);
    await assert.rejects(() => env.ensureProvisioned(), isNoPythonError);
});
|
|
321
|
+
// Skipped on darwin/arm64, where the platform gate would let provisioning proceed.
test("the platform gate refuses non-Apple-Silicon hosts", { skip: process.platform === "darwin" && process.arch === "arm64" }, async () => {
    const env = new MlxEnv({ dir: tempDir() });
    const isPlatformRefusal = (error) => {
        if (!(error instanceof MlxCapabilityError)) {
            return false;
        }
        return /macOS on Apple Silicon/.test(error.message);
    };
    await assert.rejects(() => env.ensureProvisioned(), isPlatformRefusal);
});
|
|
326
|
+
test("provisions with uv when it is available", { skip: skipUv }, async () => {
    const counter = { installs: 0 };
    // No `uv` option is passed here, unlike the other provisioning tests.
    const env = new MlxEnv({
        dir: tempDir(),
        packageSpec: "warrant-stub==1.0.0",
        importName: "warrant_stub",
        requirePlatform: false,
        install: stubInstaller(counter)
    });
    const { toolchain } = await env.ensureProvisioned();
    assert.match(toolchain, /^uv /, "uv was preferred over venv+pip");
    assert.equal(counter.installs, 1);
    assert.equal(env.verify(), true);
    assert.ok(existsSync(env.venvPython), "uv-built venv interpreter exists");
});
|
|
342
|
+
test("an explicitly requested uv that cannot run is an error, not a fallback", async () => {
    const env = new MlxEnv({
        dir: tempDir(),
        packageSpec: "warrant-stub==1.0.0",
        importName: "warrant_stub",
        requirePlatform: false,
        uv: "/definitely/not/a/uv"
    });
    // Validator: the right error class and a message saying uv is not runnable.
    const isUnrunnableUv = (error) => error instanceof MlxCapabilityError && /not runnable/.test(error.message);
    await assert.rejects(() => env.ensureProvisioned(), isUnrunnableUv);
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { test } from "node:test";
|
|
3
|
+
import { generateText } from "ai";
|
|
4
|
+
import { MockLanguageModelV3 } from "ai/test";
|
|
5
|
+
import { handoff, localFirst, targets, triggers } from "@fusionkit/handoff";
|
|
6
|
+
import { handoffModel, withModel } from "../model.js";
|
|
7
|
+
// Token accounting attached to every mocked generate result in this file.
// The undefined-valued keys are kept so the object has the same own keys as before.
const usage = {
    inputTokens: { total: 1, noCache: 1, cacheRead: undefined, cacheWrite: undefined },
    outputTokens: {
        total: 1,
        text: 1,
        reasoning: undefined
    }
};
|
|
16
|
+
/**
 * Mock model whose generate call always succeeds with one text part.
 *
 * @param {string} id - Reported as the model's `modelId`.
 * @param {string} text - Text returned by every generate call.
 */
function textModel(id, text) {
    const doGenerate = async () => {
        const content = [{ type: "text", text }];
        const finishReason = { unified: "stop", raw: "stop" };
        return { content, finishReason, usage, warnings: [] };
    };
    return new MockLanguageModelV3({ modelId: id, doGenerate });
}
|
|
27
|
+
/**
 * Mock model whose generate call always rejects.
 *
 * @param {string} id - Reported as the model's `modelId`.
 * @param {string} message - Message of the Error every generate call throws.
 */
function failingModel(id, message) {
    const doGenerate = async () => {
        throw new Error(message);
    };
    return new MockLanguageModelV3({ modelId: id, doGenerate });
}
|
|
35
|
+
test("local-first: healthy local model handles the call", async () => {
    const seen = [];
    const model = handoffModel({
        local: textModel("tiny-local", "answer from local"),
        cloud: textModel("frontier-cloud", "answer from cloud"),
        onDecision: (decision) => seen.push(decision)
    });
    const { text } = await generateText({ model, prompt: "hello" });
    assert.equal(text, "answer from local");
    // Exactly one routing decision, and it stayed local.
    const expectedDecision = {
        model: "tiny-local",
        route: "local",
        escalated: false,
        reason: "local-first policy"
    };
    assert.deepEqual(seen, [expectedDecision]);
});
|
|
53
|
+
test("escalates on local failure, classifies overflow, and stays sticky", async () => {
    const seen = [];
    const model = handoffModel({
        local: failingModel("tiny-local", "prompt exceeds maximum context length"),
        cloud: textModel("frontier-cloud", "cloud handled it"),
        onDecision: (decision) => seen.push(decision)
    });
    const firstReply = await generateText({ model, prompt: "long prompt" });
    assert.equal(firstReply.text, "cloud handled it");
    const escalation = seen.find((decision) => decision.escalated);
    assert.ok(escalation);
    assert.equal(escalation.route, "cloud");
    assert.match(escalation.reason, /context-overflow/);
    // Sticky: the next call goes straight to cloud without touching local.
    const secondReply = await generateText({ model, prompt: "another" });
    assert.equal(secondReply.text, "cloud handled it");
    const latest = seen.at(-1);
    assert.ok(latest);
    assert.equal(latest.route, "cloud");
    assert.equal(latest.escalated, false);
    assert.match(latest.reason, /sticky/);
});
|
|
75
|
+
test("prompt-size threshold escalates before trying local", async () => {
    const seen = [];
    const model = handoffModel({
        // The local model throws if it is ever invoked.
        local: failingModel("tiny-local", "should never be called"),
        cloud: textModel("frontier-cloud", "cloud handled the big prompt"),
        maxLocalPromptBytes: 8,
        onDecision: (decision) => seen.push(decision)
    });
    const prompt = "a prompt comfortably larger than eight bytes";
    const { text } = await generateText({ model, prompt });
    assert.equal(text, "cloud handled the big prompt");
    const [firstDecision] = seen;
    assert.ok(firstDecision);
    assert.equal(firstDecision.escalated, true);
    assert.match(firstDecision.reason, /over the local threshold/);
});
|
|
93
|
+
test("withModel records routing in the trace and gates needs()", async () => {
    const base = handoff({
        workspace: ".",
        plane: { url: "http://127.0.0.1:9", adminToken: "unused" },
        policy: localFirst({ continueWhen: [triggers.modelEscalated()] })
    });
    const h = withModel(base, {
        local: failingModel("tiny-local", "boom"),
        cloud: textModel("frontier-cloud", "recovered in cloud")
    });
    assert.equal(h.needs(targets.pool("eng-prod")), false, "nothing escalated yet");
    const { text } = await generateText({ model: h.model, prompt: "do the thing" });
    assert.equal(text, "recovered in cloud");
    // The failed local call shows up as an escalated routing event in the trace.
    const routedEvents = h.trace().filter((event) => event.type === "model.routed");
    const sawEscalation = routedEvents.some((event) => event.type === "model.routed" && event.escalated);
    assert.ok(sawEscalation);
    assert.equal(h.needs(targets.pool("eng-prod")), true, "the escalation makes continuation needed");
    const summary = await h.summary();
    assert.equal(summary.modelRoutes.escalations, 1);
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { existsSync, readFileSync, rmSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { after, before, test } from "node:test";
|
|
5
|
+
import { generateText, stepCountIs } from "ai";
|
|
6
|
+
import { MockLanguageModelV3 } from "ai/test";
|
|
7
|
+
import { localFirst } from "@fusionkit/handoff";
|
|
8
|
+
import { PolicyDeniedError } from "@fusionkit/protocol";
|
|
9
|
+
import { makeRepo, startStack } from "@fusionkit/testkit";
|
|
10
|
+
import { remoteTools } from "../remote-tools.js";
|
|
11
|
+
// Pool name shared by the stack started in before() and the tools under test.
const POOL = "eng-prod";
// Both are assigned in before() and released in after().
let stack;
let repoDir;
// Token accounting attached to every mocked generate result in this file.
const usage = {
    inputTokens: { total: 1, noCache: 1, cacheRead: undefined, cacheWrite: undefined },
    outputTokens: {
        total: 1,
        text: 1,
        reasoning: undefined
    }
};
|
|
23
|
+
before(async () => {
    // Policy hook for the shared stack: "command" is the only allowed agent.
    const allowCommandOnly = (policy) => {
        policy.agents.allow = ["command"];
    };
    stack = await startStack({ pool: POOL, startRunner: true, policy: allowCommandOnly });
    // Fixture repository the tests use as their workspace.
    const files = {
        "README.md": "# app-owned loop fixture\n",
        "data.txt": "alpha beta gamma\n"
    };
    repoDir = makeRepo({ files });
});
|
|
35
|
+
// Tear down the shared stack and fixture repo. Both are guarded because
// either may still be undefined if the before() hook failed part-way, and an
// unguarded access here would throw a TypeError that hides the real failure.
after(async () => {
    try {
        await stack?.stop();
    }
    finally {
        // Runs even when stop() rejects, so the fixture repo is not left behind.
        if (repoDir !== undefined) {
            rmSync(repoDir, { recursive: true, force: true });
        }
    }
});
|
|
39
|
+
test("generateText executes tool calls in governed sessions and pulls results", async () => {
    const rt = remoteTools({
        workspace: repoDir,
        plane: { url: stack.planeUrl, adminToken: stack.adminToken },
        pool: POOL,
        actor: { kind: "human", id: "loop-owner" }
    });
    const command = "wc -w < data.txt > word-count.txt && echo governed-session-output && cat word-count.txt";
    // A scripted two-step model: first request a shell tool call, then close
    // out with text once the (governed) tool result is in the conversation.
    const shellRequestTurn = () => ({
        content: [
            {
                type: "tool-call",
                toolCallId: "call-1",
                toolName: "shell",
                input: JSON.stringify({ command })
            }
        ],
        finishReason: { unified: "tool-calls", raw: "tool-calls" },
        usage,
        warnings: []
    });
    const closingTurn = () => ({
        content: [{ type: "text", text: "the word count is recorded" }],
        finishReason: { unified: "stop", raw: "stop" },
        usage,
        warnings: []
    });
    let modelCalls = 0;
    const model = new MockLanguageModelV3({
        doGenerate: async () => {
            modelCalls += 1;
            return modelCalls === 1 ? shellRequestTurn() : closingTurn();
        }
    });
    const result = await generateText({
        model,
        tools: rt.tools,
        prompt: "count the words in data.txt inside the governed sandbox",
        stopWhen: stepCountIs(2)
    });
    // The loop stayed app-owned: the mock model drove two steps.
    assert.equal(result.text, "the word count is recorded");
    // The tool call executed remotely, with evidence.
    const recorded = rt.calls();
    assert.equal(recorded.length, 1);
    const record = recorded[0];
    assert.ok(record);
    assert.equal(record.toolName, "shell");
    assert.equal(record.command, command);
    assert.equal(record.status, "completed");
    assert.equal(record.exitCode, 0);
    assert.equal(record.receiptVerified, true, "receipt must verify offline");
    assert.equal(record.pullMode, "applied");
    assert.match(record.contractHash, /^[0-9a-f]{64}$/);
    // The tool result the model saw came from the governed session log.
    const allToolResults = result.steps.flatMap((step) => step.toolResults);
    const shellResult = allToolResults.find((entry) => entry.toolName === "shell");
    assert.ok(shellResult);
    const payload = shellResult.output;
    assert.ok(payload.output.includes("governed-session-output"));
    assert.equal(payload.exitCode, 0);
    // The session's workspace output was pulled back into the local repo.
    const pulledFile = join(repoDir, "word-count.txt");
    assert.ok(existsSync(pulledFile));
    assert.equal(readFileSync(pulledFile, "utf8").trim(), "3");
    // The continuation trace explains the boundary crossing.
    const eventTypes = rt.context.trace().map(({ type }) => type);
    assert.ok(eventTypes.includes("envelope.created"));
    assert.ok(eventTypes.includes("results.pulled"));
});
|
|
112
|
+
test("tool execution fails closed when continuation policy denies the pool", async () => {
    const rt = remoteTools({
        workspace: repoDir,
        plane: { url: stack.planeUrl, adminToken: stack.adminToken },
        pool: POOL,
        policy: localFirst({ denyPools: [POOL] })
    });
    const { execute } = rt.tools.shell;
    assert.ok(execute);
    // Promise.resolve() accepts either a plain value or a promise from execute().
    const attempt = () => Promise.resolve(execute({ command: "echo should-not-run" }, { toolCallId: "call-x", messages: [] }));
    const isPolicyDenial = (error) => {
        assert.ok(error instanceof PolicyDeniedError);
        return true;
    };
    await assert.rejects(attempt, isPolicyDenial);
    assert.equal(rt.calls().length, 0, "denied calls must not produce records");
});
|
|
127
|
+
test("org policy denies the command harness when not allowlisted", async () => {
    // A separate stack whose policy allows only the "mock" agent.
    const allowMockOnly = (policy) => {
        policy.agents.allow = ["mock"];
    };
    const restricted = await startStack({ pool: "locked-pool", policy: allowMockOnly });
    try {
        const rt = remoteTools({
            workspace: repoDir,
            plane: { url: restricted.planeUrl, adminToken: restricted.adminToken },
            pool: "locked-pool"
        });
        const { execute } = rt.tools.shell;
        assert.ok(execute);
        const attempt = () => Promise.resolve(execute({ command: "echo nope" }, { toolCallId: "call-y", messages: [] }));
        const isNotAllowed = (error) => {
            assert.ok(error instanceof Error);
            assert.match(error.message, /not allowed/);
            return true;
        };
        await assert.rejects(attempt, isNotAllowed);
    }
    finally {
        // Always stop the extra stack, even when an assertion above fails.
        await restricted.stop();
    }
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|