@fusionkit/adapter-ai-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +23 -0
- package/dist/index.js +17 -0
- package/dist/managed-server.d.ts +102 -0
- package/dist/managed-server.js +348 -0
- package/dist/mlx-env.d.ts +178 -0
- package/dist/mlx-env.js +371 -0
- package/dist/model.d.ts +88 -0
- package/dist/model.js +149 -0
- package/dist/remote-tools.d.ts +56 -0
- package/dist/remote-tools.js +57 -0
- package/dist/routed-model.d.ts +88 -0
- package/dist/routed-model.js +218 -0
- package/dist/swarm-tools.d.ts +149 -0
- package/dist/swarm-tools.js +324 -0
- package/dist/test/golden.test.d.ts +1 -0
- package/dist/test/golden.test.js +129 -0
- package/dist/test/managed-server.test.d.ts +1 -0
- package/dist/test/managed-server.test.js +198 -0
- package/dist/test/mlx-env.test.d.ts +1 -0
- package/dist/test/mlx-env.test.js +351 -0
- package/dist/test/model.test.d.ts +1 -0
- package/dist/test/model.test.js +110 -0
- package/dist/test/remote-tools.test.d.ts +1 -0
- package/dist/test/remote-tools.test.js +151 -0
- package/dist/test/routed-model.test.d.ts +1 -0
- package/dist/test/routed-model.test.js +223 -0
- package/dist/test/swarm-tools.test.d.ts +1 -0
- package/dist/test/swarm-tools.test.js +157 -0
- package/dist/worktree-agent.d.ts +53 -0
- package/dist/worktree-agent.js +303 -0
- package/package.json +39 -0
package/dist/mlx-env.js
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
import { spawnSync } from "node:child_process";
|
|
2
|
+
import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, statSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
import { delimiter, dirname, join } from "node:path";
|
|
5
|
+
/**
|
|
6
|
+
* Warrant-owned MLX environment.
|
|
7
|
+
*
|
|
8
|
+
* The managed MLX backend does not shell out to whatever `mlx_lm.server`
|
|
9
|
+
* happens to be on PATH — it owns the entire stack. This provisioner
|
|
10
|
+
* materializes and maintains a dedicated directory containing:
|
|
11
|
+
*
|
|
12
|
+
* <dir>/venv/ a private Python venv with mlx-lm at an exact pin
|
|
13
|
+
* <dir>/env.json a manifest of what was provisioned (and from where)
|
|
14
|
+
* <dir>/hf-cache/ HF_HOME, so model weights live inside the owned dir
|
|
15
|
+
* <dir>/logs/ server stdout/stderr
|
|
16
|
+
*
|
|
17
|
+
* The whole footprint is one directory: inspectable (info), verifiable
|
|
18
|
+
* (verify), repairable (re-provision on pin mismatch), and removable
|
|
19
|
+
* (destroy). The server process is always spawned via the venv's own
|
|
20
|
+
* interpreter — never a PATH lookup.
|
|
21
|
+
*
|
|
22
|
+
* The mlx-lm pin follows the same trusted-pin policy as the repo's npm
|
|
23
|
+
* allowlist: exact version, bumped only as a reviewed code change.
|
|
24
|
+
*
|
|
25
|
+
* Toolchain: provisioning prefers `uv` when available (an explicit path,
|
|
26
|
+
* WARRANT_UV, or PATH discovery) — it is much faster and can supply its own
|
|
27
|
+
* managed CPython, removing even the system-python requirement. Without uv
|
|
28
|
+
* it falls back to stdlib `python3 -m venv` + pip, so uv is an upgrade,
|
|
29
|
+
* never a dependency. uv's caches and managed interpreters are contained
|
|
30
|
+
* inside the owned directory, so destroy() removes them too.
|
|
31
|
+
*/
|
|
32
|
+
/**
 * Exact-pinned mlx-lm version this provisioner installs.
 * Used to build the default `packageSpec` (`mlx-lm==<pin>`) when an MlxEnv is
 * constructed without an explicit one.
 */
export const MLX_LM_PIN = "0.31.3";
/**
 * The velum-labs/mlx-lm fork installed in structured mode: upstream mlx-lm
 * plus the self-contained mlx_lm.structured package (see the fork's
 * STRUCTURED.md). Pinned to the current reviewed head of the fork's main
 * branch; refresh this SHA when we intentionally pick up fork fixes.
 */
export const MLX_LM_STRUCTURED_PIN = "mlx-lm[structured] @ git+https://github.com/velum-labs/mlx-lm@2ee2d570d365a1fcee9ba90a298f1bae865fccda";
/**
 * Python version requested from uv (which can download it if absent).
 * This is the default for MlxEnv's `pythonVersion` option and is only passed
 * to `uv venv --python`; the stdlib venv+pip path does not use it.
 */
export const PYTHON_PIN = "3.12";
/**
 * Minimum Python the venv may be built from.
 * Enforced by MlxEnv.checkPython, i.e. only on the stdlib venv+pip path.
 */
const MIN_PYTHON = { major: 3, minor: 9 };
|
|
45
|
+
/**
 * Where the owned MLX stack lives when the caller does not pick a directory:
 * `.warrant/mlx` underneath the current user's home directory.
 *
 * @returns the default owned-directory path.
 */
export function defaultMlxDir() {
    const home = homedir();
    const segments = [".warrant", "mlx"];
    return join(home, ...segments);
}
|
|
49
|
+
/**
 * Error raised when the host cannot provide something the MLX stack needs
 * (wrong OS/architecture, no usable Python, an unrunnable uv, ...).
 *
 * Carries a stable `code` of "capability_mismatch" so callers can branch on
 * it without matching on the message text.
 */
export class MlxCapabilityError extends Error {
    code = "capability_mismatch";
    name = "MlxCapabilityError";
    constructor(message) {
        super(message);
    }
}
|
|
57
|
+
/**
 * Synchronously execute `cmd` with `args` and normalize the outcome into
 * `{ status, stdout, stderr }` (all output decoded as UTF-8).
 *
 * - `extraEnv`, when given, is layered on top of the current process
 *   environment; it never replaces it. Without it the child simply inherits.
 * - A spawn-level failure (e.g. the binary does not exist) is reported as
 *   status 127 with the error message in `stderr`, rather than thrown.
 * - A missing exit status is reported as 1.
 */
function run(cmd, args, extraEnv) {
    const spawnOptions = extraEnv
        ? { encoding: "utf8", env: { ...process.env, ...extraEnv } }
        : { encoding: "utf8" };
    const outcome = spawnSync(cmd, args, spawnOptions);
    if (outcome.error) {
        return { status: 127, stdout: "", stderr: outcome.error.message };
    }
    const { status, stdout, stderr } = outcome;
    return {
        status: status ?? 1,
        stdout: stdout ?? "",
        stderr: stderr ?? ""
    };
}
|
|
72
|
+
/**
 * Sum the sizes, in bytes, of the regular files under `dir`, recursing into
 * subdirectories. Entries that are neither directories nor regular files
 * (symlinks, sockets, ...) are not counted.
 *
 * The walk tolerates a tree that is being modified concurrently: a directory
 * or file that disappears between being listed and being inspected counts as
 * zero bytes instead of aborting the whole measurement. Any other filesystem
 * error still propagates.
 *
 * @param dir directory to measure.
 * @returns total size in bytes (0 if `dir` itself no longer exists).
 */
function directorySizeBytes(dir) {
    let entries;
    try {
        entries = readdirSync(dir, { withFileTypes: true });
    }
    catch (error) {
        // The directory vanished after the caller (or our parent frame) saw it.
        if (error?.code === "ENOENT")
            return 0;
        throw error;
    }
    let total = 0;
    for (const entry of entries) {
        const full = join(dir, entry.name);
        if (entry.isDirectory()) {
            total += directorySizeBytes(full);
        }
        else if (entry.isFile()) {
            // throwIfNoEntry:false yields undefined for a file removed mid-walk.
            total += statSync(full, { throwIfNoEntry: false })?.size ?? 0;
        }
    }
    return total;
}
|
|
83
|
+
/**
 * Provisioner and launcher description for the owned MLX stack rooted at
 * `dir` (venv, manifest, HF cache, logs, uv caches).
 *
 * Constructor options (all optional):
 * - `dir`               owned directory; defaults to `defaultMlxDir()`.
 * - `packageSpec`       primary requirement; defaults to `mlx-lm==<MLX_LM_PIN>`.
 * - `extraPackageSpecs` additional requirements installed alongside it.
 * - `importName`        module that must import for the env to count as
 *                       working; defaults to "mlx_lm".
 * - `extraImportNames`  further modules that must import.
 * - `serverModule`      module run with `-m` instead of `mlx_lm server`.
 * - `requirePlatform`   enforce macOS/arm64 before provisioning (default true).
 * - `python`            explicit base interpreter; forces stdlib venv+pip.
 * - `uv`                explicit uv path, or `false` to never use uv.
 * - `pythonVersion`     version requested from uv; defaults to `PYTHON_PIN`.
 * - `install`           hook called as `(venvPython, packageSpec,
 *                       extraPackageSpecs)` in place of the built-in installer.
 */
export class MlxEnv {
    dir;
    packageSpec;
    extraPackageSpecs;
    importName;
    extraImportNames;
    serverModule;
    requirePlatform;
    explicitPython;
    uvOption;
    pythonVersion;
    installHook;
    /** In-flight provision run shared by concurrent ensureProvisioned() callers. */
    provisionPromise;
    constructor(options = {}) {
        this.dir = options.dir ?? defaultMlxDir();
        this.packageSpec = options.packageSpec ?? `mlx-lm==${MLX_LM_PIN}`;
        this.extraPackageSpecs = options.extraPackageSpecs ?? [];
        this.importName = options.importName ?? "mlx_lm";
        this.extraImportNames = options.extraImportNames ?? [];
        this.serverModule = options.serverModule;
        this.requirePlatform = options.requirePlatform ?? true;
        this.explicitPython = options.python;
        this.uvOption = options.uv;
        this.pythonVersion = options.pythonVersion ?? PYTHON_PIN;
        this.installHook = options.install;
    }
    /** Path of the manifest describing what was provisioned. */
    get manifestPath() {
        return join(this.dir, "env.json");
    }
    /** Root of the private virtual environment. */
    get venvDir() {
        return join(this.dir, "venv");
    }
    /** The venv's own interpreter (`Scripts/python.exe` on win32, `bin/python` elsewhere). */
    get venvPython() {
        const binDir = process.platform === "win32" ? "Scripts" : "bin";
        const exe = process.platform === "win32" ? "python.exe" : "python";
        return join(this.venvDir, binDir, exe);
    }
    /** Directory exported to the server as HF_HOME. */
    get hfCacheDir() {
        return join(this.dir, "hf-cache");
    }
    /** Directory holding the server log file. */
    get logsDir() {
        return join(this.dir, "logs");
    }
    /** uv's caches and managed interpreters, contained in the owned dir. */
    get uvEnv() {
        return {
            UV_CACHE_DIR: join(this.dir, "uv-cache"),
            UV_PYTHON_INSTALL_DIR: join(this.dir, "uv-python")
        };
    }
    /**
     * Load and shape-check the manifest.
     *
     * @returns the parsed manifest, or `undefined` when the file is missing,
     * is not valid JSON, has an unexpected `version`, or has wrongly typed
     * `packageSpec` / `interpreterPath` / `extraPackageSpecs`.
     */
    readManifest() {
        if (!existsSync(this.manifestPath))
            return undefined;
        try {
            const parsed = JSON.parse(readFileSync(this.manifestPath, "utf8"));
            if (parsed.version !== "warrant.mlxenv.v1" ||
                typeof parsed.packageSpec !== "string" ||
                typeof parsed.interpreterPath !== "string") {
                return undefined;
            }
            if (parsed.extraPackageSpecs !== undefined &&
                !(Array.isArray(parsed.extraPackageSpecs) &&
                    parsed.extraPackageSpecs.every((spec) => typeof spec === "string"))) {
                return undefined;
            }
            return parsed;
        }
        catch {
            // Unreadable or malformed manifest (including JSON `null`): treat
            // as "not provisioned" so the caller re-provisions.
            return undefined;
        }
    }
    /**
     * Throw MlxCapabilityError unless the host is macOS on arm64. Skipped
     * entirely when `requirePlatform` is false.
     */
    assertPlatform() {
        if (!this.requirePlatform)
            return;
        if (process.platform !== "darwin" || process.arch !== "arm64") {
            throw new MlxCapabilityError(`MLX requires macOS on Apple Silicon; this host is ${process.platform}/${process.arch}. ` +
                "Use a cloud model (or handoffModel escalation) on this machine.");
        }
    }
    /**
     * Pick the provisioning toolchain. An explicit `python` option forces
     * stdlib venv+pip with that interpreter; otherwise uv is preferred
     * (explicit path, WARRANT_UV, or PATH discovery) with venv+pip as the
     * no-extra-requirements fallback.
     *
     * @returns `{ kind: "uv", bin, version }` or `{ kind: "venv-pip", python }`.
     * @throws MlxCapabilityError when an explicitly requested uv does not run,
     * or when no usable base Python is found for the fallback.
     */
    resolveToolchain() {
        if (this.explicitPython !== undefined) {
            return { kind: "venv-pip", python: this.checkPython(this.explicitPython) };
        }
        if (this.uvOption !== false) {
            const explicitUv = this.uvOption ?? process.env.WARRANT_UV;
            const candidate = explicitUv ?? "uv";
            const probe = run(candidate, ["--version"]);
            if (probe.status === 0) {
                return {
                    kind: "uv",
                    bin: candidate,
                    version: probe.stdout.trim().replace(/^uv\s+/, "")
                };
            }
            // An explicitly requested uv that does not run is an error, not a
            // silent fallback; PATH discovery falling through is expected.
            if (explicitUv !== undefined) {
                throw new MlxCapabilityError(`requested uv ("${candidate}") is not runnable: ${probe.stderr.trim() || "not found"}`);
            }
        }
        return { kind: "venv-pip", python: this.checkPython("python3") };
    }
    /**
     * Sanity-check a base interpreter for the stdlib venv+pip path.
     *
     * @param candidate interpreter command or path to probe.
     * @returns `candidate` unchanged when it runs and is new enough.
     * @throws MlxCapabilityError when the interpreter cannot be run, reports a
     * version that cannot be parsed, or is older than MIN_PYTHON.
     */
    checkPython(candidate) {
        const probe = run(candidate, [
            "-c",
            "import sys; print(f'{sys.version_info[0]}.{sys.version_info[1]}')"
        ]);
        if (probe.status !== 0) {
            throw new MlxCapabilityError(`no usable Python interpreter ("${candidate}"): ${probe.stderr.trim() || "not found"} ` +
                "(install python3, or install uv and Warrant will manage Python itself)");
        }
        const reported = probe.stdout.trim();
        const [major = 0, minor = 0] = reported.split(".").map(Number);
        // NaN compares false against everything, so without this guard an
        // unparsable version string would slip past the minimum-version gate.
        if (Number.isNaN(major) || Number.isNaN(minor)) {
            throw new MlxCapabilityError(`could not determine the Python version of "${candidate}" (it reported "${reported}")`);
        }
        if (major < MIN_PYTHON.major ||
            (major === MIN_PYTHON.major && minor < MIN_PYTHON.minor)) {
            throw new MlxCapabilityError(`Python ${reported} is too old; mlx-lm needs >= ${MIN_PYTHON.major}.${MIN_PYTHON.minor}`);
        }
        return candidate;
    }
    /** Does the venv interpreter exist and import the managed packages? */
    importWorks() {
        if (!existsSync(this.venvPython))
            return false;
        const imports = [this.importName, ...this.extraImportNames].join(", ");
        return run(this.venvPython, ["-c", `import ${imports}`]).status === 0;
    }
    /**
     * Order-sensitive comparison of the manifest's recorded extra specs with
     * the configured ones; a manifest without the field counts as "none".
     */
    extrasMatch(manifest) {
        const recorded = manifest.extraPackageSpecs ?? [];
        return (recorded.length === this.extraPackageSpecs.length &&
            recorded.every((spec, i) => spec === this.extraPackageSpecs[i]));
    }
    /** Manifest matches the current pins and the env actually works. */
    verify() {
        const manifest = this.readManifest();
        return (manifest !== undefined &&
            manifest.packageSpec === this.packageSpec &&
            this.extrasMatch(manifest) &&
            manifest.importName === this.importName &&
            this.importWorks());
    }
    /** Manifest plus on-disk footprint of the owned directory. */
    info() {
        const manifest = this.readManifest();
        return {
            dir: this.dir,
            provisioned: this.verify(),
            ...(manifest ? { manifest } : {}),
            diskBytes: existsSync(this.dir) ? directorySizeBytes(this.dir) : 0
        };
    }
    /** Remove the entire owned footprint: venv, manifest, weights, logs. */
    destroy() {
        this.provisionPromise = undefined;
        rmSync(this.dir, { recursive: true, force: true });
    }
    /**
     * Idempotently provision the env. A matching manifest plus a passing
     * import check is a no-op; anything else (fresh host, pin bump, broken
     * venv) provisions in place. Concurrent callers share one provision run.
     *
     * @returns a promise for the manifest describing the usable env.
     */
    ensureProvisioned() {
        const existing = this.readManifest();
        if (existing && this.verify())
            return Promise.resolve(existing);
        if (!this.provisionPromise) {
            this.provisionPromise = this.provision().finally(() => {
                // Allow a later call to retry (or re-verify) from scratch.
                this.provisionPromise = undefined;
            });
        }
        return this.provisionPromise;
    }
    /**
     * Build the env from scratch: check the platform, pick a toolchain,
     * recreate the venv, install the packages (or delegate to the `install`
     * hook), confirm the imports work, then write and return the manifest.
     *
     * @throws MlxCapabilityError for host/toolchain problems, Error when the
     * install fails or the installed packages cannot be imported.
     */
    async provision() {
        this.assertPlatform();
        const toolchain = this.resolveToolchain();
        mkdirSync(this.dir, { recursive: true });
        mkdirSync(this.hfCacheDir, { recursive: true });
        mkdirSync(this.logsDir, { recursive: true });
        // A stale or pin-mismatched venv is rebuilt from scratch rather than
        // upgraded in place: rebuilds are cheap and exact, upgrades are neither.
        if (existsSync(this.venvDir)) {
            rmSync(this.venvDir, { recursive: true, force: true });
        }
        this.createVenv(toolchain);
        if (this.installHook) {
            this.installHook(this.venvPython, this.packageSpec, this.extraPackageSpecs);
        }
        else {
            this.installPackages(toolchain);
        }
        if (!this.importWorks()) {
            const imports = [this.importName, ...this.extraImportNames].join(", ");
            throw new Error(`provisioned env cannot import "${imports}"; the install is broken`);
        }
        const versionProbe = run(this.venvPython, [
            "-c",
            "import sys; print(f'{sys.version_info[0]}.{sys.version_info[1]}.{sys.version_info[2]}')"
        ]);
        const manifest = {
            version: "warrant.mlxenv.v1",
            packageSpec: this.packageSpec,
            extraPackageSpecs: this.extraPackageSpecs,
            importName: this.importName,
            toolchain: toolchain.kind === "uv"
                ? `uv ${toolchain.version}`
                : `venv+pip via ${toolchain.python}`,
            interpreterPath: this.venvPython,
            pythonVersion: versionProbe.stdout.trim(),
            createdAt: new Date().toISOString()
        };
        writeFileSync(this.manifestPath, JSON.stringify(manifest, null, 2));
        return manifest;
    }
    /**
     * Create the venv at `venvDir` with the chosen toolchain.
     *
     * @throws MlxCapabilityError when venv creation fails.
     */
    createVenv(toolchain) {
        if (toolchain.kind === "uv") {
            // uv resolves the pinned Python from the system or downloads a
            // managed CPython into the owned dir — no system-python requirement.
            const result = run(toolchain.bin, ["venv", "--python", this.pythonVersion, this.venvDir], this.uvEnv);
            if (result.status !== 0) {
                throw new MlxCapabilityError(`uv venv (python ${this.pythonVersion}) failed: ${result.stderr.trim() || result.stdout.trim()}`);
            }
            return;
        }
        const result = run(toolchain.python, ["-m", "venv", this.venvDir]);
        if (result.status !== 0) {
            throw new MlxCapabilityError(`failed to create venv with ${toolchain.python} -m venv: ${result.stderr.trim() || result.stdout.trim()}`);
        }
    }
    /**
     * Install `packageSpec` plus `extraPackageSpecs` into the venv, via
     * `uv pip install --python <venv python>` or the venv's own pip.
     *
     * @throws Error carrying the tail (last 2000 characters) of the
     * installer's output when the install fails.
     */
    installPackages(toolchain) {
        const specs = [this.packageSpec, ...this.extraPackageSpecs];
        const result = toolchain.kind === "uv"
            ? run(toolchain.bin, ["pip", "install", "--python", this.venvPython, ...specs], this.uvEnv)
            : run(this.venvPython, [
                "-m",
                "pip",
                "install",
                "--no-input",
                "--disable-pip-version-check",
                ...specs
            ]);
        if (result.status !== 0) {
            // Fall back to stdout (as createVenv does) so the failure is never
            // reported with an empty detail when the tool wrote nothing to stderr.
            const detail = (result.stderr.trim() || result.stdout.trim()).slice(-2000);
            throw new Error(`installing ${specs.join(", ")} failed: ${detail}`);
        }
    }
    /**
     * Provision (if needed) and produce the spawn spec for the server:
     * the venv's interpreter running `-m mlx_lm server` with a minimal,
     * explicit environment whose caches live inside the owned dir.
     *
     * @param model value passed to `--model`.
     * @param port value passed to `--port` (the host is always 127.0.0.1).
     * @param extraArgs additional server arguments, appended verbatim.
     * @returns `{ cmd, args, env, cwd, logFile }`; nothing is spawned here.
     */
    async prepare(model, port, extraArgs = []) {
        await this.ensureProvisioned();
        // The stock entry point is the `server` subcommand of the mlx_lm
        // module; an override (e.g. the structured overlay) is a module that is
        // itself the server and takes the same flags.
        const moduleArgs = this.serverModule
            ? ["-m", this.serverModule]
            : ["-m", "mlx_lm", "server"];
        return {
            cmd: this.venvPython,
            args: [
                ...moduleArgs,
                "--model",
                model,
                "--host",
                "127.0.0.1",
                "--port",
                String(port),
                ...extraArgs
            ],
            env: {
                // Explicit, minimal environment: the venv's bin first (so any
                // helper the server execs resolves inside the env), model caches
                // contained in the owned dir, no inherited surprises.
                PATH: [dirname(this.venvPython), "/usr/bin", "/bin"].join(delimiter),
                HOME: homedir(),
                HF_HOME: this.hfCacheDir,
                HF_HUB_DISABLE_TELEMETRY: "1",
                VIRTUAL_ENV: this.venvDir
            },
            cwd: this.dir,
            logFile: join(this.logsDir, "server.log")
        };
    }
}
|
package/dist/model.d.ts
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import type { LanguageModelV3, LanguageModelV3CallOptions, LanguageModelV3GenerateResult, LanguageModelV3StreamResult } from "@ai-sdk/provider";
|
|
2
|
+
import type { Handoff, ModelDecision } from "@fusionkit/handoff";
|
|
3
|
+
/**
 * Why a call left the local model. Deterministic and observable: an error
 * from the local provider, a context/token-length failure (classified from
 * the error), a prompt-size threshold, or sticky escalation after a prior
 * failure.
 */
export type EscalationReason = "local-error" | "context-overflow" | "prompt-threshold" | "sticky";
/**
 * Configuration accepted by the `HandoffModel` constructor and by
 * `handoffModel(...)`. Only `local` and `cloud` are required.
 */
export type HandoffModelConfig = {
    /** The model work starts on. */
    local: LanguageModelV3;
    /** The model work escalates to. */
    cloud: LanguageModelV3;
    /**
     * Escalate without trying local when the serialized prompt exceeds this
     * many bytes — the deterministic stand-in for "context too large".
     */
    maxLocalPromptBytes?: number;
    /**
     * Once escalated, stay on the cloud model for subsequent calls in this
     * model instance. Defaults to true: thrash-free and easier to reason
     * about than per-call retries.
     */
    sticky?: boolean;
    /** Observer for every routing decision (withModel wires this to h.trace). */
    onDecision?: (decision: ModelDecision) => void;
    /**
     * Override the overflow classifier. Providers do not standardize
     * context-overflow errors, so the default is a message heuristic; supply
     * a provider-specific predicate when you know the exact error shape.
     */
    isContextOverflow?: (error: unknown) => boolean;
};
|
|
35
|
+
/**
 * An AI SDK-compatible model that starts local and escalates to cloud
 * under deterministic, explainable conditions. Honest semantics:
 *
 * - Escalation happens *between* generate/stream calls (a failed local call
 *   is retried in full on the cloud model). There is no mid-generation
 *   handoff: once a stream has started emitting, it belongs to the model
 *   that produced it. A local stream that fails to *start* escalates; a
 *   stream that dies midway surfaces the error to the caller.
 * - Every routing decision is reported via onDecision, so a Handoff context
 *   can record it (`model.routed` trace events) and triggers.modelEscalated()
 *   can gate continuation.
 */
export declare class HandoffModel implements LanguageModelV3 {
    /** Language-model specification version this class implements. */
    readonly specificationVersion: "v3";
    /** Provider identifier reported to the AI SDK. */
    readonly provider = "warrant-handoff";
    /** Composite id of the form `local-first(<local modelId> → <cloud modelId>)`. */
    readonly modelId: string;
    /** The configuration passed to the constructor. */
    private readonly config;
    /** True once an escalation has happened while `sticky` is enabled; later calls then route to cloud. */
    private escalatedSticky;
    constructor(config: HandoffModelConfig);
    /** Delegates to the local model's `supportedUrls`. */
    get supportedUrls(): LanguageModelV3["supportedUrls"];
    /** Picks the route before a call runs: sticky escalation, then the prompt-size threshold, otherwise local. */
    private decide;
    /** Reports one routing decision to `config.onDecision`, if set. */
    private note;
    /** Latches sticky escalation unless `config.sticky` is false. */
    private markEscalated;
    /**
     * The one dispatch shared by doGenerate and doStream: route per `decide`,
     * try local, classify a local failure, escalate to cloud, and report
     * every decision. The two entry points differ only in which provider
     * method runs and how a local failure is phrased.
     */
    private dispatch;
    /** Generate via the routed model; a failed local call is retried in full on cloud. */
    doGenerate(options: LanguageModelV3CallOptions): Promise<LanguageModelV3GenerateResult>;
    /** Start a stream via the routed model; only a failure to start the local stream escalates. */
    doStream(options: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult>;
}
|
|
69
|
+
/**
 * Create an escalating local-first model.
 *
 * @param config local/cloud models plus optional routing settings.
 * @returns a new `HandoffModel` built from `config`.
 */
export declare function handoffModel(config: HandoffModelConfig): HandoffModel;
/**
 * Attach a model to a continuation context as `h.model`. The single
 * golden-shape attach used by both `withModel` and `withRoutedModel`;
 * decision-to-trace mapping stays with each adapter, the composition does
 * not.
 *
 * @param h context to extend; it is mutated and returned (not copied).
 * @param model value to expose as `h.model`.
 */
export declare function attachModel<H extends Handoff, M>(h: H, model: M): H & {
    model: M;
};
/**
 * The golden-shape composition for the model half: attach `h.model` to a
 * continuation context. Routing decisions land in the context's trace as
 * `model.routed` events, and escalations make triggers.modelEscalated()
 * fire for `h.needs(...)`.
 *
 * @param h context that receives the model and records its decisions.
 * @param config model configuration; `onDecision` is excluded because
 * withModel supplies its own, forwarding each decision to
 * `h.noteModelDecision`.
 */
export declare function withModel<H extends Handoff>(h: H, config: Omit<HandoffModelConfig, "onDecision">): H & {
    model: HandoffModel;
};
|
package/dist/model.js
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// There is no standard error code for "the prompt did not fit": providers
// describe context overflow in free-text messages, so matching on the message
// is the only default that works across providers. The label is purely
// informational — both outcomes escalate to cloud — and callers who know
// their provider's error shape can replace the heuristic through
// config.isContextOverflow.
const OVERFLOW_PATTERN = /context|token|length|too.?(long|large)/i;
/**
 * Label a local-model failure as "context-overflow" or "local-error".
 *
 * @param error the value thrown by the local model.
 * @param isOverflow optional caller-supplied predicate; when present it is
 * the sole authority and the message heuristic is not consulted.
 */
function classify(error, isOverflow) {
    let overflowed;
    if (isOverflow) {
        overflowed = isOverflow(error);
    }
    else {
        const text = error instanceof Error ? error.message : String(error);
        overflowed = OVERFLOW_PATTERN.test(text);
    }
    return overflowed ? "context-overflow" : "local-error";
}
|
|
12
|
+
/**
 * Cheap, deterministic stand-in for prompt size: the UTF-8 byte length of
 * `options.prompt` after JSON serialization. The threshold gate only needs a
 * stable number that tracks token count; an exact tokenizer would tie the
 * measure to one model.
 *
 * @returns the byte length, or 0 when the prompt cannot be measured
 * (missing, or not serializable).
 */
function promptBytes(options) {
    let size = 0;
    try {
        const serialized = JSON.stringify(options.prompt);
        size = Buffer.byteLength(serialized, "utf8");
    }
    catch {
        size = 0;
    }
    return size;
}
|
|
25
|
+
/**
 * An AI SDK-compatible model that starts local and escalates to cloud
 * under deterministic, explainable conditions. Honest semantics:
 *
 * - Escalation happens *between* generate/stream calls (a failed local call
 *   is retried in full on the cloud model). There is no mid-generation
 *   handoff: once a stream has started emitting, it belongs to the model
 *   that produced it. A local stream that fails to *start* escalates; a
 *   stream that dies midway surfaces the error to the caller.
 * - A call the caller aborted (`options.abortSignal` already aborted when the
 *   local call fails) is not a local failure: the error is rethrown, nothing
 *   is escalated, and no decision is recorded.
 * - Every routing decision is reported via onDecision, so a Handoff context
 *   can record it (`model.routed` trace events) and triggers.modelEscalated()
 *   can gate continuation.
 */
export class HandoffModel {
    specificationVersion = "v3";
    provider = "warrant-handoff";
    modelId;
    config;
    /** Latched by markEscalated(); once true, decide() always routes to cloud. */
    escalatedSticky = false;
    /**
     * @param config local and cloud models plus optional routing settings
     * (maxLocalPromptBytes, sticky, onDecision, isContextOverflow).
     */
    constructor(config) {
        this.config = config;
        this.modelId = `local-first(${config.local.modelId} → ${config.cloud.modelId})`;
    }
    /** Delegates to the local model's supportedUrls. */
    get supportedUrls() {
        return this.config.local.supportedUrls;
    }
    /**
     * Choose the route for a call before anything runs: sticky escalation
     * first, then the prompt-size threshold, otherwise local.
     *
     * @returns `{ model, route, escalated, reason }`; `escalated` is true
     * only for a fresh escalation, not for a sticky follow-up.
     */
    decide(options) {
        if (this.escalatedSticky) {
            return {
                model: this.config.cloud,
                route: "cloud",
                escalated: false,
                reason: "sticky escalation from an earlier local failure"
            };
        }
        const threshold = this.config.maxLocalPromptBytes;
        if (threshold !== undefined) {
            const bytes = promptBytes(options);
            if (bytes > threshold) {
                return {
                    model: this.config.cloud,
                    route: "cloud",
                    escalated: true,
                    reason: `prompt is ${bytes} bytes, over the local threshold of ${threshold}`
                };
            }
        }
        return {
            model: this.config.local,
            route: "local",
            escalated: false,
            reason: "local-first policy"
        };
    }
    /** Report one routing decision to config.onDecision, if an observer is set. */
    note(route, escalated, reason) {
        this.config.onDecision?.({
            model: route === "local" ? this.config.local.modelId : this.config.cloud.modelId,
            route,
            escalated,
            reason
        });
    }
    /** Latch sticky escalation unless config.sticky is explicitly false. */
    markEscalated() {
        if (this.config.sticky ?? true)
            this.escalatedSticky = true;
    }
    /**
     * The one dispatch shared by doGenerate and doStream: route per `decide`,
     * try local, classify a local failure, escalate to cloud, and report
     * every decision. The two entry points differ only in which provider
     * method runs and how a local failure is phrased.
     *
     * @param options the call options, forwarded unchanged by `call`.
     * @param call invokes the provider method on the model it is given.
     * @param localFailurePhrase prefix of the recorded escalation reason.
     */
    async dispatch(options, call, localFailurePhrase) {
        const decision = this.decide(options);
        if (decision.route === "cloud") {
            if (decision.escalated)
                this.markEscalated();
            this.note("cloud", decision.escalated, decision.reason);
            return call(this.config.cloud);
        }
        try {
            const result = await call(this.config.local);
            // Recorded only after local succeeded, so a failed attempt is
            // reported once, as the escalation below.
            this.note("local", false, decision.reason);
            return result;
        }
        catch (error) {
            // The caller cancelled the call. Treating that as a local failure
            // would latch sticky escalation, record a bogus escalation, and
            // start a cloud request nobody is waiting for.
            if (options.abortSignal?.aborted)
                throw error;
            const why = classify(error, this.config.isContextOverflow);
            this.markEscalated();
            const reason = `${localFailurePhrase} (${why}): ${error instanceof Error ? error.message : String(error)}`;
            this.note("cloud", true, reason);
            return call(this.config.cloud);
        }
    }
    /** Generate via the routed model; a failed local call is retried in full on cloud. */
    async doGenerate(options) {
        return this.dispatch(options, (model) => model.doGenerate(options), "local model failed");
    }
    /** Start a stream via the routed model; only a failure to start the local stream escalates. */
    async doStream(options) {
        return this.dispatch(options, (model) => model.doStream(options), "local model failed to start streaming");
    }
}
|
|
125
|
+
/**
 * Factory for the escalating local-first model.
 *
 * @param config local/cloud models plus optional routing settings.
 * @returns a fresh HandoffModel built from `config`.
 */
export function handoffModel(config) {
    const model = new HandoffModel(config);
    return model;
}
|
|
129
|
+
/**
 * Expose `model` on the continuation context as `h.model`. This is the one
 * shared attach step behind both `withModel` and `withRoutedModel`; mapping
 * decisions into the trace is left to each adapter.
 *
 * @param h context to extend — mutated in place and returned.
 * @param model value to publish as `h.model`.
 */
export function attachModel(h, model) {
    const extension = { model };
    return Object.assign(h, extension);
}
|
|
138
|
+
/**
 * Golden-shape composition for the model half: build a HandoffModel whose
 * routing decisions are forwarded to `h.noteModelDecision`, then attach it
 * to the continuation context as `h.model`. Those decisions land in the
 * context's trace as `model.routed` events, and escalations make
 * triggers.modelEscalated() fire for `h.needs(...)`.
 *
 * @param h context that receives the model and records its decisions.
 * @param config model configuration; any `onDecision` it carries is replaced.
 */
export function withModel(h, config) {
    const onDecision = (decision) => h.noteModelDecision(decision);
    const model = handoffModel({ ...config, onDecision });
    return attachModel(h, model);
}
|
|
package/dist/remote-tools.d.ts
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { Tool } from "ai";
|
|
2
|
+
import { Handoff } from "@fusionkit/handoff";
|
|
3
|
+
import type { CommandHarnessConfig, GovernedRunRecord } from "@fusionkit/handoff";
|
|
4
|
+
import type { RunStatus } from "@fusionkit/protocol";
|
|
5
|
+
/**
 * Standalone wiring for `remoteTools`: a `CommandHarnessConfig` plus the
 * tool-specific `pullResults` switch.
 */
export type RemoteToolsConfig = CommandHarnessConfig & {
    /** Pull workspace changes back after each call. Defaults to true. */
    pullResults?: boolean;
};
/**
 * Alternative wiring: attach the remote tools to an existing continuation
 * context (e.g. the golden-interface `handoff(...)`) so tool calls,
 * continuations, and sandbox commands share one workspace, policy, and
 * trace instead of forking a second context.
 */
export type RemoteToolsContextConfig = {
    /** The existing continuation context the tools attach to. */
    context: Handoff;
    /** Runner pool that executes the tool calls. */
    pool: string;
    /** Pull workspace changes back after each call. Defaults to true. */
    pullResults?: boolean;
    /** Per-call wait ceiling. Defaults to 5 minutes. */
    timeoutMs?: number;
};
/** Input of the `shell` tool (see `RemoteToolSet`). */
export type ShellToolInput = {
    /** NOTE(review): presumably the command line to run remotely — confirm against remote-tools.js. */
    command: string;
};
/** Output of the `shell` tool (see `RemoteToolSet`). */
export type ShellToolOutput = {
    /** NOTE(review): presumably the id of the governed run that executed the command — confirm. */
    runId: string;
    /** Run status as defined by `@fusionkit/protocol`. */
    status: RunStatus;
    /** Exit code; may be undefined (NOTE(review): presumably when the run produced none — confirm). */
    exitCode: number | undefined;
    /** NOTE(review): presumably the command's captured output — confirm which streams are included. */
    output: string;
};
/** A `GovernedRunRecord` tagged with the tool that produced it; "shell" is the only tool name this type allows. */
export type RemoteToolCallRecord = GovernedRunRecord & {
    toolName: "shell";
};
/** The tool set exposed by `remoteTools`: a single `shell` tool. */
export type RemoteToolSet = {
    shell: Tool<ShellToolInput, ShellToolOutput>;
};
/** What `remoteTools(...)` returns. */
export type RemoteTools = {
    /** AI SDK-compatible tools; pass directly to generateText/streamText. */
    tools: RemoteToolSet;
    /** One record per executed tool call: run id, receipt hash, verification. */
    calls(): RemoteToolCallRecord[];
    /** The underlying continuation context (trace, lastEnvelope, …). */
    context: Handoff;
};
|
|
47
|
+
/**
 * App-owned loops, honestly labeled (spec §6.2): the model loop stays in the
 * caller's process and carries no durability claim. What Warrant adds is the
 * execution boundary — every tool call becomes a signed run contract executed
 * in a governed session and returns alongside an offline-verifiable receipt.
 *
 * There is no `handoff-needed` stream event and no mid-generation
 * continuation; those are deliberately out of scope.
 *
 * @param config either a standalone `RemoteToolsConfig`, or a
 * `RemoteToolsContextConfig` that attaches the tools to an existing
 * continuation context.
 * @returns the tool set, an accessor for the per-call records, and the
 * continuation context in use.
 */
export declare function remoteTools(config: RemoteToolsConfig | RemoteToolsContextConfig): RemoteTools;
|