@fusionkit/adapter-ai-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +23 -0
- package/dist/index.js +17 -0
- package/dist/managed-server.d.ts +102 -0
- package/dist/managed-server.js +348 -0
- package/dist/mlx-env.d.ts +178 -0
- package/dist/mlx-env.js +371 -0
- package/dist/model.d.ts +88 -0
- package/dist/model.js +149 -0
- package/dist/remote-tools.d.ts +56 -0
- package/dist/remote-tools.js +57 -0
- package/dist/routed-model.d.ts +88 -0
- package/dist/routed-model.js +218 -0
- package/dist/swarm-tools.d.ts +149 -0
- package/dist/swarm-tools.js +324 -0
- package/dist/test/golden.test.d.ts +1 -0
- package/dist/test/golden.test.js +129 -0
- package/dist/test/managed-server.test.d.ts +1 -0
- package/dist/test/managed-server.test.js +198 -0
- package/dist/test/mlx-env.test.d.ts +1 -0
- package/dist/test/mlx-env.test.js +351 -0
- package/dist/test/model.test.d.ts +1 -0
- package/dist/test/model.test.js +110 -0
- package/dist/test/remote-tools.test.d.ts +1 -0
- package/dist/test/remote-tools.test.js +151 -0
- package/dist/test/routed-model.test.d.ts +1 -0
- package/dist/test/routed-model.test.js +223 -0
- package/dist/test/swarm-tools.test.d.ts +1 -0
- package/dist/test/swarm-tools.test.js +157 -0
- package/dist/worktree-agent.d.ts +53 -0
- package/dist/worktree-agent.js +303 -0
- package/package.json +39 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fusionkit/adapter-ai-sdk — the AI SDK side of Warrant for app-owned loops.
|
|
3
|
+
*
|
|
4
|
+
* The application keeps its own `generateText`/`streamText` loop and its own
|
|
5
|
+
* model; Warrant governs the execution boundary. `remoteTools(...)` returns
|
|
6
|
+
* AI SDK-compatible tools whose calls run as signed contracts in governed
|
|
7
|
+
* runner sessions and return with offline-verifiable receipts. The model
|
|
8
|
+
* surfaces (`withModel`, `routedModel`, `mlxServer`) route the caller's own
|
|
9
|
+
* loop across local and cloud models with every decision recorded.
|
|
10
|
+
*/
|
|
11
|
+
export { remoteTools } from "./remote-tools.js";
|
|
12
|
+
export type { RemoteToolCallRecord, RemoteTools, RemoteToolsConfig, RemoteToolsContextConfig } from "./remote-tools.js";
|
|
13
|
+
export { swarmTools } from "./swarm-tools.js";
|
|
14
|
+
export type { DispatchInput, DispatchOutput, EscalateInput, EscalateOutput, PullInput, PullOutput, StatusInput, StatusOutput, SwarmPlane, SwarmRunRecord, SwarmTools, SwarmToolsConfig, SwarmToolsContextConfig, SwarmToolSet, WorkerTaskInput } from "./swarm-tools.js";
|
|
15
|
+
export { handoffModel, withModel } from "./model.js";
|
|
16
|
+
export type { EscalationReason, HandoffModelConfig } from "./model.js";
|
|
17
|
+
export { loadRouterCard, routedModel, withRoutedModel } from "./routed-model.js";
|
|
18
|
+
export type { RouteDecision, RoutedModelConfig, RouterCard } from "./routed-model.js";
|
|
19
|
+
export { runWorktreeAgent, worktreeDiff } from "./worktree-agent.js";
|
|
20
|
+
export type { TrajectoryStep, TrajectoryStepType, WorktreeAgentInput, WorktreeAgentResult } from "./worktree-agent.js";
|
|
21
|
+
export { defaultMlxDir, MlxCapabilityError, MlxEnv } from "./mlx-env.js";
|
|
22
|
+
export { managedModelServer, mlxServer } from "./managed-server.js";
|
|
23
|
+
export type { ManagedModelServerOptions, ManagedServerEvent, MlxServerOptions } from "./managed-server.js";
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fusionkit/adapter-ai-sdk — the AI SDK side of Warrant for app-owned loops.
|
|
3
|
+
*
|
|
4
|
+
* The application keeps its own `generateText`/`streamText` loop and its own
|
|
5
|
+
* model; Warrant governs the execution boundary. `remoteTools(...)` returns
|
|
6
|
+
* AI SDK-compatible tools whose calls run as signed contracts in governed
|
|
7
|
+
* runner sessions and return with offline-verifiable receipts. The model
|
|
8
|
+
* surfaces (`withModel`, `routedModel`, `mlxServer`) route the caller's own
|
|
9
|
+
* loop across local and cloud models with every decision recorded.
|
|
10
|
+
*/
|
|
11
|
+
export { remoteTools } from "./remote-tools.js";
|
|
12
|
+
export { swarmTools } from "./swarm-tools.js";
|
|
13
|
+
export { handoffModel, withModel } from "./model.js";
|
|
14
|
+
export { loadRouterCard, routedModel, withRoutedModel } from "./routed-model.js";
|
|
15
|
+
export { runWorktreeAgent, worktreeDiff } from "./worktree-agent.js";
|
|
16
|
+
export { defaultMlxDir, MlxCapabilityError, MlxEnv } from "./mlx-env.js";
|
|
17
|
+
export { managedModelServer, mlxServer } from "./managed-server.js";
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import type { LanguageModelV3, LanguageModelV3CallOptions, LanguageModelV3GenerateResult, LanguageModelV3StreamResult } from "@ai-sdk/provider";
|
|
2
|
+
import { MlxEnv } from "./mlx-env.js";
|
|
3
|
+
import type { MlxEnvOptions, SpawnSpec } from "./mlx-env.js";
|
|
4
|
+
/**
 * Lifecycle notification passed to `ManagedModelServerOptions.onEvent`,
 * discriminated by `type`.
 */
export type ManagedServerEvent = {
    /** A start attempt began; emitted once the port is known, before `prepare` runs. */
    type: "starting";
    port: number;
} | {
    /** The health endpoint answered OK and the inner model was built. */
    type: "ready";
    baseURL: string;
    /** Child process id; -1 when the runtime did not report one. */
    pid: number;
    /** Milliseconds from the beginning of the start attempt to readiness. */
    startupMs: number;
} | {
    /** The process was stopped on purpose: by the idle timer or by `stop()`. */
    type: "stopped";
    reason: "idle" | "explicit";
} | {
    /** The process exited while the server was believed to be running. */
    type: "crashed";
    exitCode: number | null;
};
|
|
19
|
+
/** Coarse lifecycle state reported by `ManagedModelServer.status()`. */
export type ManagedServerStatus = "stopped" | "starting" | "running";
|
|
20
|
+
/** Configuration for a managed local model server. */
export type ManagedModelServerOptions = {
    /** Produce the spawn spec for a given port (env provisioning included). */
    prepare: (port: number) => Promise<SpawnSpec>;
    /** Model id requests are made with (and reported as `modelId`). */
    modelId: string;
    /** Fixed port; defaults to a free port picked per start. */
    port?: number;
    /** Health endpoint polled until the server answers. Defaults to "/v1/models". */
    healthPath?: string;
    /**
     * How long a start attempt may take before it fails, measured from the
     * beginning of the attempt (so time spent in `prepare` counts).
     * Defaults to 120000 ms.
     */
    startupTimeoutMs?: number;
    /** Idle period after which the process is stopped; 0 disables. Defaults to 5 minutes. */
    idleShutdownMs?: number;
    /** Grace period between SIGTERM and SIGKILL when stopping. Defaults to 5000 ms. */
    shutdownGraceMs?: number;
    /** Observer for lifecycle events (starting, ready, stopped, crashed). */
    onEvent?: (event: ManagedServerEvent) => void;
    /** Whether the OpenAI-compatible endpoint enforces schema response formats. */
    supportsStructuredOutputs?: boolean;
    /**
     * Build the inner model once the server is up. When supplied it replaces
     * the default OpenAI-compatible model, and `supportsStructuredOutputs`
     * is not forwarded to it.
     */
    createModel?: (baseURL: string, modelId: string) => LanguageModelV3;
};
|
|
39
|
+
/**
 * A LanguageModelV3 whose backing server process is owned by the instance:
 * started lazily on the first call, shared by concurrent calls, and stopped
 * again after an idle period.
 */
export declare class ManagedModelServer implements LanguageModelV3 {
    readonly specificationVersion: "v3";
    readonly provider = "warrant-managed-server";
    readonly modelId: string;
    private readonly options;
    /** Current lifecycle state, as returned by `status()`. */
    private state;
    /** Handle of the spawned server process, if any. */
    private child;
    /** Model built against the running server; unset while stopped. */
    private inner;
    private currentBaseURL;
    /** In-flight start attempt shared by concurrent callers. */
    private startPromise;
    /** Number of generate/stream calls currently holding the server open. */
    private leases;
    private lastUsedMs;
    private idleTimer;
    /** Most recent server stdout/stderr, kept for error messages. */
    private outputTail;
    /** True while a deliberate stop is in progress. */
    private stopping;
    constructor(options: ManagedModelServerOptions);
    /** The inner model's supported URLs, or an empty map while no server is running. */
    get supportedUrls(): LanguageModelV3["supportedUrls"];
    status(): ManagedServerStatus;
    /** The server's base URL while running (changes across restarts). */
    baseURL(): string | undefined;
    /** Eagerly start (optional — calls start lazily on their own). */
    start(): Promise<void>;
    /** Stop the process and scale to zero. In-flight calls will fail. */
    stop(): Promise<void>;
    private ensureStarted;
    private startProcess;
    private openLog;
    private armIdleTimer;
    private clearRunning;
    private killChild;
    private stopProcess;
    private acquire;
    private release;
    private requireInner;
    /** Start the server if needed, then delegate to the inner model. */
    doGenerate(options: LanguageModelV3CallOptions): Promise<LanguageModelV3GenerateResult>;
    /**
     * Start the server if needed, then delegate to the inner model. The
     * server is kept alive until the returned stream closes, errors, or is
     * cancelled.
     */
    doStream(options: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult>;
}
|
|
76
|
+
/**
 * Create a managed local model server from a prepare hook.
 *
 * Nothing is spawned here; the process starts on the first
 * generate/stream call or an explicit `start()`.
 */
export declare function managedModelServer(options: ManagedModelServerOptions): ManagedModelServer;
|
|
78
|
+
/**
 * Options for `mlxServer`: the MLX-specific fields below plus every
 * `ManagedModelServerOptions` field except `prepare` and `modelId`, which
 * the preset supplies itself.
 */
export type MlxServerOptions = {
    /** Hugging Face repo id the server loads (e.g. mlx-community/...). */
    model: string;
    /** Owned-environment configuration, or a pre-built MlxEnv. */
    env?: MlxEnvOptions | MlxEnv;
    /** Extra mlx_lm server flags (e.g. --max-tokens). */
    extraArgs?: string[];
    /**
     * Enable structured decoding (`response_format`, `guided_json`,
     * `guided_regex`, `guided_choice`): the env installs the velum-labs
     * mlx-lm fork with its [structured] extra, which is self-contained
     * (see the fork's STRUCTURED.md). With this set the AI SDK's JSON output
     * modes (generateObject, responseFormat) are actually enforced by the
     * server.
     *
     * Cannot be combined with a pre-built `MlxEnv` in `env`; `mlxServer`
     * throws in that case.
     */
    structured?: boolean;
} & Omit<ManagedModelServerOptions, "prepare" | "modelId" | "createModel"> & Pick<Partial<ManagedModelServerOptions>, "createModel">;
|
|
95
|
+
/**
 * The MLX preset: a managed server whose Python environment is owned by
 * Warrant (see MlxEnv) and whose process is spawned from that env's own
 * interpreter. `handle.env` exposes verify/info/destroy for the footprint.
 *
 * @throws Error when `structured` is set together with a pre-built MlxEnv.
 */
export declare function mlxServer(options: MlxServerOptions): ManagedModelServer & {
    env: MlxEnv;
};
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { createWriteStream, mkdirSync } from "node:fs";
|
|
3
|
+
import { createServer } from "node:net";
|
|
4
|
+
import { dirname } from "node:path";
|
|
5
|
+
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
6
|
+
import { MLX_LM_STRUCTURED_PIN, MlxEnv } from "./mlx-env.js";
|
|
7
|
+
/**
|
|
8
|
+
* A managed local model server: a LanguageModelV3 whose backing process is
|
|
9
|
+
* owned by this object. The first generate/stream call starts the server
|
|
10
|
+
* (prepare → spawn → health), concurrent calls share one process, and an
|
|
11
|
+
* idle period with no in-flight calls scales it to zero; the next call
|
|
12
|
+
* transparently restarts it.
|
|
13
|
+
*
|
|
14
|
+
* Composes as the `local` leg of handoffModel: a provisioning failure,
|
|
15
|
+
* cold-start timeout, or crash surfaces as a failed local call, which the
|
|
16
|
+
* routing layer escalates to cloud.
|
|
17
|
+
*
|
|
18
|
+
* This is the app-process, local-first path. Runner/plane-side model-server
|
|
19
|
+
* pools (governed, receipt-producing model serving) are a separate feature.
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Defaults. The first three are overridable per server through
 * `startupTimeoutMs`, `idleShutdownMs` and `shutdownGraceMs`.
 */
// Upper bound for one start attempt, measured from the start of the attempt.
const DEFAULT_STARTUP_TIMEOUT_MS = 120_000;
// Idle time with no in-flight calls before the process is stopped.
const DEFAULT_IDLE_SHUTDOWN_MS = 5 * 60 * 1000;
// Time between SIGTERM and the follow-up SIGKILL.
const DEFAULT_SHUTDOWN_GRACE_MS = 5_000;
// Pause between health probes during startup (fixed, no option).
const HEALTH_POLL_MS = 250;
/**
 * Size of the server-output tail kept for diagnostics. It is applied as a
 * string length (`slice`), so it counts UTF-16 code units rather than bytes.
 */
const OUTPUT_TAIL_BYTES = 64 * 1024;
|
|
28
|
+
/**
 * Build the default inner model: an OpenAI-compatible chat model pointed
 * at the managed server.
 *
 * @param baseURL Server origin without the API prefix.
 * @param modelId Model id sent with every request.
 * @param supportsStructuredOutputs Forwarded to the provider unchanged.
 */
function defaultCreateModel(baseURL, modelId, supportsStructuredOutputs) {
    // The provider appends route paths (e.g. /chat/completions) directly,
    // so the OpenAI-compatible API prefix belongs on the base URL.
    const apiRoot = `${baseURL}/v1`;
    const provider = createOpenAICompatible({
        name: "warrant-managed-server",
        baseURL: apiRoot,
        apiKey: "not-needed",
        supportsStructuredOutputs
    });
    return provider(modelId);
}
|
|
38
|
+
/**
 * Ask the OS for a currently unused loopback TCP port.
 *
 * A throwaway server is bound to port 0 on 127.0.0.1, the assigned port is
 * read back, and the server is closed before the promise resolves. The
 * promise rejects if the throwaway server reports an error.
 */
function freePort() {
    return new Promise((resolve, reject) => {
        const listener = createServer();
        const onListening = () => {
            const bound = listener.address();
            let port = 0;
            if (bound && typeof bound === "object") {
                port = bound.port;
            }
            listener.close(() => resolve(port));
        };
        listener.once("error", reject);
        listener.listen(0, "127.0.0.1", onListening);
    });
}
|
|
49
|
+
/** Resolve after `ms` milliseconds. */
const sleep = (ms) => new Promise((wake) => {
    setTimeout(wake, ms);
});
|
|
50
|
+
export class ManagedModelServer {
|
|
51
|
+
specificationVersion = "v3";
|
|
52
|
+
provider = "warrant-managed-server";
|
|
53
|
+
modelId;
|
|
54
|
+
options;
|
|
55
|
+
state = "stopped";
|
|
56
|
+
child;
|
|
57
|
+
inner;
|
|
58
|
+
currentBaseURL;
|
|
59
|
+
startPromise;
|
|
60
|
+
leases = 0;
|
|
61
|
+
lastUsedMs = 0;
|
|
62
|
+
idleTimer;
|
|
63
|
+
outputTail = "";
|
|
64
|
+
stopping = false;
|
|
65
|
+
constructor(options) {
|
|
66
|
+
this.options = options;
|
|
67
|
+
this.modelId = options.modelId;
|
|
68
|
+
}
|
|
69
|
+
get supportedUrls() {
|
|
70
|
+
return this.inner?.supportedUrls ?? {};
|
|
71
|
+
}
|
|
72
|
+
status() {
|
|
73
|
+
return this.state;
|
|
74
|
+
}
|
|
75
|
+
/** The server's base URL while running (changes across restarts). */
|
|
76
|
+
baseURL() {
|
|
77
|
+
return this.currentBaseURL;
|
|
78
|
+
}
|
|
79
|
+
/** Eagerly start (optional — calls start lazily on their own). */
|
|
80
|
+
async start() {
|
|
81
|
+
await this.ensureStarted();
|
|
82
|
+
}
|
|
83
|
+
/** Stop the process and scale to zero. In-flight calls will fail. */
|
|
84
|
+
async stop() {
|
|
85
|
+
await this.stopProcess("explicit");
|
|
86
|
+
}
|
|
87
|
+
// ---- lifecycle ----
|
|
88
|
+
ensureStarted() {
|
|
89
|
+
if (this.state === "running")
|
|
90
|
+
return Promise.resolve();
|
|
91
|
+
if (!this.startPromise) {
|
|
92
|
+
this.startPromise = this.startProcess().finally(() => {
|
|
93
|
+
this.startPromise = undefined;
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
return this.startPromise;
|
|
97
|
+
}
|
|
98
|
+
async startProcess() {
|
|
99
|
+
const startedAt = Date.now();
|
|
100
|
+
this.state = "starting";
|
|
101
|
+
try {
|
|
102
|
+
const port = this.options.port ?? (await freePort());
|
|
103
|
+
this.options.onEvent?.({ type: "starting", port });
|
|
104
|
+
const spec = await this.options.prepare(port);
|
|
105
|
+
this.outputTail = "";
|
|
106
|
+
const child = spawn(spec.cmd, spec.args, {
|
|
107
|
+
env: spec.env,
|
|
108
|
+
...(spec.cwd ? { cwd: spec.cwd } : {}),
|
|
109
|
+
stdio: ["ignore", "pipe", "pipe"]
|
|
110
|
+
});
|
|
111
|
+
this.child = child;
|
|
112
|
+
const log = spec.logFile ? this.openLog(spec.logFile) : undefined;
|
|
113
|
+
const capture = (chunk) => {
|
|
114
|
+
this.outputTail = (this.outputTail + chunk.toString("utf8")).slice(-OUTPUT_TAIL_BYTES);
|
|
115
|
+
log?.write(chunk);
|
|
116
|
+
};
|
|
117
|
+
child.stdout?.on("data", capture);
|
|
118
|
+
child.stderr?.on("data", capture);
|
|
119
|
+
let exited = false;
|
|
120
|
+
let exitCode = null;
|
|
121
|
+
child.on("exit", (code) => {
|
|
122
|
+
exited = true;
|
|
123
|
+
exitCode = code;
|
|
124
|
+
log?.end();
|
|
125
|
+
// A process that dies while we believe it is running is a crash:
|
|
126
|
+
// reset so the next call respawns instead of hitting a dead URL.
|
|
127
|
+
if (this.state === "running" && this.child === child && !this.stopping) {
|
|
128
|
+
this.clearRunning();
|
|
129
|
+
this.options.onEvent?.({ type: "crashed", exitCode: code });
|
|
130
|
+
}
|
|
131
|
+
});
|
|
132
|
+
child.on("error", (error) => {
|
|
133
|
+
capture(Buffer.from(`spawn error: ${error.message}\n`, "utf8"));
|
|
134
|
+
exited = true;
|
|
135
|
+
});
|
|
136
|
+
const baseURL = `http://127.0.0.1:${port}`;
|
|
137
|
+
const healthURL = `${baseURL}${this.options.healthPath ?? "/v1/models"}`;
|
|
138
|
+
const deadline = startedAt + (this.options.startupTimeoutMs ?? DEFAULT_STARTUP_TIMEOUT_MS);
|
|
139
|
+
for (;;) {
|
|
140
|
+
if (exited) {
|
|
141
|
+
throw new Error(`server exited during startup (code ${exitCode}): ${this.outputTail.slice(-2000)}`);
|
|
142
|
+
}
|
|
143
|
+
if (Date.now() > deadline) {
|
|
144
|
+
throw new Error(`server did not become healthy within ${this.options.startupTimeoutMs ?? DEFAULT_STARTUP_TIMEOUT_MS}ms: ${this.outputTail.slice(-2000)}`);
|
|
145
|
+
}
|
|
146
|
+
try {
|
|
147
|
+
const response = await fetch(healthURL);
|
|
148
|
+
await response.arrayBuffer();
|
|
149
|
+
if (response.ok)
|
|
150
|
+
break;
|
|
151
|
+
}
|
|
152
|
+
catch {
|
|
153
|
+
// not up yet
|
|
154
|
+
}
|
|
155
|
+
await sleep(HEALTH_POLL_MS);
|
|
156
|
+
}
|
|
157
|
+
this.currentBaseURL = baseURL;
|
|
158
|
+
this.inner = this.options.createModel
|
|
159
|
+
? this.options.createModel(baseURL, this.options.modelId)
|
|
160
|
+
: defaultCreateModel(baseURL, this.options.modelId, this.options.supportsStructuredOutputs ?? false);
|
|
161
|
+
this.state = "running";
|
|
162
|
+
this.lastUsedMs = Date.now();
|
|
163
|
+
this.armIdleTimer();
|
|
164
|
+
this.options.onEvent?.({
|
|
165
|
+
type: "ready",
|
|
166
|
+
baseURL,
|
|
167
|
+
pid: child.pid ?? -1,
|
|
168
|
+
startupMs: Date.now() - startedAt
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
catch (error) {
|
|
172
|
+
await this.killChild();
|
|
173
|
+
this.clearRunning();
|
|
174
|
+
throw error;
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
openLog(path) {
|
|
178
|
+
mkdirSync(dirname(path), { recursive: true });
|
|
179
|
+
return createWriteStream(path, { flags: "a" });
|
|
180
|
+
}
|
|
181
|
+
armIdleTimer() {
|
|
182
|
+
const idleMs = this.options.idleShutdownMs ?? DEFAULT_IDLE_SHUTDOWN_MS;
|
|
183
|
+
if (idleMs <= 0)
|
|
184
|
+
return;
|
|
185
|
+
const interval = Math.max(25, Math.floor(idleMs / 4));
|
|
186
|
+
this.idleTimer = setInterval(() => {
|
|
187
|
+
if (this.state === "running" &&
|
|
188
|
+
this.leases === 0 &&
|
|
189
|
+
Date.now() - this.lastUsedMs >= idleMs) {
|
|
190
|
+
void this.stopProcess("idle");
|
|
191
|
+
}
|
|
192
|
+
}, interval);
|
|
193
|
+
this.idleTimer.unref?.();
|
|
194
|
+
}
|
|
195
|
+
clearRunning() {
|
|
196
|
+
if (this.idleTimer) {
|
|
197
|
+
clearInterval(this.idleTimer);
|
|
198
|
+
this.idleTimer = undefined;
|
|
199
|
+
}
|
|
200
|
+
this.state = "stopped";
|
|
201
|
+
this.child = undefined;
|
|
202
|
+
this.inner = undefined;
|
|
203
|
+
this.currentBaseURL = undefined;
|
|
204
|
+
}
|
|
205
|
+
async killChild() {
|
|
206
|
+
const child = this.child;
|
|
207
|
+
if (!child || child.exitCode !== null)
|
|
208
|
+
return;
|
|
209
|
+
const graceMs = this.options.shutdownGraceMs ?? DEFAULT_SHUTDOWN_GRACE_MS;
|
|
210
|
+
const exited = new Promise((resolve) => {
|
|
211
|
+
child.once("exit", () => resolve());
|
|
212
|
+
});
|
|
213
|
+
child.kill("SIGTERM");
|
|
214
|
+
const timer = setTimeout(() => child.kill("SIGKILL"), graceMs);
|
|
215
|
+
timer.unref?.();
|
|
216
|
+
await exited;
|
|
217
|
+
clearTimeout(timer);
|
|
218
|
+
}
|
|
219
|
+
async stopProcess(reason) {
|
|
220
|
+
if (this.state === "stopped" || this.stopping)
|
|
221
|
+
return;
|
|
222
|
+
this.stopping = true;
|
|
223
|
+
try {
|
|
224
|
+
await this.killChild();
|
|
225
|
+
this.clearRunning();
|
|
226
|
+
this.options.onEvent?.({ type: "stopped", reason });
|
|
227
|
+
}
|
|
228
|
+
finally {
|
|
229
|
+
this.stopping = false;
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
// ---- leases ----
|
|
233
|
+
acquire() {
|
|
234
|
+
this.leases++;
|
|
235
|
+
this.lastUsedMs = Date.now();
|
|
236
|
+
}
|
|
237
|
+
release() {
|
|
238
|
+
this.leases = Math.max(0, this.leases - 1);
|
|
239
|
+
this.lastUsedMs = Date.now();
|
|
240
|
+
}
|
|
241
|
+
requireInner() {
|
|
242
|
+
if (!this.inner) {
|
|
243
|
+
throw new Error("managed server is not running (it may have crashed)");
|
|
244
|
+
}
|
|
245
|
+
return this.inner;
|
|
246
|
+
}
|
|
247
|
+
// ---- LanguageModelV3 ----
|
|
248
|
+
async doGenerate(options) {
|
|
249
|
+
this.acquire();
|
|
250
|
+
try {
|
|
251
|
+
await this.ensureStarted();
|
|
252
|
+
return await this.requireInner().doGenerate(options);
|
|
253
|
+
}
|
|
254
|
+
finally {
|
|
255
|
+
this.release();
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
async doStream(options) {
|
|
259
|
+
this.acquire();
|
|
260
|
+
let released = false;
|
|
261
|
+
const releaseOnce = () => {
|
|
262
|
+
if (released)
|
|
263
|
+
return;
|
|
264
|
+
released = true;
|
|
265
|
+
this.release();
|
|
266
|
+
};
|
|
267
|
+
try {
|
|
268
|
+
await this.ensureStarted();
|
|
269
|
+
const result = await this.requireInner().doStream(options);
|
|
270
|
+
// The lease is held until the stream settles — close, error, or
|
|
271
|
+
// cancel — so the idle timer can never scale the server to zero
|
|
272
|
+
// while tokens are still flowing.
|
|
273
|
+
const reader = result.stream.getReader();
|
|
274
|
+
const stream = new ReadableStream({
|
|
275
|
+
pull: async (controller) => {
|
|
276
|
+
try {
|
|
277
|
+
const { done, value } = await reader.read();
|
|
278
|
+
if (done) {
|
|
279
|
+
controller.close();
|
|
280
|
+
releaseOnce();
|
|
281
|
+
return;
|
|
282
|
+
}
|
|
283
|
+
controller.enqueue(value);
|
|
284
|
+
}
|
|
285
|
+
catch (error) {
|
|
286
|
+
releaseOnce();
|
|
287
|
+
controller.error(error);
|
|
288
|
+
}
|
|
289
|
+
},
|
|
290
|
+
cancel: (cause) => {
|
|
291
|
+
releaseOnce();
|
|
292
|
+
return reader.cancel(cause);
|
|
293
|
+
}
|
|
294
|
+
});
|
|
295
|
+
return { ...result, stream };
|
|
296
|
+
}
|
|
297
|
+
catch (error) {
|
|
298
|
+
releaseOnce();
|
|
299
|
+
throw error;
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
/**
 * Factory for a managed local model server driven by a prepare hook.
 * Construction only; the process is not spawned here.
 */
export function managedModelServer(options) {
    const server = new ManagedModelServer(options);
    return server;
}
|
|
307
|
+
/**
 * Env defaults used when structured decoding is requested: the pinned
 * mlx-lm fork becomes the main package spec, and its integration module is
 * added to the imports that must succeed. The stock `mlx_lm server` entry
 * point is unchanged; the hooks activate because the [structured] extra's
 * dependencies import.
 */
function structuredEnvOptions() {
    const packageSpec = MLX_LM_STRUCTURED_PIN;
    const extraImportNames = ["mlx_lm.structured.integration"];
    return { packageSpec, extraImportNames };
}
|
|
318
|
+
/**
 * MLX preset: builds a ManagedModelServer whose spawn spec comes from an
 * MlxEnv, and attaches that env to the returned server as `env`.
 *
 * `options.env` may be a ready MlxEnv (used as-is) or plain env options
 * (a new MlxEnv is constructed). All remaining options are forwarded to the
 * managed server.
 *
 * @throws Error when `structured` is requested together with a pre-built
 *   MlxEnv.
 */
export function mlxServer(options) {
    const { model, env: suppliedEnv, extraArgs, structured, ...passthrough } = options;
    const prebuilt = suppliedEnv instanceof MlxEnv;
    if (prebuilt && structured) {
        throw new Error("structured cannot be combined with a pre-built MlxEnv: configure " +
            "extraPackageSpecs/extraImportNames/serverModule on the env instead");
    }
    // Structured mode supplies defaults; explicit env options win (e.g. a
    // custom packageSpec pointing at another fork revision).
    const env = prebuilt
        ? suppliedEnv
        : new MlxEnv({
            ...(structured ? structuredEnvOptions() : {}),
            ...(suppliedEnv ?? {})
        });
    const enforcesSchemas = structured === true || passthrough.supportsStructuredOutputs === true;
    const server = new ManagedModelServer({
        ...passthrough,
        modelId: model,
        prepare: (port) => env.prepare(model, port, extraArgs ?? []),
        supportsStructuredOutputs: enforcesSchemas
    });
    return Object.assign(server, { env });
}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Warrant-owned MLX environment.
|
|
3
|
+
*
|
|
4
|
+
* The managed MLX backend does not shell out to whatever `mlx_lm.server`
|
|
5
|
+
* happens to be on PATH — it owns the entire stack. This provisioner
|
|
6
|
+
* materializes and maintains a dedicated directory containing:
|
|
7
|
+
*
|
|
8
|
+
* <dir>/venv/ a private Python venv with mlx-lm at an exact pin
|
|
9
|
+
* <dir>/env.json a manifest of what was provisioned (and from where)
|
|
10
|
+
* <dir>/hf-cache/ HF_HOME, so model weights live inside the owned dir
|
|
11
|
+
* <dir>/logs/ server stdout/stderr
|
|
12
|
+
*
|
|
13
|
+
* The whole footprint is one directory: inspectable (info), verifiable
|
|
14
|
+
* (verify), repairable (re-provision on pin mismatch), and removable
|
|
15
|
+
* (destroy). The server process is always spawned via the venv's own
|
|
16
|
+
* interpreter — never a PATH lookup.
|
|
17
|
+
*
|
|
18
|
+
* The mlx-lm pin follows the same trusted-pin policy as the repo's npm
|
|
19
|
+
* allowlist: exact version, bumped only as a reviewed code change.
|
|
20
|
+
*
|
|
21
|
+
* Toolchain: provisioning prefers `uv` when available (an explicit path,
|
|
22
|
+
* WARRANT_UV, or PATH discovery) — it is much faster and can supply its own
|
|
23
|
+
* managed CPython, removing even the system-python requirement. Without uv
|
|
24
|
+
* it falls back to stdlib `python3 -m venv` + pip, so uv is an upgrade,
|
|
25
|
+
* never a dependency. uv's caches and managed interpreters are contained
|
|
26
|
+
* inside the owned directory, so destroy() removes them too.
|
|
27
|
+
*/
|
|
28
|
+
/**
 * Exact-pinned mlx-lm version this provisioner installs; the default
 * requirement when `MlxEnvOptions.packageSpec` is not given.
 */
export declare const MLX_LM_PIN = "0.31.3";
/**
 * The velum-labs/mlx-lm fork installed in structured mode: upstream mlx-lm
 * plus the self-contained mlx_lm.structured package (see the fork's
 * STRUCTURED.md). Pinned to the current reviewed head of the fork's main
 * branch; refresh this SHA when we intentionally pick up fork fixes.
 */
export declare const MLX_LM_STRUCTURED_PIN = "mlx-lm[structured] @ git+https://github.com/velum-labs/mlx-lm@2ee2d570d365a1fcee9ba90a298f1bae865fccda";
/**
 * Python version requested from uv (which can download it if absent);
 * the default for `MlxEnvOptions.pythonVersion`.
 */
export declare const PYTHON_PIN = "3.12";
/**
 * Default owned directory for the MLX stack (documented on
 * `MlxEnvOptions.dir` as ~/.warrant/mlx).
 */
export declare function defaultMlxDir(): string;
|
|
41
|
+
/** Record of what was provisioned into the owned directory (<dir>/env.json). */
export type MlxEnvManifest = {
    /** Manifest schema tag. */
    version: "warrant.mlxenv.v1";
    /** What was installed (e.g. "mlx-lm==0.31.3"). */
    packageSpec: string;
    /** Additional specs installed after the main one (e.g. the structured
     *  decoding overlay). Absent in manifests written before this field
     *  existed, which reads as []. */
    extraPackageSpecs?: string[];
    /** Module whose import proves the install is usable. */
    importName: string;
    /** What built the env: "uv <version>" or "venv+pip via <interpreter>". */
    toolchain: string;
    /** The venv interpreter every spawn uses. */
    interpreterPath: string;
    /** Python version of the env — presumably that of `interpreterPath`; confirm against the provisioner. */
    pythonVersion: string;
    /** When the env was provisioned — format not visible here (presumably an ISO timestamp; confirm). */
    createdAt: string;
};
|
|
58
|
+
/** Everything the process layer needs to spawn the server. */
export type SpawnSpec = {
    /** Executable to run. */
    cmd: string;
    /** Arguments passed to `cmd`. */
    args: string[];
    /** Complete environment for the child; it is passed to spawn as-is. */
    env: Record<string, string>;
    /** Working directory for the child; left to the spawn default when absent. */
    cwd?: string;
    /** Append server output here (inside the owned dir). */
    logFile?: string;
};
|
|
67
|
+
/**
 * A capability the current host cannot satisfy (wrong OS, no Python).
 * Carries a fixed `code` so callers can tell it apart from other errors
 * without matching on the message.
 */
export declare class MlxCapabilityError extends Error {
    readonly code: "capability_mismatch";
    constructor(message: string);
}
|
|
72
|
+
/** Configuration for an owned MLX environment (`MlxEnv`). Every field is optional. */
export type MlxEnvOptions = {
    /** Owned directory. Defaults to ~/.warrant/mlx. */
    dir?: string;
    /** Requirement to install. Defaults to the MLX_LM_PIN pin. */
    packageSpec?: string;
    /**
     * Additional requirements installed after the main spec — pinned PyPI
     * specs or local package directories (e.g. the structured decoding
     * overlay). Part of the manifest: changing them re-provisions.
     */
    extraPackageSpecs?: string[];
    /** Import that must succeed after install. Defaults to "mlx_lm". */
    importName?: string;
    /** Additional imports that must succeed (one per extra package). */
    extraImportNames?: string[];
    /**
     * Python module spawned by prepare(). Defaults to the stock
     * `mlx_lm server`; the structured overlay uses STRUCTURED_SERVER_MODULE.
     *
     * NOTE(review): no STRUCTURED_SERVER_MODULE export is visible in this
     * declaration file — confirm the reference is still current.
     */
    serverModule?: string;
    /**
     * Explicit base interpreter. Setting this forces the stdlib venv+pip
     * toolchain with exactly that interpreter (an escape hatch from uv).
     */
    python?: string;
    /**
     * uv binary to provision with, or `false` to disable uv entirely.
     * Default: WARRANT_UV if set, otherwise "uv" discovered on PATH,
     * otherwise the stdlib venv+pip fallback.
     */
    uv?: string | false;
    /** Python version requested from uv. Defaults to PYTHON_PIN. */
    pythonVersion?: string;
    /**
     * Enforce the MLX platform gate (macOS on Apple Silicon). Defaults to
     * true; tests provisioning stub packages on other hosts disable it.
     */
    requirePlatform?: boolean;
    /**
     * Override the install step (default: install <packageSpec> plus any
     * <extraPackageSpecs> into the venv with the resolved toolchain). Tests
     * inject an offline installer.
     *
     * The hook is synchronous: it returns void, not a promise.
     */
    install?: (venvPython: string, packageSpec: string, extraPackageSpecs: string[]) => void;
};
|
|
117
|
+
/**
 * Provisioner for the owned MLX directory: creates and checks the private
 * venv, and produces the spawn spec for the server process.
 */
export declare class MlxEnv {
    /** The owned directory. */
    readonly dir: string;
    private readonly packageSpec;
    private readonly extraPackageSpecs;
    private readonly importName;
    private readonly extraImportNames;
    private readonly serverModule;
    private readonly requirePlatform;
    private readonly explicitPython;
    private readonly uvOption;
    private readonly pythonVersion;
    private readonly installHook;
    /** In-flight provision run shared by concurrent callers. */
    private provisionPromise;
    constructor(options?: MlxEnvOptions);
    /** Path of the manifest file (presumably <dir>/env.json; confirm against the implementation). */
    get manifestPath(): string;
    /** Path of the private venv (presumably <dir>/venv; confirm). */
    get venvDir(): string;
    /** Path of the venv's interpreter. */
    get venvPython(): string;
    /** Path of the Hugging Face cache (presumably <dir>/hf-cache; confirm). */
    get hfCacheDir(): string;
    /** Path of the server log directory (presumably <dir>/logs; confirm). */
    get logsDir(): string;
    /** uv's caches and managed interpreters, contained in the owned dir. */
    private get uvEnv();
    private readManifest;
    private assertPlatform;
    /**
     * Pick the provisioning toolchain. An explicit `python` option forces
     * stdlib venv+pip with that interpreter; otherwise uv is preferred
     * (explicit path, WARRANT_UV, or PATH discovery) with venv+pip as the
     * no-extra-requirements fallback.
     */
    private resolveToolchain;
    /** Sanity-check a base interpreter for the stdlib venv+pip path. */
    private checkPython;
    /** Does the venv interpreter exist and import the managed packages? */
    private importWorks;
    private extrasMatch;
    /** Manifest matches the current pins and the env actually works. */
    verify(): boolean;
    /** Manifest plus on-disk footprint of the owned directory. */
    info(): {
        dir: string;
        provisioned: boolean;
        manifest?: MlxEnvManifest;
        diskBytes: number;
    };
    /** Remove the entire owned footprint: venv, manifest, weights, logs. */
    destroy(): void;
    /**
     * Idempotently provision the env. A matching manifest plus a passing
     * import check is a no-op; anything else (fresh host, pin bump, broken
     * venv) provisions in place. Concurrent callers share one provision run.
     */
    ensureProvisioned(): Promise<MlxEnvManifest>;
    private provision;
    private createVenv;
    private installPackages;
    /**
     * Provision (if needed) and produce the spawn spec for the server:
     * the venv's interpreter running `-m mlx_lm server` with a minimal,
     * explicit environment whose caches live inside the owned dir.
     *
     * @param model Model the server loads.
     * @param port Port the server listens on.
     * @param extraArgs Additional server flags.
     */
    prepare(model: string, port: number, extraArgs?: string[]): Promise<SpawnSpec>;
}
|