@hypabolic/crossbar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,276 @@
1
+ /**
2
+ * llama-swap BackendAdapter
3
+ *
4
+ * llama-swap (mostlygeek/llama-swap) is a proxy front-door for llama-server instances that enables
5
+ * hot-swapping models at runtime. It exposes the llama-swap-specific /running and /upstream/{model}
6
+ * paths that distinguish it from a bare llama-server.
7
+ *
8
+ * Fingerprint: GET /running 200 (JSON) — a path that only llama-swap exposes.
9
+ * Inference base URL: server.baseUrl + "/v1" (OpenAI + Anthropic compat front door).
10
+ */
11
+
12
+ import { Capability } from "../core/capability.ts";
13
+ import type { BackendAdapter, PiApiType } from "../core/backend-adapter.ts";
14
+ import type {
15
+ DiscoveredServer,
16
+ HealthStatus,
17
+ LoadAction,
18
+ LoadedState,
19
+ ModelDescriptor,
20
+ PiModelEntry,
21
+ Probe,
22
+ ServerCredential,
23
+ } from "../core/types.ts";
24
+
25
+ // ---------------------------------------------------------------------------
26
+ // Internal types
27
+ // ---------------------------------------------------------------------------
28
+
29
/**
 * Loose shape of a llama-swap `/running` payload, or of a single entry in it.
 *
 * Every field is optional because the adapter tolerates several response
 * shapes; the index signature keeps unknown extra fields accessible.
 */
interface RunningBody {
  id?: string;
  model?: string;
  models?: string[];
  /**
   * Wrapped list form: `{ running: [{ model, state }, ...] }`.
   * NOTE(review): believed to be the shape llama-swap's /running handler
   * emits — confirm against the deployed llama-swap version.
   */
  running?: unknown[];
  // llama-swap /running can return a single object or an array of running upstreams
  [key: string]: unknown;
}

/** Minimal OpenAI-style `/v1/models` response: only the model ids are used. */
interface V1ModelsBody {
  data?: Array<{
    id: string;
  }>;
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Extract running model ids from a `/running` response.
 *
 * Accepted inputs:
 *  - a bare array of strings and/or `{ id }` / `{ model }` objects
 *  - `{ models: [...] }`
 *  - `{ id: "..." }` or `{ model: "..." }`
 *  - `{ running: [...] }` where the list has the same entry forms as a bare array
 *  - a string containing the JSON encoding of any of the above (callers pass
 *    the raw response text when no parsed JSON body is available)
 *
 * @param json Parsed JSON value, or raw response text.
 * @returns The running model ids in response order; `[]` when nothing is
 *          recognised or the text is not valid JSON.
 */
function parseRunningIds(json: unknown): string[] {
  // Entries of a list may be plain ids or objects carrying `id` / `model`.
  const idsFromList = (items: unknown[]): string[] =>
    items.flatMap((item) => {
      if (typeof item === "string") return [item];
      if (item && typeof item === "object") {
        const id = (item as RunningBody).id ?? (item as RunningBody).model;
        return typeof id === "string" ? [id] : [];
      }
      return [];
    });

  let payload: unknown = json;
  if (typeof payload === "string") {
    // Raw response text: decode it instead of silently reporting "nothing running".
    try {
      payload = JSON.parse(payload) as unknown;
    } catch {
      return [];
    }
  }
  if (!payload || typeof payload !== "object") return [];

  // Array of running-upstream objects
  if (Array.isArray(payload)) return idsFromList(payload);

  const body = payload as RunningBody;

  // { models: [...] }
  if (Array.isArray(body.models)) {
    return body.models.filter((m): m is string => typeof m === "string");
  }

  // { id: "..." }
  if (typeof body.id === "string") return [body.id];

  // { model: "..." }
  if (typeof body.model === "string") return [body.model];

  // { running: [...] } — checked last so every previously recognised shape
  // keeps producing exactly the same result as before.
  if (Array.isArray(body.running)) return idsFromList(body.running);

  return [];
}
78
+
79
+ // ---------------------------------------------------------------------------
80
+ // LlamaswapAdapter
81
+ // ---------------------------------------------------------------------------
82
+
83
/**
 * BackendAdapter for a llama-swap proxy.
 *
 * All network access goes through the injected `probe` callback; the adapter
 * itself never opens connections. Failures are reported by throwing `Error`
 * with a short message ("401 Unauthorized", "... server unreachable",
 * "... status N"), except in `health`, which maps them to a HealthStatus.
 */
class LlamaswapAdapter implements BackendAdapter {
  readonly kind = "llamaswap" as const;
  readonly displayName = "llama-swap";
  readonly defaultPorts: readonly number[] = [8080];
  readonly piApi: PiApiType = "openai-completions";
  readonly capabilities: ReadonlySet<Capability> = new Set<Capability>([
    Capability.ListModels,
    Capability.IntrospectLoaded,
    Capability.SwitchModel,
    Capability.LoadUnload,
    Capability.Health,
    Capability.Streaming,
  ]);

  // --- internal helpers -----------------------------------------------------

  /**
   * Build the `/upstream/{model}` path for a model id.
   *
   * Each `/`-separated segment is percent-encoded on its own so that ids
   * containing `?`, `#`, spaces, etc. cannot corrupt the request path, while
   * ids that legitimately contain `/` keep their path structure. Ids made of
   * URL-safe characters produce the same path as plain interpolation.
   */
  private static upstreamPath(modelId: string): string {
    const encoded = modelId
      .split("/")
      .map((segment) => encodeURIComponent(segment))
      .join("/");
    return `/upstream/${encoded}`;
  }

  /**
   * Pick the value to hand to `parseRunningIds`: the parsed JSON body when the
   * probe supplied one, otherwise the response text decoded as JSON.
   * Returns `undefined` when neither yields a JSON value.
   */
  private static runningPayload(r: { json?: unknown; text?: string | undefined }): unknown {
    if (r.json !== undefined && r.json !== null) return r.json;
    if (r.text === undefined) return undefined;
    try {
      return JSON.parse(r.text) as unknown;
    } catch {
      return undefined;
    }
  }

  // --- fingerprint ----------------------------------------------------------

  /**
   * Detect llama-swap at `baseUrl` by probing `/running`.
   *
   * @returns A DiscoveredServer (confidence 0.9) when the probe succeeds and
   *          any text-only body parses as JSON; otherwise `null`.
   */
  async fingerprint(baseUrl: string, probe: Probe): Promise<DiscoveredServer | null> {
    // /running is a llama-swap-only path — not present on bare llama-server.
    const r = await probe("/running");
    if (!r.ok) return null;
    // Must parse as JSON (llama-swap returns JSON from /running, not plain text)
    if (r.json === undefined && r.text !== undefined) {
      // If text is not JSON, bail
      try {
        JSON.parse(r.text);
      } catch {
        return null;
      }
    }
    return {
      kind: "llamaswap",
      baseUrl,
      auth: "none",
      label: `llama-swap (${baseUrl})`,
      confidence: 0.9,
    };
  }

  // --- health ---------------------------------------------------------------

  /**
   * Probe `/health` and classify the result.
   *
   * status 0 → "unreachable", 401 → "unauthorized", other non-ok → "degraded".
   * A successful response is "degraded" only when it carries a non-empty text
   * body that is neither "OK" (case-insensitive) nor accompanied by a JSON
   * `{ status: "ok" }`; everything else is "healthy" (with latency if known).
   */
  async health(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<HealthStatus> {
    const r = await probe("/health");
    if (r.status === 0) return { state: "unreachable" };
    if (r.status === 401) return { state: "unauthorized" };
    if (!r.ok) return { state: "degraded" };

    // llama-swap /health returns plain "OK" text
    const isOk =
      r.text?.trim().toUpperCase() === "OK" ||
      (r.json && typeof r.json === "object" && (r.json as { status?: string }).status === "ok");
    if (!isOk && r.text !== undefined && r.text.trim() !== "") {
      return { state: "degraded" };
    }
    const status: HealthStatus = { state: "healthy" };
    if (r.latencyMs !== undefined) status.latencyMs = r.latencyMs;
    return status;
  }

  // --- listModels -----------------------------------------------------------

  /**
   * List models from `/v1/models`.
   *
   * The endpoint only yields ids, so context window (8192), max tokens (4096),
   * text-only input and `reasoning: false` are fixed placeholder values.
   *
   * @throws Error when the probe fails (401, unreachable, or other status).
   */
  async listModels(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<ModelDescriptor[]> {
    const r = await probe("/v1/models");
    if (!r.ok) {
      if (r.status === 401) throw new Error("401 Unauthorized");
      if (r.status === 0) throw new Error("listModels failed: server unreachable");
      throw new Error(`listModels failed: status ${r.status}`);
    }
    // The body is untrusted JSON: tolerate a missing or non-array `data` and
    // drop entries without a string id instead of crashing in `.map`.
    const entries = (r.json as V1ModelsBody | null | undefined)?.data;
    const data = Array.isArray(entries) ? entries : [];
    return data
      .filter((entry) => entry != null && typeof entry.id === "string")
      .map((entry) => ({
        id: entry.id,
        name: entry.id,
        contextWindow: 8192,
        maxTokens: 4096,
        input: ["text"] as ("text" | "image")[],
        reasoning: false,
      }));
  }

  // --- introspectLoaded -----------------------------------------------------

  /**
   * Report which models are currently running, according to `/running`.
   *
   * @throws Error when the probe fails (401, unreachable, or other status).
   */
  async introspectLoaded(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<LoadedState> {
    const r = await probe("/running");
    if (!r.ok) {
      if (r.status === 401) throw new Error("401 Unauthorized");
      if (r.status === 0) throw new Error("introspectLoaded failed: server unreachable");
      throw new Error(`introspectLoaded failed: status ${r.status}`);
    }
    const ids = parseRunningIds(LlamaswapAdapter.runningPayload(r));
    return {
      loadedModelIds: ids,
      source: "introspection",
    };
  }

  // --- switchModel ----------------------------------------------------------

  /**
   * Switch to `modelId` and verify the switch took effect.
   *
   * @throws Error when either probe fails, or with a `model-not-loaded: ...`
   *         message when `/running` does not list `modelId` afterwards.
   */
  async switchModel(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    modelId: string,
    probe: Probe,
  ): Promise<void> {
    // Step 1: GET /upstream/{model} — triggers llama-swap to start that upstream.
    const r1 = await probe(LlamaswapAdapter.upstreamPath(modelId));
    if (!r1.ok) {
      if (r1.status === 0) throw new Error("server unreachable during switchModel");
      if (r1.status === 401) throw new Error("401 Unauthorized");
      throw new Error(`switchModel: upstream request failed: status ${r1.status}`);
    }

    // Step 2: Confirm via GET /running that the target is now active.
    const r2 = await probe("/running");
    if (!r2.ok) {
      if (r2.status === 0) throw new Error("server went down after switch request");
      if (r2.status === 401) throw new Error("401 Unauthorized");
      throw new Error(`switchModel: confirmation probe failed: status ${r2.status}`);
    }
    const runningIds = parseRunningIds(LlamaswapAdapter.runningPayload(r2));
    if (!runningIds.includes(modelId)) {
      throw new Error(`model-not-loaded: ${modelId} not found in /running after switch`);
    }
  }

  // --- loadUnload -----------------------------------------------------------

  /**
   * Load or unload a model.
   *
   * "load" issues GET /upstream/{model}; any other action issues
   * POST /api/models/unload with `{ model }` as the JSON body.
   *
   * @throws Error when the probe fails (401, unreachable, or other status).
   */
  async loadUnload(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    modelId: string,
    action: LoadAction,
    probe: Probe,
  ): Promise<void> {
    if (action === "load") {
      // load: GET /upstream/{model}
      const r = await probe(LlamaswapAdapter.upstreamPath(modelId));
      if (!r.ok) {
        if (r.status === 0) throw new Error("server unreachable during load");
        if (r.status === 401) throw new Error("401 Unauthorized");
        throw new Error(`loadUnload(load) failed: status ${r.status}`);
      }
    } else {
      // unload: POST /api/models/unload
      // NOTE(review): verify against the targeted llama-swap version whether this
      // endpoint honours the `model` field or unloads every running model.
      const r = await probe(`/api/models/unload`, {
        method: "POST",
        body: JSON.stringify({ model: modelId }),
        headers: { "content-type": "application/json" },
      });
      if (!r.ok) {
        if (r.status === 0) throw new Error("server unreachable during unload");
        if (r.status === 401) throw new Error("401 Unauthorized");
        throw new Error(`loadUnload(unload) failed: status ${r.status}`);
      }
    }
  }

  // --- toPiModel ------------------------------------------------------------

  /**
   * Convert a ModelDescriptor into a PiModelEntry.
   * All costs are zero; missing context window / max tokens fall back to
   * 8192 / 4096, and an empty input list falls back to text-only.
   */
  toPiModel(_server: DiscoveredServer, model: ModelDescriptor): PiModelEntry {
    return {
      id: model.id,
      name: model.name,
      reasoning: model.reasoning ?? false,
      input: model.input.length > 0 ? model.input : ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: model.contextWindow ?? 8192,
      maxTokens: model.maxTokens ?? 4096,
    };
  }

  // --- inferenceBaseUrl -----------------------------------------------------

  /**
   * Inference base URL: the server base URL plus "/v1".
   * Trailing slashes on `baseUrl` are trimmed so the result never contains "//v1".
   */
  inferenceBaseUrl(server: DiscoveredServer): string {
    return `${server.baseUrl.replace(/\/+$/, "")}/v1`;
  }
}

// ---------------------------------------------------------------------------
// Singleton export
// ---------------------------------------------------------------------------

/** Shared llama-swap adapter instance, exposed through the BackendAdapter contract. */
export const llamaswapAdapter: BackendAdapter = new LlamaswapAdapter();
@@ -0,0 +1,307 @@
1
+ /**
2
+ * LM Studio backend adapter.
3
+ *
4
+ * Implements the BackendAdapter contract for LM Studio's local server.
5
+ * Uses the LM Studio-native /api/v0/* endpoints for discovery and management,
6
+ * and delegates inference to the OpenAI-compatible /v1/* layer.
7
+ *
8
+ * Key API endpoints:
9
+ * GET /api/v0/models — model list with state, type, context lengths
10
+ * POST /api/v1/models/load — load a model by id
11
+ * POST /api/v1/models/unload — unload a model by id
12
+ *
13
+ * Fingerprint discriminator: data[] entries have both `state` and
14
+ * `compatibility_type` fields (unique to LM Studio's v0 API).
15
+ */
16
+
17
+ import { Capability } from "../core/capability.ts";
18
+ import type { BackendAdapter, PiApiType } from "../core/backend-adapter.ts";
19
+ import type {
20
+ DiscoveredServer,
21
+ HealthStatus,
22
+ LoadAction,
23
+ LoadedState,
24
+ ModelDescriptor,
25
+ PiModelEntry,
26
+ Probe,
27
+ ServerCredential,
28
+ } from "../core/types.ts";
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // LM Studio API shapes (narrowed from unknown JSON)
32
+ // ---------------------------------------------------------------------------
33
+
34
/** One entry of LM Studio's `/api/v0/models` list, reduced to the fields this adapter reads. */
interface LmsModelEntry {
  id: string;
  type?: string; // "llm" | "vlm" | "embeddings"
  state?: string; // "loaded" | "not-loaded"
  max_context_length?: number;
  loaded_context_length?: number;
  quantization?: string;
  arch?: string;
}

/** Envelope of the models list; `data` is absent when the body was not usable. */
interface LmsModelsResponse {
  data?: LmsModelEntry[];
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Narrow an unknown JSON body to a LmsModelsResponse defensively.
 *
 * Only fields with the expected primitive type are copied; anything else is
 * left unset. Entries that are not objects, or that lack a usable id, are
 * skipped so that a malformed entry cannot surface as a model whose id is
 * "" or "[object Object]". Numeric ids are still coerced to strings.
 *
 * @param json Parsed response body of unknown shape.
 * @returns `{}` when `json` is not an object with an array `data`; otherwise
 *          `{ data }` containing the entries that could be narrowed.
 */
function parseModelsBody(json: unknown): LmsModelsResponse {
  if (json == null || typeof json !== "object") return {};
  const obj = json as Record<string, unknown>;
  const data = obj["data"];
  if (!Array.isArray(data)) return {};
  const entries: LmsModelEntry[] = [];
  for (const item of data) {
    if (item == null || typeof item !== "object") continue;
    const m = item as Record<string, unknown>;

    const rawId = m["id"];
    let id: string;
    if (typeof rawId === "string") {
      id = rawId;
    } else if (typeof rawId === "number") {
      id = String(rawId);
    } else {
      continue; // missing, null, or structured id: nothing sensible to report
    }
    if (id === "") continue;

    const entry: LmsModelEntry = { id };
    if (typeof m["type"] === "string") entry.type = m["type"];
    if (typeof m["state"] === "string") entry.state = m["state"];
    if (typeof m["max_context_length"] === "number") entry.max_context_length = m["max_context_length"];
    if (typeof m["loaded_context_length"] === "number") entry.loaded_context_length = m["loaded_context_length"];
    if (typeof m["quantization"] === "string") entry.quantization = m["quantization"];
    if (typeof m["arch"] === "string") entry.arch = m["arch"];
    entries.push(entry);
  }
  return { data: entries };
}
75
+
76
/**
 * Decide whether a raw `/api/v0/models` body carries the LM Studio marker.
 *
 * The marker is a `state` key on at least one object entry of `data[]`.
 * `compatibility_type` may appear alongside it but is not required: an entry
 * that has both keys necessarily has `state`, so testing `state` alone covers
 * both cases. The check runs on the raw JSON, before any narrowing.
 *
 * @param json Parsed response body of unknown shape.
 * @returns `true` when some object entry in `data` has a `state` key; `false`
 *          for non-objects, a missing/non-array/empty `data`, or no such entry.
 */
function hasLmsDiscriminator(json: unknown): boolean {
  if (typeof json !== "object" || json === null) return false;

  const entries = (json as Record<string, unknown>)["data"];
  if (!Array.isArray(entries)) return false;

  // `some` on an empty list is false, which matches the "no entries" case.
  return entries.some(
    (entry) => typeof entry === "object" && entry !== null && "state" in entry,
  );
}
101
+
102
/**
 * Translate one LM Studio model entry into a Crossbar ModelDescriptor.
 *
 * - `id` doubles as the display name.
 * - `type === "vlm"` adds "image" to the accepted inputs; text is always accepted.
 * - `type === "embeddings"` sets the `embeddings` flag.
 * - `state === "loaded"` sets the `loaded` flag.
 * - `contextWindow` is set only when the entry reports `max_context_length`.
 * - The source entry is kept verbatim under `raw`.
 */
function toDescriptor(m: LmsModelEntry): ModelDescriptor {
  const descriptor: ModelDescriptor = {
    id: m.id,
    name: m.id,
    input: m.type === "vlm" ? ["text", "image"] : ["text"],
    embeddings: m.type === "embeddings",
    loaded: m.state === "loaded",
    raw: m,
  };

  const maxContext = m.max_context_length;
  if (maxContext !== undefined) {
    descriptor.contextWindow = maxContext;
  }
  return descriptor;
}
123
+
124
+ // ---------------------------------------------------------------------------
125
+ // LmStudioAdapter
126
+ // ---------------------------------------------------------------------------
127
+
128
/**
 * BackendAdapter for LM Studio's local server.
 *
 * Discovery, health and introspection all read GET /api/v0/models; load and
 * unload use POST /api/v1/models/{load,unload}. All network access goes
 * through the injected `probe` callback. Failures are reported by throwing
 * `Error` with a short message, except in `health`, which maps them to a
 * HealthStatus.
 */
class LmStudioAdapter implements BackendAdapter {
  readonly kind = "lmstudio" as const;
  readonly displayName = "LM Studio";
  readonly defaultPorts: readonly number[] = [1234];
  readonly piApi: PiApiType = "openai-completions";
  readonly capabilities: ReadonlySet<Capability> = new Set<Capability>([
    Capability.ListModels,
    Capability.IntrospectLoaded,
    Capability.SwitchModel,
    Capability.LoadUnload,
    Capability.Health,
    Capability.PerModelCaps,
    Capability.Streaming,
  ]);

  // --- fingerprint ----------------------------------------------------------

  /**
   * Detect LM Studio at `baseUrl` by probing `/api/v0/models`.
   *
   * @returns A DiscoveredServer (confidence 0.95, label without the URL
   *          scheme) when the probe succeeds and the body carries the LM
   *          Studio discriminator; otherwise `null`.
   */
  async fingerprint(baseUrl: string, probe: Probe): Promise<DiscoveredServer | null> {
    const r = await probe("/api/v0/models");
    if (!r.ok || r.status === 0) return null;
    if (!hasLmsDiscriminator(r.json)) return null;

    return {
      kind: "lmstudio",
      baseUrl,
      auth: "none",
      label: `LM Studio (${baseUrl.replace(/^https?:\/\//, "")})`,
      confidence: 0.95,
    };
  }

  // --- health ---------------------------------------------------------------

  /**
   * Probe `/api/v0/models` and classify the result:
   * status 0 → "unreachable", 401 → "unauthorized", other non-ok → "degraded",
   * otherwise "healthy" (with latency when the probe reports it).
   */
  async health(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<HealthStatus> {
    const r = await probe("/api/v0/models");
    if (r.status === 0) return { state: "unreachable" };
    if (r.status === 401) return { state: "unauthorized" };
    if (!r.ok) return { state: "degraded" };
    const status: HealthStatus = { state: "healthy" };
    if (r.latencyMs !== undefined) status.latencyMs = r.latencyMs;
    return status;
  }

  // --- listModels -----------------------------------------------------------

  /**
   * List every model LM Studio reports, loaded or not.
   *
   * @throws Error when the probe fails (401, unreachable, or other status).
   */
  async listModels(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<ModelDescriptor[]> {
    const r = await probe("/api/v0/models");
    if (!r.ok) {
      if (r.status === 401) throw new Error("401 Unauthorized");
      if (r.status === 0) throw new Error("listModels failed: server unreachable");
      throw new Error(`listModels failed: status ${r.status}`);
    }
    const body = parseModelsBody(r.json);
    if (!body.data) return [];
    return body.data.map(toDescriptor);
  }

  // --- introspectLoaded -----------------------------------------------------

  /**
   * Report the models whose `state` is "loaded".
   *
   * `perModel` is attached only when at least one loaded model reports a
   * `loaded_context_length`.
   *
   * @throws Error when the probe fails (401, unreachable, or other status).
   */
  async introspectLoaded(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<LoadedState> {
    const r = await probe("/api/v0/models");
    if (!r.ok) {
      if (r.status === 401) throw new Error("401 Unauthorized");
      if (r.status === 0) throw new Error("introspectLoaded failed: server unreachable");
      throw new Error(`introspectLoaded failed: status ${r.status}`);
    }
    const body = parseModelsBody(r.json);
    const loaded = (body.data ?? []).filter((m) => m.state === "loaded");
    const perModel: Record<string, { contextLength: number }> = {};
    for (const m of loaded) {
      if (m.loaded_context_length !== undefined) {
        perModel[m.id] = { contextLength: m.loaded_context_length };
      }
    }
    const result: LoadedState = {
      loadedModelIds: loaded.map((m) => m.id),
      source: "introspection",
    };
    if (Object.keys(perModel).length > 0) {
      result.perModel = perModel;
    }
    return result;
  }

  // --- switchModel ----------------------------------------------------------

  /**
   * Load `modelId` and verify it is reported as loaded afterwards.
   *
   * @throws Error when either probe fails, or with a `model-not-loaded: ...`
   *         message when the model list does not show `modelId` as loaded.
   */
  async switchModel(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    modelId: string,
    probe: Probe,
  ): Promise<void> {
    // Step 1: JIT load
    const r1 = await probe("/api/v1/models/load", {
      method: "POST",
      body: JSON.stringify({ model: modelId }),
      headers: { "content-type": "application/json" },
    });
    if (!r1.ok) {
      if (r1.status === 0) throw new Error("switchModel failed: server unreachable");
      if (r1.status === 401) throw new Error("401 Unauthorized");
      throw new Error(`switchModel load failed: status ${r1.status}`);
    }

    // Step 2: Confirm via model list that the target is now loaded
    const r2 = await probe("/api/v0/models");
    if (!r2.ok) {
      if (r2.status === 0) throw new Error("switchModel confirmation failed: server went down");
      if (r2.status === 401) throw new Error("401 Unauthorized");
      throw new Error(`switchModel confirmation failed: status ${r2.status}`);
    }
    const body = parseModelsBody(r2.json);
    const found = (body.data ?? []).find((m) => m.id === modelId);
    if (!found || found.state !== "loaded") {
      throw new Error(`model-not-loaded: ${modelId} not found in loaded state after switch`);
    }
  }

  // --- loadUnload -----------------------------------------------------------

  /**
   * Load or unload a model through the v1 management endpoints.
   *
   * The two endpoints take different request bodies: load identifies the
   * model with `model`, unload identifies the loaded instance with
   * `instance_id`. The model id is sent as the instance id.
   * NOTE(review): this assumes the instance was loaded under its default
   * identifier (the model id) — confirm against the LM Studio REST docs.
   *
   * @throws Error when the probe fails (401, unreachable, or other status).
   */
  async loadUnload(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    modelId: string,
    action: LoadAction,
    probe: Probe,
  ): Promise<void> {
    const isLoad = action === "load";
    const path = isLoad
      ? "/api/v1/models/load"
      : "/api/v1/models/unload";
    const payload = isLoad ? { model: modelId } : { instance_id: modelId };
    const r = await probe(path, {
      method: "POST",
      body: JSON.stringify(payload),
      headers: { "content-type": "application/json" },
    });
    if (!r.ok) {
      if (r.status === 0) throw new Error(`loadUnload(${action}) failed: server unreachable`);
      if (r.status === 401) throw new Error("401 Unauthorized");
      throw new Error(`loadUnload(${action}) failed: status ${r.status}`);
    }
  }

  // --- toPiModel ------------------------------------------------------------

  /**
   * Convert a ModelDescriptor into a PiModelEntry.
   * All costs are zero; missing context window / max tokens fall back to
   * 8192 / 4096, and an empty input list falls back to text-only.
   */
  toPiModel(_server: DiscoveredServer, model: ModelDescriptor): PiModelEntry {
    return {
      id: model.id,
      name: model.name,
      reasoning: model.reasoning ?? false,
      input: model.input.length > 0 ? (model.input as ("text" | "image")[]) : ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: model.contextWindow ?? 8192,
      maxTokens: model.maxTokens ?? 4096,
    };
  }

  // --- inferenceBaseUrl -----------------------------------------------------

  /**
   * Inference base URL: the server base URL plus "/v1".
   * Trailing slashes on `baseUrl` are trimmed so the result never contains "//v1".
   */
  inferenceBaseUrl(server: DiscoveredServer): string {
    return `${server.baseUrl.replace(/\/+$/, "")}/v1`;
  }
}

// ---------------------------------------------------------------------------
// Singleton export
// ---------------------------------------------------------------------------

/** Shared LM Studio adapter instance, exposed through the BackendAdapter contract. */
export const lmstudioAdapter: BackendAdapter = new LmStudioAdapter();