@hypabolic/crossbar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +168 -0
- package/CAPABILITY-MATRIX.md +49 -0
- package/LICENSE +21 -0
- package/README.md +127 -0
- package/RESEARCH.md +343 -0
- package/package.json +53 -0
- package/src/adapters/anthropic.ts +197 -0
- package/src/adapters/generic.ts +164 -0
- package/src/adapters/index.ts +64 -0
- package/src/adapters/llamacpp.ts +217 -0
- package/src/adapters/llamaswap.ts +276 -0
- package/src/adapters/lmstudio.ts +307 -0
- package/src/adapters/ollama.ts +340 -0
- package/src/adapters/openai.ts +195 -0
- package/src/adapters/vllm.ts +197 -0
- package/src/core/backend-adapter.ts +123 -0
- package/src/core/capability.ts +53 -0
- package/src/core/index.ts +36 -0
- package/src/core/types.ts +160 -0
- package/src/discovery/engine.ts +247 -0
- package/src/discovery/probe.ts +144 -0
- package/src/index.ts +158 -0
- package/src/registry/ids.ts +68 -0
- package/src/registry/persistence.ts +111 -0
- package/src/registry/pi-credential-store.ts +27 -0
- package/src/registry/registry.ts +150 -0
- package/src/shim/provider-shim.ts +187 -0
- package/src/ui/loaded-widget.ts +220 -0
- package/src/ui/onboarding.ts +439 -0
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ollama backend adapter for Crossbar.
|
|
3
|
+
*
|
|
4
|
+
* Implements the full BackendAdapter contract against Ollama's native HTTP API:
|
|
5
|
+
* - Fingerprint: GET / → "Ollama is running"
|
|
6
|
+
* - List models: GET /api/tags + per-model POST /api/show for caps
|
|
7
|
+
* - Introspect loaded: GET /api/ps
|
|
8
|
+
* - Switch model: POST /api/generate {keep_alive:"5m"} then confirm via GET /api/ps
|
|
9
|
+
* - Load/unload: POST /api/generate {keep_alive:"5m"} / {keep_alive:0}
|
|
10
|
+
* - Health: GET /
|
|
11
|
+
* - Inference base URL: server.baseUrl + "/v1"
|
|
12
|
+
*
|
|
13
|
+
* Uses ONLY the injected Probe — never calls fetch directly.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { Capability } from "../core/capability.ts";
|
|
17
|
+
import type { BackendAdapter, PiApiType } from "../core/backend-adapter.ts";
|
|
18
|
+
import type {
|
|
19
|
+
DiscoveredServer,
|
|
20
|
+
HealthStatus,
|
|
21
|
+
LoadAction,
|
|
22
|
+
LoadedState,
|
|
23
|
+
ModelDescriptor,
|
|
24
|
+
PiModelEntry,
|
|
25
|
+
Probe,
|
|
26
|
+
ServerCredential,
|
|
27
|
+
} from "../core/types.ts";
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Internal shapes matching Ollama's API responses
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
/** Optional descriptive metadata attached to a model entry in the tags listing. */
interface OllamaTagsModelDetails {
  family?: string;
  parameter_size?: string;
  quantization_level?: string;
}

/**
 * One element of the `models` array in the /api/tags response.
 * `model` is preferred as the id by this adapter; `name` is the fallback.
 */
interface OllamaTagsModel {
  name: string;
  model?: string;
  details?: OllamaTagsModelDetails;
}
|
|
42
|
+
|
|
43
|
+
/** Body of the /api/tags response as consumed by listModels; `models` may be absent. */
interface OllamaTagsResponse {
  models?: Array<OllamaTagsModel>;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Subset of the /api/show response this adapter reads.
 * - `capabilities`: string flags; the adapter checks for "vision", "tools",
 *   "thinking" and "embedding".
 * - `model_info`: free-form map scanned for a key ending in ".context_length".
 */
interface OllamaShowResponse {
  capabilities?: Array<string>;
  model_info?: { [key: string]: unknown };
}
|
|
51
|
+
|
|
52
|
+
/**
 * One element of the `models` array in the /api/ps response.
 * All fields are optional; the adapter uses `model ?? name` as the id, parses
 * `expires_at` with `new Date(...)`, and forwards `size_vram` as a byte count.
 */
interface OllamaPsModel {
  model?: string;
  name?: string;
  size_vram?: number;
  expires_at?: string;
}
|
|
58
|
+
|
|
59
|
+
/** Body of the /api/ps response (currently loaded models); `models` may be absent. */
interface OllamaPsResponse {
  models?: Array<OllamaPsModel>;
}
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Constants
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
/** Context window reported when /api/show yields no usable ".context_length" entry. */
const DEFAULT_CONTEXT_WINDOW = 8_192;

/** Max output tokens reported for every Ollama model (the adapter never overrides it). */
const DEFAULT_MAX_TOKENS = 4_096;
|
|
69
|
+
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
// OllamaAdapter
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
|
|
74
|
+
/**
 * Ollama implementation of the BackendAdapter contract.
 *
 * Every request is issued through the injected Probe. A probe result with
 * `status === 0` is treated as "server could not be reached".
 */
class OllamaAdapter implements BackendAdapter {
  readonly kind = "ollama" as const;
  readonly displayName = "Ollama";
  readonly defaultPorts: readonly number[] = [11434];
  readonly piApi: PiApiType = "openai-completions";
  readonly capabilities: ReadonlySet<Capability> = new Set<Capability>([
    Capability.ListModels,
    Capability.IntrospectLoaded,
    Capability.SwitchModel,
    Capability.LoadUnload,
    Capability.Health,
    Capability.PerModelCaps,
    Capability.Streaming,
  ]);

  // --- fingerprint ----------------------------------------------------------

  /**
   * Detect an Ollama server by probing `/` for the "Ollama is running" text.
   *
   * @param baseUrl - URL the probe is bound to; echoed into the result and label.
   * @param probe - injected request function.
   * @returns a DiscoveredServer (confidence 0.95, auth "none") on a match,
   *   otherwise null.
   */
  async fingerprint(baseUrl: string, probe: Probe): Promise<DiscoveredServer | null> {
    const r = await probe("/");
    // status 0 (unreachable) and any non-OK response both mean "not ours".
    if (r.status === 0 || !r.ok) return null;
    if (!(r.text ?? "").includes("Ollama is running")) return null;

    return {
      kind: "ollama",
      baseUrl,
      auth: "none",
      label: `Ollama (${baseUrl.replace(/^https?:\/\//, "")})`,
      confidence: 0.95,
    };
  }

  // --- health ---------------------------------------------------------------

  /**
   * Probe `/` and translate the result into a HealthStatus.
   *
   * @returns "unreachable" for status 0 (with the probe error as detail when
   *   present), "unauthorized" for 401, "degraded" for any other non-OK
   *   status, otherwise "healthy" (with latency when the probe reports it).
   */
  async health(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<HealthStatus> {
    const r = await probe("/");

    if (r.status === 0) {
      // Optional fields are only assigned when defined, never set to undefined.
      const unreachable: HealthStatus = { state: "unreachable" };
      if (r.error !== undefined) unreachable.detail = r.error;
      return unreachable;
    }
    if (r.status === 401) return { state: "unauthorized" };
    if (!r.ok) return { state: "degraded", detail: `status ${r.status}` };

    const healthy: HealthStatus = { state: "healthy" };
    if (r.latencyMs !== undefined) healthy.latencyMs = r.latencyMs;
    return healthy;
  }

  // --- listModels -----------------------------------------------------------

  /**
   * List installed models from GET /api/tags, enriching each one with a
   * POST /api/show lookup (issued concurrently).
   *
   * @throws Error when /api/tags is not OK (401, unreachable, or other status).
   */
  async listModels(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<ModelDescriptor[]> {
    const r = await probe("/api/tags", { method: "GET" });
    if (!r.ok) {
      if (r.status === 401) throw new Error("401 Unauthorized");
      if (r.status === 0) throw new Error("server unreachable (status:0)");
      throw new Error(`listModels failed: status ${r.status}`);
    }

    const body = r.json as OllamaTagsResponse | undefined;
    const rawModels = body?.models ?? [];

    // _fetchModelCaps never rejects (it falls back to defaults), so one slow
    // or failing /api/show cannot fail the whole listing.
    return Promise.all(rawModels.map((m) => this._fetchModelCaps(m.model ?? m.name, probe)));
  }

  /**
   * Fetch /api/show for a single model and build its ModelDescriptor.
   *
   * Any failure (non-OK response, missing JSON, thrown error) yields a
   * descriptor with default limits and text-only input.
   */
  private async _fetchModelCaps(modelId: string, probe: Probe): Promise<ModelDescriptor> {
    const defaults: ModelDescriptor = {
      id: modelId,
      name: modelId,
      contextWindow: DEFAULT_CONTEXT_WINDOW,
      maxTokens: DEFAULT_MAX_TOKENS,
      input: ["text"],
      reasoning: false,
      tools: false,
      embeddings: false,
    };

    try {
      const r = await probe("/api/show", {
        method: "POST",
        // `model` is the documented request field; `name` is its legacy alias.
        // Sending both keeps older and newer Ollama servers working.
        body: JSON.stringify({ model: modelId, name: modelId }),
        headers: { "content-type": "application/json" },
      });
      if (!r.ok) return defaults;

      const show = r.json as OllamaShowResponse | undefined;
      if (!show) return defaults;

      const caps = show.capabilities ?? [];

      // The context length lives under an architecture-prefixed key, so take
      // the first positive numeric value whose key ends in ".context_length".
      const contextEntry = Object.entries(show.model_info ?? {}).find(
        ([key, val]) => key.endsWith(".context_length") && typeof val === "number" && val > 0,
      );
      const contextWindow = contextEntry ? (contextEntry[1] as number) : DEFAULT_CONTEXT_WINDOW;

      return {
        id: modelId,
        name: modelId,
        contextWindow,
        maxTokens: DEFAULT_MAX_TOKENS,
        input: caps.includes("vision") ? ["text", "image"] : ["text"],
        reasoning: caps.includes("thinking"),
        tools: caps.includes("tools"),
        embeddings: caps.includes("embedding"),
        raw: show,
      };
    } catch {
      // /api/show might not exist or might error — fall back to defaults
      return defaults;
    }
  }

  // --- introspectLoaded -----------------------------------------------------

  /**
   * Report which models are currently loaded, from GET /api/ps.
   *
   * Entries without a usable id are skipped. `expiresAt` is the parsed
   * `expires_at` timestamp in epoch milliseconds and is omitted when the
   * value does not parse.
   *
   * @throws Error when /api/ps is not OK (401, unreachable, or other status).
   */
  async introspectLoaded(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<LoadedState> {
    const r = await probe("/api/ps", { method: "GET" });
    if (!r.ok) {
      if (r.status === 401) throw new Error("401 Unauthorized");
      if (r.status === 0) throw new Error("server unreachable (status:0)");
      throw new Error(`introspectLoaded failed: status ${r.status}`);
    }

    const body = r.json as OllamaPsResponse | undefined;
    const loadedModelIds: string[] = [];
    const perModel: Record<string, { vramBytes?: number; expiresAt?: number }> = {};

    // Single pass: the id list and the per-model details come from the same rows.
    for (const m of body?.models ?? []) {
      const id = m.model ?? m.name ?? "";
      if (id.length === 0) continue;
      loadedModelIds.push(id);

      const info: { vramBytes?: number; expiresAt?: number } = {};
      if (m.size_vram !== undefined) info.vramBytes = m.size_vram;
      if (m.expires_at) {
        const ms = new Date(m.expires_at).getTime();
        if (!Number.isNaN(ms)) info.expiresAt = ms;
      }
      perModel[id] = info;
    }

    return {
      loadedModelIds,
      perModel,
      source: "introspection",
    };
  }

  // --- switchModel ----------------------------------------------------------

  /**
   * Load `modelId` via POST /api/generate (keep_alive "5m"), then confirm it
   * shows up in GET /api/ps.
   *
   * @throws Error when either request fails, or with a "model-not-loaded: ..."
   *   message when the model is absent from /api/ps afterwards.
   */
  async switchModel(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    modelId: string,
    probe: Probe,
  ): Promise<void> {
    // Step 1: trigger load by sending a generate request with keep_alive:"5m"
    const r1 = await probe("/api/generate", {
      method: "POST",
      body: JSON.stringify({ model: modelId, keep_alive: "5m" }),
      headers: { "content-type": "application/json" },
    });
    if (!r1.ok) {
      if (r1.status === 0) throw new Error("server unreachable during switch");
      if (r1.status === 401) throw new Error("401 Unauthorized during switch");
      throw new Error(`switchModel generate failed: status ${r1.status}`);
    }

    // Step 2: confirm via /api/ps that the model is now loaded
    const r2 = await probe("/api/ps", { method: "GET" });
    if (!r2.ok) {
      if (r2.status === 0) throw new Error("server went down after switch request");
      if (r2.status === 401) throw new Error("401 Unauthorized during switch confirmation");
      throw new Error(`switchModel confirmation failed: status ${r2.status}`);
    }

    const body = r2.json as OllamaPsResponse | undefined;
    const wanted = OllamaAdapter._canonicalName(modelId);

    // Ollama treats an untagged name as ":latest" and /api/ps lists names with
    // their tag, so "llama3" must match a reported "llama3:latest".
    const isLoaded = (body?.models ?? []).some((m) => {
      const id = m.model ?? m.name ?? "";
      return id === modelId || (id.length > 0 && OllamaAdapter._canonicalName(id) === wanted);
    });
    if (!isLoaded) {
      throw new Error(`model-not-loaded: ${modelId} not found in /api/ps after switch`);
    }
  }

  // --- loadUnload -----------------------------------------------------------

  /**
   * Load or unload a model via POST /api/generate: action "load" sends
   * keep_alive "5m"; any other action sends keep_alive 0.
   *
   * @throws Error when the request is not OK (unreachable, 401, or other status).
   */
  async loadUnload(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    modelId: string,
    action: LoadAction,
    probe: Probe,
  ): Promise<void> {
    const keepAlive = action === "load" ? "5m" : 0;
    const r = await probe("/api/generate", {
      method: "POST",
      body: JSON.stringify({ model: modelId, keep_alive: keepAlive }),
      headers: { "content-type": "application/json" },
    });
    if (r.ok) return;

    if (r.status === 0) throw new Error(`server unreachable during ${action}`);
    if (r.status === 401) throw new Error(`401 Unauthorized during ${action}`);
    throw new Error(`loadUnload(${action}) failed: status ${r.status}`);
  }

  // --- toPiModel -----------------------------------------------------------

  /**
   * Convert a ModelDescriptor into a PiModelEntry. Costs are always zero;
   * missing limits fall back to the adapter defaults and an empty input list
   * becomes ["text"].
   */
  toPiModel(_server: DiscoveredServer, model: ModelDescriptor): PiModelEntry {
    return {
      id: model.id,
      name: model.name,
      reasoning: model.reasoning ?? false,
      input: model.input.length > 0 ? model.input : ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
      maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
    };
  }

  // --- inferenceBaseUrl ----------------------------------------------------

  /**
   * Base URL for inference requests: the server base URL plus "/v1".
   * Trailing slashes on the base URL are dropped first so the result never
   * contains "//v1".
   */
  inferenceBaseUrl(server: DiscoveredServer): string {
    return `${server.baseUrl.replace(/\/+$/, "")}/v1`;
  }

  /**
   * Append ":latest" when the final path segment of a model name has no tag.
   * Only the part after the last "/" is inspected, so a registry host with a
   * port (e.g. "host:5000/lib/model") is not mistaken for a tagged name.
   */
  private static _canonicalName(id: string): string {
    const lastSegment = id.slice(id.lastIndexOf("/") + 1);
    return lastSegment.includes(":") ? id : `${id}:latest`;
  }
}
|
|
335
|
+
|
|
336
|
+
// ---------------------------------------------------------------------------
|
|
337
|
+
// Singleton export
|
|
338
|
+
// ---------------------------------------------------------------------------
|
|
339
|
+
|
|
340
|
+
/** Shared OllamaAdapter instance, exposed through the BackendAdapter interface. */
export const ollamaAdapter: BackendAdapter = new OllamaAdapter();
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI cloud backend adapter for Crossbar.
|
|
3
|
+
*
|
|
4
|
+
* OpenAI is a CLOUD backend: it is configured by the user (base URL + API key),
|
|
5
|
+
* never port-probed. Therefore:
|
|
6
|
+
* - `fingerprint(...)` ALWAYS returns null (cloud adapters are not discovered).
|
|
7
|
+
* - capabilities are limited to ListModels + Streaming (no Health, no
|
|
8
|
+
* IntrospectLoaded/SwitchModel/LoadUnload, no PerModelCaps — the OpenAI API
|
|
9
|
+
* exposes no per-model capability metadata).
|
|
10
|
+
*
|
|
11
|
+
* listModels: GET /v1/models with a Bearer token. The orchestrator's injected
|
|
12
|
+
* Probe attaches the Authorization header automatically for apiKey servers, so
|
|
13
|
+
* this adapter never touches (or logs) the key itself. The API returns only bare
|
|
14
|
+
* ids, so per-model caps are enriched from a STATIC table of known families.
|
|
15
|
+
*
|
|
16
|
+
* Uses ONLY the injected Probe — never calls fetch directly.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { Capability } from "../core/capability.ts";
|
|
20
|
+
import type { BackendAdapter, PiApiType } from "../core/backend-adapter.ts";
|
|
21
|
+
import type {
|
|
22
|
+
DiscoveredServer,
|
|
23
|
+
ModelDescriptor,
|
|
24
|
+
PiModelEntry,
|
|
25
|
+
Probe,
|
|
26
|
+
ServerCredential,
|
|
27
|
+
} from "../core/types.ts";
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// API response shape
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
/** A single model row from the /v1/models listing; only the id is read. */
interface OpenAiModelRow {
  id: string;
}

/** Body of the /v1/models response as consumed by listModels; `data` may be absent. */
interface OpenAiModelsResponse {
  data?: OpenAiModelRow[];
}
|
|
36
|
+
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
// Constants & static capability table
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
/** Inference base URL used when the configured server has an empty baseUrl. */
const DEFAULT_BASE_URL = "https://api.openai.com/v1";

/** Context window assumed for model ids that no static-table rule matches. */
const DEFAULT_CONTEXT_WINDOW = 128_000;

/** Max output tokens assumed for model ids that no static-table rule matches. */
const DEFAULT_MAX_TOKENS = 4_096;
|
|
46
|
+
|
|
47
|
+
/**
 * Capability facts recorded for one model family in the static table:
 * token limits, accepted input modalities, and whether it is a reasoning model.
 */
interface StaticCaps {
  contextWindow: number;
  maxTokens: number;
  input: Array<"text" | "image">;
  reasoning: boolean;
}
|
|
54
|
+
|
|
55
|
+
/** One static-table entry: a predicate over model ids plus the caps it assigns. */
interface StaticRule {
  /** Capabilities reported for every id accepted by `match`. */
  caps: StaticCaps;
  /** Predicate over the lower-cased model id; true when this rule applies. */
  match: (id: string) => boolean;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Static capability rules, evaluated top to bottom; the first matching rule
 * wins. More specific prefixes must therefore come before the broader prefix
 * they share (e.g. "gpt-4-32k" before "gpt-4", "o1-mini" before "o1").
 */
const STATIC_TABLE: StaticRule[] = [
  // o1-mini: smaller window than the rest of the o-series (128k / 64k out).
  {
    match: (id) => id.startsWith("o1-mini"),
    caps: { contextWindow: 128_000, maxTokens: 65_536, input: ["text"], reasoning: true },
  },
  // o1-preview: 128k window, 32k out.
  {
    match: (id) => id.startsWith("o1-preview"),
    caps: { contextWindow: 128_000, maxTokens: 32_768, input: ["text"], reasoning: true },
  },
  // Reasoning models (o-series). The table treats these as text-only input.
  {
    match: (id) => id.startsWith("o3") || id.startsWith("o4-mini") || id.startsWith("o1"),
    caps: { contextWindow: 200_000, maxTokens: 100_000, input: ["text"], reasoning: true },
  },
  // GPT-4.1 family — 1M context.
  {
    match: (id) => id.startsWith("gpt-4.1"),
    caps: { contextWindow: 1_000_000, maxTokens: 32_768, input: ["text", "image"], reasoning: false },
  },
  // GPT-4o family (multimodal, 128k).
  {
    match: (id) => id.startsWith("gpt-4o"),
    caps: { contextWindow: 128_000, maxTokens: 16_384, input: ["text", "image"], reasoning: false },
  },
  // Legacy GPT-4 Turbo (128k, vision).
  {
    match: (id) => id.startsWith("gpt-4-turbo") || id.startsWith("gpt-4-1106") || id.startsWith("gpt-4-0125"),
    caps: { contextWindow: 128_000, maxTokens: 4096, input: ["text", "image"], reasoning: false },
  },
  // GPT-4 32k variant — must precede the generic "gpt-4" rule below.
  {
    match: (id) => id.startsWith("gpt-4-32k"),
    caps: { contextWindow: 32_768, maxTokens: 4096, input: ["text"], reasoning: false },
  },
  // Original GPT-4 (8k). Catch-all for any remaining "gpt-4*" id.
  {
    match: (id) => id.startsWith("gpt-4"),
    caps: { contextWindow: 8192, maxTokens: 4096, input: ["text"], reasoning: false },
  },
  // GPT-3.5 Turbo (16k).
  {
    match: (id) => id.startsWith("gpt-3.5"),
    caps: { contextWindow: 16_385, maxTokens: 4096, input: ["text"], reasoning: false },
  },
];
|
|
93
|
+
|
|
94
|
+
/**
 * Look up static caps for a model id.
 *
 * The id is lower-cased and tested against STATIC_TABLE in order; the first
 * matching rule wins. Ids that match no rule get conservative defaults
 * (default context window and max tokens, text-only input, no reasoning).
 *
 * @param id - model id as returned by the API (any casing).
 * @returns a fresh StaticCaps object. The `input` array is copied so callers
 *   can mutate the result without corrupting the shared table.
 */
function lookupCaps(id: string): StaticCaps {
  const lower = id.toLowerCase();
  const matched = STATIC_TABLE.find((rule) => rule.match(lower));

  const caps: StaticCaps = matched?.caps ?? {
    contextWindow: DEFAULT_CONTEXT_WINDOW,
    maxTokens: DEFAULT_MAX_TOKENS,
    input: ["text"],
    reasoning: false,
  };

  // Defensive copy: table entries are shared across every lookup.
  return { ...caps, input: [...caps.input] };
}
|
|
107
|
+
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
// OpenAiAdapter
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
|
|
112
|
+
/**
 * OpenAI cloud implementation of the BackendAdapter contract.
 *
 * The server is configured rather than discovered, so `fingerprint` always
 * returns null and only ListModels and Streaming are advertised.
 */
class OpenAiAdapter implements BackendAdapter {
  readonly kind = "openai" as const;
  readonly displayName = "OpenAI";
  readonly defaultPorts: readonly number[] = [];
  readonly piApi: PiApiType = "openai-completions";
  readonly capabilities: ReadonlySet<Capability> = new Set<Capability>([
    Capability.ListModels,
    Capability.Streaming,
  ]);

  // --- fingerprint ----------------------------------------------------------

  /** Cloud backend: configured, never probed. Always null. */
  async fingerprint(_baseUrl: string, _probe: Probe): Promise<DiscoveredServer | null> {
    return null;
  }

  // --- listModels -----------------------------------------------------------

  /**
   * List models from GET /v1/models and enrich each id with static caps.
   *
   * Rows that are not objects, or whose `id` is not a non-empty string, are
   * skipped; a payload whose `data` is missing or not an array yields [].
   *
   * NOTE(review): inferenceBaseUrl returns the base URL unchanged (the default
   * already ends in "/v1") while this method probes "/v1/models" — confirm how
   * the Probe joins paths so the request does not end up at ".../v1/v1/models".
   *
   * @throws Error when the request is not OK (401, unreachable, or other status).
   */
  async listModels(
    _server: DiscoveredServer,
    _cred: ServerCredential,
    probe: Probe,
  ): Promise<ModelDescriptor[]> {
    // The injected Probe adds `Authorization: Bearer <key>` automatically for
    // apiKey servers, so we never read or log cred.apiKey here.
    const r = await probe("/v1/models", { method: "GET" });
    if (!r.ok) {
      if (r.status === 401) throw new Error("401 Unauthorized");
      if (r.status === 0) throw new Error("server unreachable (status:0)");
      throw new Error(`listModels failed: status ${r.status}`);
    }

    const body = r.json as OpenAiModelsResponse | undefined;
    // The payload is external input: do not trust `data` to be an array.
    const rows: unknown = body?.data;
    if (!Array.isArray(rows)) return [];

    const descriptors: ModelDescriptor[] = [];
    for (const row of rows) {
      // `?.` guards against null/undefined rows, which would otherwise throw.
      const id: unknown = (row as { id?: unknown } | null | undefined)?.id;
      if (typeof id === "string" && id.length > 0) descriptors.push(this._describe(id));
    }
    return descriptors;
  }

  /**
   * Build a ModelDescriptor from a bare id + static caps.
   *
   * Ids starting with "text-embedding" (case-insensitive) are flagged as
   * embedding models without tool support; every other id is flagged as
   * tool-capable.
   */
  private _describe(id: string): ModelDescriptor {
    const isEmbedding = id.toLowerCase().startsWith("text-embedding");
    const caps = lookupCaps(id);
    return {
      id,
      name: id,
      contextWindow: caps.contextWindow,
      maxTokens: caps.maxTokens,
      // Copy so descriptors never share one array with each other or the table.
      input: [...caps.input],
      reasoning: caps.reasoning,
      tools: !isEmbedding,
      embeddings: isEmbedding,
    };
  }

  // --- toPiModel ------------------------------------------------------------

  /**
   * Convert a ModelDescriptor into a PiModelEntry. Missing limits fall back to
   * the adapter defaults and an empty input list becomes ["text"].
   */
  toPiModel(_server: DiscoveredServer, model: ModelDescriptor): PiModelEntry {
    return {
      id: model.id,
      name: model.name,
      reasoning: model.reasoning ?? false,
      input: model.input.length > 0 ? model.input : ["text"],
      // Crossbar does not bill — costs are always zero.
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
      maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
    };
  }

  // --- inferenceBaseUrl -----------------------------------------------------

  /**
   * Base URL for inference requests: the configured base URL, or the public
   * OpenAI endpoint when it is empty (`||` is deliberate so "" falls back too).
   */
  inferenceBaseUrl(server: DiscoveredServer): string {
    return server.baseUrl || DEFAULT_BASE_URL;
  }
}
|
|
190
|
+
|
|
191
|
+
// ---------------------------------------------------------------------------
|
|
192
|
+
// Singleton export
|
|
193
|
+
// ---------------------------------------------------------------------------
|
|
194
|
+
|
|
195
|
+
/** Shared OpenAiAdapter instance, exposed through the BackendAdapter interface. */
export const openaiAdapter: BackendAdapter = new OpenAiAdapter();
|