@hypabolic/crossbar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +168 -0
- package/CAPABILITY-MATRIX.md +49 -0
- package/LICENSE +21 -0
- package/README.md +127 -0
- package/RESEARCH.md +343 -0
- package/package.json +53 -0
- package/src/adapters/anthropic.ts +197 -0
- package/src/adapters/generic.ts +164 -0
- package/src/adapters/index.ts +64 -0
- package/src/adapters/llamacpp.ts +217 -0
- package/src/adapters/llamaswap.ts +276 -0
- package/src/adapters/lmstudio.ts +307 -0
- package/src/adapters/ollama.ts +340 -0
- package/src/adapters/openai.ts +195 -0
- package/src/adapters/vllm.ts +197 -0
- package/src/core/backend-adapter.ts +123 -0
- package/src/core/capability.ts +53 -0
- package/src/core/index.ts +36 -0
- package/src/core/types.ts +160 -0
- package/src/discovery/engine.ts +247 -0
- package/src/discovery/probe.ts +144 -0
- package/src/index.ts +158 -0
- package/src/registry/ids.ts +68 -0
- package/src/registry/persistence.ts +111 -0
- package/src/registry/pi-credential-store.ts +27 -0
- package/src/registry/registry.ts +150 -0
- package/src/shim/provider-shim.ts +187 -0
- package/src/ui/loaded-widget.ts +220 -0
- package/src/ui/onboarding.ts +439 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generic OpenAI-compatible backend adapter for Crossbar.
|
|
3
|
+
*
|
|
4
|
+
* Catch-all fallback for the long tail: TabbyAPI, KoboldCpp, oobabooga, Jan, llamafile,
|
|
5
|
+
* and any unknown server that merely exposes `/v1/models`. Specific adapters (ollama,
|
|
6
|
+
* lmstudio, llamacpp, vllm, ...) run first and win at higher confidence; this adapter
|
|
7
|
+
* takes what remains.
|
|
8
|
+
*
|
|
9
|
+
* Capabilities: ListModels + Streaming ONLY.
|
|
10
|
+
* Fingerprint: GET /v1/models → 200 + `data` array → LOW confidence ~0.3 so any
|
|
11
|
+
* specific adapter outranks it.
|
|
12
|
+
*
|
|
13
|
+
* Uses ONLY the injected Probe — never calls fetch directly.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { Capability } from "../core/capability.ts";
|
|
17
|
+
import type { BackendAdapter, PiApiType } from "../core/backend-adapter.ts";
|
|
18
|
+
import type {
|
|
19
|
+
DiscoveredServer,
|
|
20
|
+
ModelDescriptor,
|
|
21
|
+
PiModelEntry,
|
|
22
|
+
Probe,
|
|
23
|
+
ServerCredential,
|
|
24
|
+
} from "../core/types.ts";
|
|
25
|
+
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Conservative defaults — applied when the backend doesn't report metadata
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
/** `contextWindow` assigned to every model returned by `listModels`, and the `toPiModel` fallback when a descriptor has none. */
const DEFAULT_CONTEXT_WINDOW = 8192;
|
|
31
|
+
/** `maxTokens` assigned to every model returned by `listModels`, and the `toPiModel` fallback when a descriptor has none. */
const DEFAULT_MAX_TOKENS = 4096;
|
|
32
|
+
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
// Internal helpers
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
|
|
37
|
+
/**
 * Normalise a user-supplied base URL so that it ends with exactly one "/v1" segment.
 *
 * Every trailing slash is removed first, so "http://host:8080", "http://host:8080/" and
 * "http://host:8080//" all become "http://host:8080/v1". A URL that already ends with
 * "/v1" (optionally followed by slashes) is returned without those slashes and without a
 * second "/v1" appended.
 *
 * @param url Base URL to normalise.
 * @returns The URL with trailing slashes removed, guaranteed to end with "/v1".
 */
function normaliseToV1(url: string): string {
  // Walk back over ALL trailing "/" characters. Stripping only one would turn
  // "http://host//" into "http://host//v1" and "http://host/v1//" into "http://host/v1//v1".
  let end = url.length;
  while (end > 0 && url.charCodeAt(end - 1) === 47 /* "/" */) {
    end -= 1;
  }
  const stripped = url.slice(0, end);
  return stripped.endsWith("/v1") ? stripped : `${stripped}/v1`;
}
|
|
48
|
+
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
// GenericAdapter
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
class GenericAdapter implements BackendAdapter {
  readonly kind = "openai-generic" as const;
  readonly displayName = "OpenAI-compatible";
  /** Deliberately empty: this adapter declares no default ports of its own. */
  readonly defaultPorts: readonly number[] = [];
  readonly piApi: PiApiType = "openai-completions";
  /** Only model listing and streaming are advertised. */
  readonly capabilities: ReadonlySet<Capability> = new Set<Capability>([
    Capability.ListModels,
    Capability.Streaming,
  ]);

  // --- fingerprint ----------------------------------------------------------

  /**
   * Probes `GET /v1/models` and claims the server when the response is ok, has a
   * non-zero status, and its JSON body carries a `data` array. The array's contents are
   * not inspected, so an empty array is accepted.
   *
   * The match is reported at confidence 0.3.
   *
   * @param baseUrl Base URL recorded on the returned server and shown in its label.
   * @param probe Injected probe; the only means of I/O used here.
   * @returns A `DiscoveredServer` with `auth: "none"`, or `null` when the probe does not match.
   */
  async fingerprint(baseUrl: string, probe: Probe): Promise<DiscoveredServer | null> {
    const response = await probe("/v1/models");
    if (!(response.ok && response.status !== 0)) return null;

    const payload = response.json as { data?: unknown[] } | undefined;
    const hasDataArray = Array.isArray(payload?.data);
    if (!hasDataArray) return null;

    return {
      kind: "openai-generic",
      baseUrl,
      auth: "none",
      label: `OpenAI-compatible (${baseUrl})`,
      confidence: 0.3,
    };
  }

  // --- listModels -----------------------------------------------------------

  /**
   * Fetches `GET /v1/models` and turns every `data[]` entry that has a string `id` into a
   * `ModelDescriptor` with fixed defaults (context window, max tokens, text-only input,
   * no reasoning). Entries without a string `id` are dropped. An id containing "embed"
   * (case-insensitive) sets `embeddings: true`.
   *
   * A Bearer `Authorization` header is sent only for an `apiKey` credential with a
   * non-empty key.
   *
   * @returns The mapped descriptors, or `[]` when the body has no `data` array.
   * @throws Error on HTTP 401, on status 0 (unreachable), or on any other non-ok response.
   */
  async listModels(
    server: DiscoveredServer,
    cred: ServerCredential,
    probe: Probe,
  ): Promise<ModelDescriptor[]> {
    // The key only ever travels in the header; it is never logged.
    const authHeaders: Record<string, string> =
      cred.mode === "apiKey" && cred.apiKey ? { Authorization: `Bearer ${cred.apiKey}` } : {};

    const response = await probe("/v1/models", { headers: authHeaders });

    switch (response.status) {
      case 401:
        throw new Error("401 Unauthorized: invalid or missing API key");
      case 0:
        throw new Error("listModels failed: server unreachable (status 0)");
      default:
        break;
    }
    if (!response.ok) throw new Error(`listModels failed: HTTP ${response.status}`);

    const payload = response.json as { data?: Array<{ id?: unknown }> } | undefined;
    const entries = payload?.data;
    if (!Array.isArray(entries)) return [];

    const descriptors: ModelDescriptor[] = [];
    for (const entry of entries) {
      const candidateId: unknown = entry?.id;
      if (typeof candidateId !== "string") continue;
      descriptors.push({
        id: candidateId,
        name: candidateId,
        contextWindow: DEFAULT_CONTEXT_WINDOW,
        maxTokens: DEFAULT_MAX_TOKENS,
        input: ["text"],
        reasoning: false,
        embeddings: candidateId.toLowerCase().includes("embed"),
        raw: entry,
      });
    }
    return descriptors;
  }

  // --- toPiModel ------------------------------------------------------------

  /**
   * Converts a descriptor into a `PiModelEntry`: zero cost across the board, text-only
   * input when the descriptor lists none, and the module defaults for a missing context
   * window or max-token value.
   */
  toPiModel(server: DiscoveredServer, model: ModelDescriptor): PiModelEntry {
    const inputKinds: PiModelEntry["input"] = model.input.length > 0 ? model.input : ["text"];
    const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
    return {
      id: model.id,
      name: model.name,
      reasoning: model.reasoning ?? false,
      input: inputKinds,
      cost: zeroCost,
      contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
      maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
    };
  }

  // --- inferenceBaseUrl -----------------------------------------------------

  /** Returns `server.baseUrl` passed through `normaliseToV1`, i.e. ending in "/v1". */
  inferenceBaseUrl(server: DiscoveredServer): string {
    const { baseUrl } = server;
    return normaliseToV1(baseUrl);
  }
}
|
|
159
|
+
|
|
160
|
+
// ---------------------------------------------------------------------------
|
|
161
|
+
// Singleton export
|
|
162
|
+
// ---------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
/** The `GenericAdapter` instance this module exports, typed as the `BackendAdapter` interface. */
export const genericAdapter: BackendAdapter = new GenericAdapter();
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapter registry — the canonical set of backend adapters Crossbar ships with, plus lookup helpers
|
|
3
|
+
* the discovery engine, registry, and provider shim consume.
|
|
4
|
+
*
|
|
5
|
+
* The generic OpenAI-compatible adapter is the fallback for the long tail (TabbyAPI, KoboldCpp,
|
|
6
|
+
* oobabooga, Jan, llamafile, and unknown servers) — those kinds have no dedicated adapter and resolve
|
|
7
|
+
* to `genericAdapter` via fingerprint.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { BackendAdapter } from "../core/backend-adapter.ts";
|
|
11
|
+
import type { BackendKind } from "../core/capability.ts";
|
|
12
|
+
import { CLOUD_KINDS } from "../core/capability.ts";
|
|
13
|
+
|
|
14
|
+
import { ollamaAdapter } from "./ollama.ts";
|
|
15
|
+
import { lmstudioAdapter } from "./lmstudio.ts";
|
|
16
|
+
import { llamacppAdapter } from "./llamacpp.ts";
|
|
17
|
+
import { llamaswapAdapter } from "./llamaswap.ts";
|
|
18
|
+
import { vllmAdapter } from "./vllm.ts";
|
|
19
|
+
import { openaiAdapter } from "./openai.ts";
|
|
20
|
+
import { anthropicAdapter } from "./anthropic.ts";
|
|
21
|
+
import { genericAdapter } from "./generic.ts";
|
|
22
|
+
|
|
23
|
+
/**
 * Every adapter Crossbar ships: the seven backend-specific adapters, followed by
 * `genericAdapter` as the last element.
 */
export const ADAPTERS: readonly BackendAdapter[] = [
  ollamaAdapter,
  lmstudioAdapter,
  llamacppAdapter,
  llamaswapAdapter,
  vllmAdapter,
  openaiAdapter,
  anthropicAdapter,
  genericAdapter,
];
|
|
34
|
+
|
|
35
|
+
/**
 * Lookup table from `kind` to adapter, built from `ADAPTERS`.
 * Kinds that none of the listed adapters declares are absent (hence `Partial`); use
 * `adapterFor` to get the generic fallback for those.
 */
export const ADAPTERS_BY_KIND: Partial<Record<BackendKind, BackendAdapter>> = Object.fromEntries(
  ADAPTERS.map((adapter) => [adapter.kind, adapter] as const),
) as Partial<Record<BackendKind, BackendAdapter>>;
|
|
39
|
+
|
|
40
|
+
/**
 * Resolves the adapter registered for `kind`.
 *
 * @param kind Backend kind to look up in `ADAPTERS_BY_KIND`.
 * @returns The adapter registered under that kind, or `genericAdapter` when there is none.
 */
export function adapterFor(kind: BackendKind): BackendAdapter {
  const dedicated = ADAPTERS_BY_KIND[kind];
  return dedicated ?? genericAdapter;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Adapters used for active discovery: every entry of `ADAPTERS` whose kind is NOT in
 * `CLOUD_KINDS` (cloud kinds are configured, never probed).
 */
export const DISCOVERY_ADAPTERS: readonly BackendAdapter[] = ADAPTERS.filter(
  ({ kind }) => !CLOUD_KINDS.has(kind),
);
|
|
49
|
+
|
|
50
|
+
/**
 * Cloud adapters: every entry of `ADAPTERS` whose kind IS in `CLOUD_KINDS`.
 * These are configured via onboarding / `/login` rather than port-probed.
 */
export const CLOUD_ADAPTERS: readonly BackendAdapter[] = ADAPTERS.filter(
  ({ kind }) => CLOUD_KINDS.has(kind),
);
|
|
54
|
+
|
|
55
|
+
// Re-export each adapter singleton so it can be imported from this module directly.
export { ollamaAdapter, lmstudioAdapter, llamacppAdapter, llamaswapAdapter };
export { vllmAdapter, openaiAdapter, anthropicAdapter, genericAdapter };
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* llama.cpp (llama-server) BackendAdapter
|
|
3
|
+
*
|
|
4
|
+
* Covers a single-model llama-server instance. No hot-swap (SwitchModel / LoadUnload absent).
|
|
5
|
+
* Fingerprinted via GET /props with `default_generation_settings` + `build_info`.
|
|
6
|
+
* Inference base URL: server.baseUrl + "/v1" (OpenAI-compat endpoint).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { Capability } from "../core/capability.ts";
|
|
10
|
+
import type { BackendAdapter, PiApiType } from "../core/backend-adapter.ts";
|
|
11
|
+
import type {
|
|
12
|
+
DiscoveredServer,
|
|
13
|
+
HealthStatus,
|
|
14
|
+
LoadedState,
|
|
15
|
+
ModelDescriptor,
|
|
16
|
+
PiModelEntry,
|
|
17
|
+
Probe,
|
|
18
|
+
ServerCredential,
|
|
19
|
+
} from "../core/types.ts";
|
|
20
|
+
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Internal helpers
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
/** The subset of the `GET /props` JSON body that this adapter reads. */
interface PropsBody {
  /** Must be present for `fingerprint` to match; `n_ctx` is used as the context window in `listModels`. */
  default_generation_settings?: { n_ctx?: number };
  /** Only its presence is checked (via `in`) during fingerprinting; the value is never read. */
  build_info?: unknown;
  /** Model path reported by the server; `introspectLoaded` matches it against `/v1/models` ids. */
  model_path?: string;
  /** Modality labels; `listModels` searches them for "vision" or "image". */
  modalities?: Array<string>;
}
|
|
33
|
+
|
|
34
|
+
/** The subset of the `GET /v1/models` JSON body that this adapter reads. */
interface V1ModelsBody {
  /** Model entries; `id` names the model and `meta.n_ctx_train` is a fallback context window. */
  data?: {
    id: string;
    meta?: { n_ctx_train?: number };
  }[];
}
|
|
42
|
+
|
|
43
|
+
/**
 * Returns the last non-empty segment of a path, treating both "/" and "\" as separators.
 * Trailing separators are ignored. When the path has no non-empty segment (empty string,
 * or separators only) the input is returned unchanged.
 */
function basename(path: string): string {
  const segments = path.split(/[\\/]/);
  for (let i = segments.length - 1; i >= 0; i -= 1) {
    const segment = segments[i];
    if (segment) return segment;
  }
  return path;
}
|
|
47
|
+
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
// LlamacppAdapter
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
/** Context window used when neither `/props` nor `/v1/models` supplies one. */
const LLAMACPP_FALLBACK_CONTEXT_WINDOW = 8192;
/** `maxTokens` value used for every llama.cpp model. */
const LLAMACPP_FALLBACK_MAX_TOKENS = 4096;

/**
 * Builds the request headers for a credential: a Bearer `Authorization` header for an
 * `apiKey` credential with a non-empty key, otherwise no headers. The key is never logged.
 */
function llamacppAuthHeaders(cred: ServerCredential): Record<string, string> {
  return cred?.mode === "apiKey" && cred.apiKey ? { Authorization: `Bearer ${cred.apiKey}` } : {};
}

/**
 * Decides whether a `/props` `modalities` value advertises image input.
 *
 * Two shapes are accepted: an array of labels (e.g. `["vision"]`) and an object of boolean
 * flags (e.g. `{ vision: true, audio: false }`). A label counts when it contains "vision"
 * or "image" (case-insensitive); in the object form the flag must also be `true`.
 */
function llamacppSupportsVision(modalities: unknown): boolean {
  const looksVisual = (label: string): boolean => {
    const lower = label.toLowerCase();
    return lower.includes("vision") || lower.includes("image");
  };
  if (Array.isArray(modalities)) {
    return modalities.some((label) => typeof label === "string" && looksVisual(label));
  }
  if (typeof modalities === "object" && modalities !== null) {
    return Object.entries(modalities).some(
      ([label, enabled]) => enabled === true && looksVisual(label),
    );
  }
  return false;
}

/** Extracts the `/v1/models` entries that have a string `id`; anything else is dropped. */
function llamacppModelEntries(body: V1ModelsBody | undefined): NonNullable<V1ModelsBody["data"]> {
  const raw: unknown = body?.data;
  if (!Array.isArray(raw)) return [];
  return raw.filter(
    (entry): entry is NonNullable<V1ModelsBody["data"]>[number] => typeof entry?.id === "string",
  );
}

class LlamacppAdapter implements BackendAdapter {
  readonly kind = "llamacpp" as const;
  readonly displayName = "llama.cpp";
  readonly defaultPorts: readonly number[] = [8080];
  readonly piApi: PiApiType = "openai-completions";
  readonly capabilities: ReadonlySet<Capability> = new Set<Capability>([
    Capability.ListModels,
    Capability.IntrospectLoaded,
    Capability.Health,
    Capability.PerModelCaps,
    Capability.Streaming,
  ]);

  // --- fingerprint ----------------------------------------------------------

  /**
   * Probes `GET /props` and claims the server when the response is ok and its JSON body
   * has a truthy `default_generation_settings` plus a `build_info` key.
   *
   * @returns A `DiscoveredServer` at confidence 0.9 with `auth: "none"`, or `null`.
   */
  async fingerprint(baseUrl: string, probe: Probe): Promise<DiscoveredServer | null> {
    const r = await probe("/props");
    if (!r.ok) return null;
    const body = r.json as PropsBody | undefined;
    if (!body?.default_generation_settings) return null;
    if (!("build_info" in (body as object))) return null;
    return {
      kind: "llamacpp",
      baseUrl,
      auth: "none",
      label: `llama.cpp (${baseUrl})`,
      confidence: 0.9,
    };
  }

  // --- health ---------------------------------------------------------------

  /**
   * Probes `GET /health` (sending the credential, if any) and maps the outcome:
   * status 0 → unreachable, 401 → unauthorized, 503 or a body of `status: "loading"` →
   * loading, any other non-ok → degraded, otherwise healthy (with `latencyMs` when the
   * probe reports it).
   */
  async health(
    _server: DiscoveredServer,
    cred: ServerCredential,
    probe: Probe,
  ): Promise<HealthStatus> {
    const r = await probe("/health", { headers: llamacppAuthHeaders(cred) });
    if (r.status === 0) return { state: "unreachable" };
    if (r.status === 401) return { state: "unauthorized" };
    if (!r.ok) {
      // llama-server returns 503 while loading
      if (r.status === 503) return { state: "loading" };
      return { state: "degraded" };
    }
    const body = r.json as { status?: string } | undefined;
    if (body?.status === "loading") return { state: "loading" };
    const status: HealthStatus = { state: "healthy" };
    if (r.latencyMs !== undefined) status.latencyMs = r.latencyMs;
    return status;
  }

  // --- listModels -----------------------------------------------------------

  /**
   * Lists models from `GET /v1/models`, enriched with data from `GET /props`.
   *
   * Context window preference: `/props` `default_generation_settings.n_ctx`, then the
   * entry's `meta.n_ctx_train`, then 8192. Image input is added when `/props` `modalities`
   * advertises vision (array-of-labels or object-of-flags form). A failed `/props` probe is
   * tolerated; only the `/v1/models` probe can throw.
   *
   * @throws Error on HTTP 401, on status 0 (unreachable), or on any other non-ok response.
   */
  async listModels(
    server: DiscoveredServer,
    cred: ServerCredential,
    probe: Probe,
  ): Promise<ModelDescriptor[]> {
    const r = await probe("/v1/models", { headers: llamacppAuthHeaders(cred) });
    if (!r.ok) {
      if (r.status === 401) throw new Error("401 Unauthorized");
      if (r.status === 0) throw new Error("listModels failed: server unreachable");
      throw new Error(`listModels failed: status ${r.status}`);
    }
    // Entries without a string id are dropped rather than surfaced with `id: undefined`.
    const data = llamacppModelEntries(r.json as V1ModelsBody | undefined);

    // /props supplies the context window and the modalities.
    const propsResult = await probe("/props", { headers: llamacppAuthHeaders(cred) });
    const props = propsResult.ok ? (propsResult.json as PropsBody | undefined) : undefined;
    const propsNCtx = props?.default_generation_settings?.n_ctx;
    // Read as `unknown`: the value may be an array of labels or an object of boolean flags.
    const modalities: unknown = props?.modalities;
    const hasVision = llamacppSupportsVision(modalities);

    return data.map((entry): ModelDescriptor => {
      const input: ModelDescriptor["input"] = hasVision ? ["text", "image"] : ["text"];
      return {
        id: entry.id,
        name: entry.id,
        contextWindow: propsNCtx ?? entry.meta?.n_ctx_train ?? LLAMACPP_FALLBACK_CONTEXT_WINDOW,
        maxTokens: LLAMACPP_FALLBACK_MAX_TOKENS,
        input,
        reasoning: false,
      };
    });
  }

  // --- introspectLoaded -----------------------------------------------------

  /**
   * Reports the loaded model by reading `model_path` from `GET /props` and matching it
   * against the ids from `GET /v1/models`.
   *
   * Matching order: an id equal to the path's basename or to the full path, or a non-empty
   * id that the path ends with; failing that, the only listed model when exactly one is
   * listed; failing that (or when `/v1/models` is not ok), the basename itself.
   *
   * @returns An empty `loadedModelIds` when `/props` has no usable `model_path`.
   * @throws Error on HTTP 401, on status 0 (unreachable), or on any other non-ok `/props` response.
   */
  async introspectLoaded(
    _server: DiscoveredServer,
    cred: ServerCredential,
    probe: Probe,
  ): Promise<LoadedState> {
    const r = await probe("/props", { headers: llamacppAuthHeaders(cred) });
    if (!r.ok) {
      if (r.status === 401) throw new Error("401 Unauthorized");
      if (r.status === 0) throw new Error("introspectLoaded failed: server unreachable");
      throw new Error(`introspectLoaded failed: status ${r.status}`);
    }
    const body = r.json as PropsBody | undefined;
    const modelPath: unknown = body?.model_path;

    // A missing, empty, or non-string path means nothing can be reported as loaded.
    if (typeof modelPath !== "string" || modelPath.length === 0) {
      return { loadedModelIds: [], source: "introspection" };
    }

    const modelBase = basename(modelPath);

    const modelsResult = await probe("/v1/models", { headers: llamacppAuthHeaders(cred) });
    let matchedId = modelBase;
    if (modelsResult.ok) {
      const data = llamacppModelEntries(modelsResult.json as V1ModelsBody | undefined);
      const found = data.find(
        (m) =>
          m.id === modelBase ||
          m.id === modelPath ||
          // Every string ends with "", so an empty id must not count as a suffix match.
          (m.id.length > 0 && modelPath.endsWith(m.id)),
      );
      if (found) {
        matchedId = found.id;
      } else if (data.length === 1 && data[0]) {
        // Single model — use it regardless
        matchedId = data[0].id;
      }
    }

    return {
      loadedModelIds: [matchedId],
      source: "introspection",
    };
  }

  // --- toPiModel ------------------------------------------------------------

  /**
   * Converts a descriptor into a `PiModelEntry`: zero cost, text-only input when the
   * descriptor lists none, and 8192 / 4096 for a missing context window / max tokens.
   */
  toPiModel(_server: DiscoveredServer, model: ModelDescriptor): PiModelEntry {
    return {
      id: model.id,
      name: model.name,
      reasoning: model.reasoning ?? false,
      input: model.input.length > 0 ? model.input : ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: model.contextWindow ?? LLAMACPP_FALLBACK_CONTEXT_WINDOW,
      maxTokens: model.maxTokens ?? LLAMACPP_FALLBACK_MAX_TOKENS,
    };
  }

  // --- inferenceBaseUrl -----------------------------------------------------

  /**
   * Returns `server.baseUrl` with "/v1" appended. Trailing slashes on the base URL are
   * removed first so the result never contains "//v1".
   */
  inferenceBaseUrl(server: DiscoveredServer): string {
    let end = server.baseUrl.length;
    while (end > 0 && server.baseUrl.charCodeAt(end - 1) === 47 /* "/" */) {
      end -= 1;
    }
    return `${server.baseUrl.slice(0, end)}/v1`;
  }
}
|
|
212
|
+
|
|
213
|
+
// ---------------------------------------------------------------------------
|
|
214
|
+
// Singleton export
|
|
215
|
+
// ---------------------------------------------------------------------------
|
|
216
|
+
|
|
217
|
+
/** The `LlamacppAdapter` instance this module exports, typed as the `BackendAdapter` interface. */
export const llamacppAdapter: BackendAdapter = new LlamacppAdapter();
|