@hypabolic/crossbar 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -17
- package/docs/onboarding.gif +0 -0
- package/package.json +8 -4
- package/src/adapters/generic.ts +6 -0
- package/src/adapters/llamacpp.ts +5 -0
- package/src/adapters/llamaswap.ts +5 -0
- package/src/adapters/lmstudio.ts +26 -3
- package/src/adapters/ollama.ts +5 -0
- package/src/adapters/vllm.ts +6 -0
- package/src/ui/onboarding.ts +342 -60
package/README.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
[CI](https://github.com/Hypabolic/Crossbar/actions/workflows/ci.yml)
|
|
4
4
|
[npm](https://www.npmjs.com/package/@hypabolic/crossbar)
|
|
5
5
|
|
|
6
|
-
**
|
|
6
|
+
**Effortless local & self-hosted model backends for the Pi coding agent.**
|
|
7
7
|
|
|
8
8
|
Crossbar is an extension for the [Pi coding agent](https://github.com/earendil-works/pi) that makes
|
|
9
9
|
wiring Pi to *any* local or self-hosted model backend effortless — zero hand-edited JSON, all setup
|
|
@@ -12,6 +12,8 @@ in-place model switching.
|
|
|
12
12
|
|
|
13
13
|
> Built by [Hypabolic](https://github.com/hypabolic).
|
|
14
14
|
|
|
15
|
+

|
|
16
|
+
|
|
15
17
|
---
|
|
16
18
|
|
|
17
19
|
## Why Crossbar
|
|
@@ -104,23 +106,10 @@ The `BackendAdapter` contract (`src/core/`) is the frozen boundary every adapter
|
|
|
104
106
|
conformance suite (`tests/conformance/`) validates every adapter against it, and
|
|
105
107
|
`tests/integration/` exercises the real discovery path over live sockets.
|
|
106
108
|
|
|
107
|
-
### CI
|
|
108
|
-
|
|
109
|
-
- **CI** (`.github/workflows/ci.yml`) runs `tsc --noEmit` + the full test suite on every push and PR
|
|
110
|
-
(Node 22 & 24).
|
|
111
|
-
- **Releases** (`.github/workflows/release.yml`) publish to npm via **GitHub→npm OIDC trusted
|
|
112
|
-
publishing** — no tokens or secrets. [Provenance](https://docs.npmjs.com/generating-provenance-statements)
|
|
113
|
-
is attached automatically. Two ways:
|
|
114
|
-
1. **Manual** — GitHub → *Actions → Release → Run workflow* → choose `patch` / `minor` / `major`.
|
|
115
|
-
It bumps `package.json`, commits, tags `vX.Y.Z`, and publishes.
|
|
116
|
-
2. **Tag push** — `npm version patch && git push --follow-tags` locally.
|
|
117
|
-
|
|
118
|
-
**One-time setup:** on npmjs.com, add a **Trusted Publisher** for `@hypabolic/crossbar`
|
|
119
|
-
(*Package settings → Trusted Publisher → GitHub Actions*) pointing at repo **`Hypabolic/Crossbar`**
|
|
120
|
-
and workflow **`release.yml`**. The workflow authenticates through the OIDC `id-token` it already
|
|
121
|
-
requests — no `NPM_TOKEN` needed.
|
|
109
|
+
### CI
|
|
122
110
|
|
|
123
|
-
|
|
111
|
+
CI (`.github/workflows/ci.yml`) runs `tsc --noEmit` + the full test suite on every push and PR
|
|
112
|
+
(Node 22 & 24).
|
|
124
113
|
|
|
125
114
|
## License
|
|
126
115
|
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hypabolic/crossbar",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "The local/self-hosted inference connector Pi
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "The local/self-hosted inference connector for Pi — multi-backend discovery, model switching, and zero-JSON in-TUI onboarding for the Pi coding agent.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"author": "Hypabolic",
|
|
@@ -29,10 +29,12 @@
|
|
|
29
29
|
"pi": {
|
|
30
30
|
"extensions": [
|
|
31
31
|
"./src/index.ts"
|
|
32
|
-
]
|
|
32
|
+
],
|
|
33
|
+
"image": "https://raw.githubusercontent.com/Hypabolic/Crossbar/main/docs/onboarding.gif"
|
|
33
34
|
},
|
|
34
35
|
"files": [
|
|
35
36
|
"src",
|
|
37
|
+
"docs/onboarding.gif",
|
|
36
38
|
"RESEARCH.md",
|
|
37
39
|
"CAPABILITY-MATRIX.md",
|
|
38
40
|
"ARCHITECTURE.md",
|
|
@@ -42,7 +44,9 @@
|
|
|
42
44
|
"scripts": {
|
|
43
45
|
"check": "tsc --noEmit",
|
|
44
46
|
"test": "vitest run",
|
|
45
|
-
"test:watch": "vitest"
|
|
47
|
+
"test:watch": "vitest",
|
|
48
|
+
"demo:lmstudio": "node scripts/fake-lmstudio.mjs",
|
|
49
|
+
"demo:gif": "node scripts/gen-onboarding-gif.mjs"
|
|
46
50
|
},
|
|
47
51
|
"peerDependencies": {
|
|
48
52
|
"@earendil-works/pi-coding-agent": "0.79.9",
|
package/src/adapters/generic.ts
CHANGED
|
@@ -140,9 +140,15 @@ class GenericAdapter implements BackendAdapter {
|
|
|
140
140
|
name: model.name,
|
|
141
141
|
reasoning: model.reasoning ?? false,
|
|
142
142
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
143
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
144
|
+
// COUNTS still matter: Pi maps any `usage.prompt_tokens_details.cached_tokens` the
|
|
145
|
+
// backend reports to `Usage.cacheRead` and displays it regardless of cost. The
|
|
146
|
+
// flag only asks for usage in streaming (never fabricates), so it is safe even for
|
|
147
|
+
// unknown OpenAI-compatible servers that may not report cache hits.
|
|
143
148
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
144
149
|
contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
|
|
145
150
|
maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
151
|
+
compat: { supportsUsageInStreaming: true },
|
|
146
152
|
};
|
|
147
153
|
}
|
|
148
154
|
|
package/src/adapters/llamacpp.ts
CHANGED
|
@@ -197,9 +197,14 @@ class LlamacppAdapter implements BackendAdapter {
|
|
|
197
197
|
name: model.name,
|
|
198
198
|
reasoning: model.reasoning ?? false,
|
|
199
199
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
200
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
201
|
+
// COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
|
|
202
|
+
// .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
|
|
203
|
+
// streaming usage reporting on so those prompt-cache hits are recorded.
|
|
200
204
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
201
205
|
contextWindow: model.contextWindow ?? 8192,
|
|
202
206
|
maxTokens: model.maxTokens ?? 4096,
|
|
207
|
+
compat: { supportsUsageInStreaming: true },
|
|
203
208
|
};
|
|
204
209
|
}
|
|
205
210
|
|
|
@@ -256,9 +256,14 @@ class LlamaswapAdapter implements BackendAdapter {
|
|
|
256
256
|
name: model.name,
|
|
257
257
|
reasoning: model.reasoning ?? false,
|
|
258
258
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
259
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
260
|
+
// COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
|
|
261
|
+
// .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
|
|
262
|
+
// streaming usage reporting on so those prompt-cache hits are recorded.
|
|
259
263
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
260
264
|
contextWindow: model.contextWindow ?? 8192,
|
|
261
265
|
maxTokens: model.maxTokens ?? 4096,
|
|
266
|
+
compat: { supportsUsageInStreaming: true },
|
|
262
267
|
};
|
|
263
268
|
}
|
|
264
269
|
|
package/src/adapters/lmstudio.ts
CHANGED
|
@@ -112,6 +112,7 @@ function hasLmsDiscriminator(json: unknown): boolean {
|
|
|
112
112
|
function toDescriptor(m: LmsModelEntry): ModelDescriptor {
|
|
113
113
|
const isEmbeddings = m.type === "embeddings";
|
|
114
114
|
const isVlm = m.type === "vlm";
|
|
115
|
+
const isLoaded = m.state === "loaded";
|
|
115
116
|
|
|
116
117
|
const input: ("text" | "image")[] = ["text"];
|
|
117
118
|
if (isVlm) input.push("image");
|
|
@@ -121,11 +122,24 @@ function toDescriptor(m: LmsModelEntry): ModelDescriptor {
|
|
|
121
122
|
name: m.id,
|
|
122
123
|
input,
|
|
123
124
|
embeddings: isEmbeddings,
|
|
124
|
-
loaded:
|
|
125
|
+
loaded: isLoaded,
|
|
125
126
|
raw: m,
|
|
126
127
|
};
|
|
127
|
-
|
|
128
|
-
|
|
128
|
+
|
|
129
|
+
// Context window: LM Studio reports both the model ceiling (`max_context_length`)
|
|
130
|
+
// and the window the model was actually loaded with (`loaded_context_length`),
|
|
131
|
+
// which is frequently configured well below the ceiling (e.g. a 128k model loaded
|
|
132
|
+
// at 4096). Register the OPERATIVE window so Pi budgets against what the server
|
|
133
|
+
// will really accept: prefer the loaded length when the model is resident (and
|
|
134
|
+
// non-zero), otherwise fall back to the model max. `loaded_context_length` is 0 or
|
|
135
|
+
// absent while the model is not loaded, so it never masks the ceiling in that case.
|
|
136
|
+
const loadedCtx =
|
|
137
|
+
isLoaded && typeof m.loaded_context_length === "number" && m.loaded_context_length > 0
|
|
138
|
+
? m.loaded_context_length
|
|
139
|
+
: undefined;
|
|
140
|
+
const ctx = loadedCtx ?? m.max_context_length;
|
|
141
|
+
if (ctx !== undefined) {
|
|
142
|
+
desc.contextWindow = ctx;
|
|
129
143
|
}
|
|
130
144
|
return desc;
|
|
131
145
|
}
|
|
@@ -307,9 +321,18 @@ class LmStudioAdapter implements BackendAdapter {
|
|
|
307
321
|
name: model.name,
|
|
308
322
|
reasoning: model.reasoning ?? false,
|
|
309
323
|
input: model.input.length > 0 ? (model.input as ("text" | "image")[]) : ["text"],
|
|
324
|
+
// Local inference is free, so per-token COSTS are zero. The cache-hit token
|
|
325
|
+
// COUNTS still flow and are worth recording: LM Studio's OpenAI-compatible
|
|
326
|
+
// responses report `usage.prompt_tokens_details.cached_tokens`, which Pi maps to
|
|
327
|
+
// `Usage.cacheRead` and surfaces in the TUI regardless of cost. Keep usage
|
|
328
|
+
// reporting on during streaming so those automatic-prefix-cache hits are
|
|
329
|
+
// recorded. We intentionally do NOT set `cacheControlFormat`: LM Studio (llama.cpp
|
|
330
|
+
// engine) caches matching prefixes automatically, so injecting Anthropic-style
|
|
331
|
+
// `cache_control` markers would be wrong for this OpenAI-completions backend.
|
|
310
332
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
311
333
|
contextWindow: model.contextWindow ?? 8192,
|
|
312
334
|
maxTokens: model.maxTokens ?? 4096,
|
|
335
|
+
compat: { supportsUsageInStreaming: true },
|
|
313
336
|
};
|
|
314
337
|
}
|
|
315
338
|
|
package/src/adapters/ollama.ts
CHANGED
|
@@ -320,9 +320,14 @@ class OllamaAdapter implements BackendAdapter {
|
|
|
320
320
|
name: model.name,
|
|
321
321
|
reasoning: model.reasoning ?? false,
|
|
322
322
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
323
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
324
|
+
// COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
|
|
325
|
+
// .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
|
|
326
|
+
// streaming usage reporting on so those prompt-cache hits are recorded.
|
|
323
327
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
324
328
|
contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
|
|
325
329
|
maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
330
|
+
compat: { supportsUsageInStreaming: true },
|
|
326
331
|
};
|
|
327
332
|
}
|
|
328
333
|
|
package/src/adapters/vllm.ts
CHANGED
|
@@ -177,9 +177,15 @@ class VllmAdapter implements BackendAdapter {
|
|
|
177
177
|
name: model.name,
|
|
178
178
|
reasoning: model.reasoning ?? false,
|
|
179
179
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
180
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
181
|
+
// COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
|
|
182
|
+
// .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. vLLM
|
|
183
|
+
// reports cached tokens from its automatic prefix cache; keep streaming usage
|
|
184
|
+
// reporting on so those hits are recorded.
|
|
180
185
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
181
186
|
contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
|
|
182
187
|
maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
188
|
+
compat: { supportsUsageInStreaming: true },
|
|
183
189
|
};
|
|
184
190
|
}
|
|
185
191
|
|
package/src/ui/onboarding.ts
CHANGED
|
@@ -24,45 +24,56 @@ import { Container, type SelectItem, SelectList, Text, matchesKey } from "@earen
|
|
|
24
24
|
|
|
25
25
|
import type { BackendAdapter } from "../core/backend-adapter.ts";
|
|
26
26
|
import { canIntrospect, canLoadUnload, canSwitch } from "../core/backend-adapter.ts";
|
|
27
|
-
import type { DiscoveredServer, ModelDescriptor, ServerRecord } from "../core/types.ts";
|
|
27
|
+
import type { DiscoveredServer, LoadedState, ModelDescriptor, ServerRecord } from "../core/types.ts";
|
|
28
28
|
import type { ServerRegistry } from "../registry/registry.ts";
|
|
29
29
|
import { serverId } from "../registry/ids.ts";
|
|
30
30
|
import { adapterFor } from "../adapters/index.ts";
|
|
31
|
+
import { unregisterServer } from "../shim/provider-shim.ts";
|
|
31
32
|
import { createProbe } from "../discovery/probe.ts";
|
|
32
33
|
|
|
33
34
|
// ─── Pure helpers ────────────────────────────────────────────────────────────
|
|
34
35
|
|
|
36
|
+
/**
 * Extract a compact `host:port` string from a base URL for list labels.
 *
 * When the URL has no explicit port, the scheme default is substituted ("443" for
 * `https:`, "80" for anything else). Input that cannot be parsed as a URL — or that
 * parses without a host, such as a bare "localhost:1234", which the URL parser
 * reads as scheme "localhost:" — is returned with any leading `http://` or
 * `https://` stripped.
 *
 * @param baseUrl - server base URL (ideally a full `http(s)://host[:port]` origin)
 * @returns a `host:port` label, or the scheme-stripped input when no host is found
 */
function hostPortOf(baseUrl: string): string {
  const stripped = baseUrl.replace(/^https?:\/\//, "");

  let parsed: URL;
  try {
    parsed = new URL(baseUrl);
  } catch {
    return stripped;
  }

  // A scheme-less "host:port" parses "successfully" with an empty hostname; treat
  // it like unparseable input instead of rendering a meaningless ":80" label.
  if (parsed.hostname === "") return stripped;

  const port = parsed.port || (parsed.protocol === "https:" ? "443" : "80");
  return `${parsed.hostname}:${port}`;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Upper-case the first character of a backend kind for display and leave the rest
 * untouched, e.g. "lmstudio" → "Lmstudio". An empty string stays empty.
 */
function kindLabelOf(kind: string): string {
  const head = kind.slice(0, 1);
  const tail = kind.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
|
|
50
|
+
|
|
35
51
|
/**
|
|
36
|
-
* Build a `SelectItem[]` representing the
|
|
37
|
-
*
|
|
38
|
-
*
|
|
52
|
+
* Build a `SelectItem[]` representing the servers shown in the top-level onboarding
|
|
53
|
+
* list. Three kinds of entry can appear:
|
|
54
|
+
* - discovered servers (in discovery order) — already-registered ones get an
|
|
55
|
+
* "(added)" suffix so the user can tell new from known;
|
|
56
|
+
* - registered servers that are NOT currently discovered (e.g. offline), so they
|
|
57
|
+
* can still be managed/removed;
|
|
58
|
+
* - a sentinel "Add manually" entry, always last.
|
|
39
59
|
*
|
|
40
|
-
*
|
|
41
|
-
*
|
|
60
|
+
* Selecting any already-registered entry opens the manage overlay; selecting a new
|
|
61
|
+
* discovered entry or the sentinel runs the add flow.
|
|
42
62
|
*/
|
|
43
63
|
export function buildDiscoveredItems(
|
|
44
64
|
discovered: DiscoveredServer[],
|
|
45
65
|
existing: ServerRecord[],
|
|
46
66
|
): SelectItem[] {
|
|
47
67
|
const existingIds = new Set(existing.map((r) => r.id));
|
|
68
|
+
const discoveredUrls = new Set(discovered.map((s) => s.baseUrl));
|
|
48
69
|
|
|
49
70
|
const items: SelectItem[] = discovered.map((server): SelectItem => {
|
|
50
71
|
const id = serverId(server.kind, server.baseUrl);
|
|
51
72
|
const isAdded = existingIds.has(id);
|
|
52
73
|
|
|
53
|
-
// Extract host:port from baseUrl for the label suffix
|
|
54
|
-
let hostPort: string;
|
|
55
|
-
try {
|
|
56
|
-
const u = new URL(server.baseUrl);
|
|
57
|
-
hostPort = `${u.hostname}:${u.port || (u.protocol === "https:" ? "443" : "80")}`;
|
|
58
|
-
} catch {
|
|
59
|
-
hostPort = server.baseUrl.replace(/^https?:\/\//, "");
|
|
60
|
-
}
|
|
61
|
-
|
|
62
74
|
// Compose a label: "[kind] host:port ✓ healthy" or "(added)"
|
|
63
|
-
const kindLabel = server.kind.charAt(0).toUpperCase() + server.kind.slice(1);
|
|
64
75
|
const healthMark = isAdded ? "(added)" : "✓ healthy";
|
|
65
|
-
const label = `${
|
|
76
|
+
const label = `${kindLabelOf(server.kind)} (${hostPortOf(server.baseUrl)})`;
|
|
66
77
|
|
|
67
78
|
return {
|
|
68
79
|
value: server.baseUrl,
|
|
@@ -73,6 +84,18 @@ export function buildDiscoveredItems(
|
|
|
73
84
|
};
|
|
74
85
|
});
|
|
75
86
|
|
|
87
|
+
// Append registered servers that weren't discovered this scan (offline / not
|
|
88
|
+
// reachable right now) so they remain manageable from the same list.
|
|
89
|
+
for (const record of existing) {
|
|
90
|
+
if (!record.enabled) continue;
|
|
91
|
+
if (discoveredUrls.has(record.baseUrl)) continue;
|
|
92
|
+
items.push({
|
|
93
|
+
value: record.baseUrl,
|
|
94
|
+
label: `${kindLabelOf(record.kind)} (${hostPortOf(record.baseUrl)}) (added)`,
|
|
95
|
+
description: "Registered · not currently discovered",
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
|
|
76
99
|
// Always append the manual-add sentinel
|
|
77
100
|
items.push({
|
|
78
101
|
value: "__manual__",
|
|
@@ -151,6 +174,36 @@ export function capabilityActions(
|
|
|
151
174
|
return actions;
|
|
152
175
|
}
|
|
153
176
|
|
|
177
|
+
/** Hint text rendered beneath each manage-menu action, keyed by action value. */
const ACTION_DESCRIPTIONS: Record<string, string> = {
  switch: "Make a model the active/served one",
  load: "Load a model into memory",
  unload: "Evict a loaded model from memory",
  introspect: "Show which models are currently loaded",
  remove: "Forget this server and delete its stored key",
};

/**
 * Assemble the action menu for a server that is already registered.
 *
 * The entries are whatever `capabilityActions(adapter)` yields (each decorated with
 * its hint from `ACTION_DESCRIPTIONS` when one exists), followed by a "Remove
 * server" entry that is appended unconditionally. An adapter for which
 * `capabilityActions` yields nothing therefore gets a menu containing only
 * "Remove server".
 *
 * @param adapter - backend adapter whose capabilities decide which actions appear
 * @returns the select items, capability actions first and "Remove server" last
 */
export function buildManageItems(adapter: BackendAdapter): SelectItem[] {
  const menu: SelectItem[] = [];

  for (const { value, label } of capabilityActions(adapter)) {
    const hint = ACTION_DESCRIPTIONS[value];
    // Only attach `description` when a hint exists, so the key is never present
    // with an `undefined` value.
    menu.push(hint === undefined ? { value, label } : { value, label, description: hint });
  }

  menu.push({
    value: "remove",
    label: "Remove server",
    description: ACTION_DESCRIPTIONS["remove"]!,
  });

  return menu;
}
|
|
206
|
+
|
|
154
207
|
/**
|
|
155
208
|
* Coerce a user-supplied string (which may be bare "host:port", missing a scheme,
|
|
156
209
|
* or already a valid URL) into a well-formed origin with no trailing slash.
|
|
@@ -177,6 +230,263 @@ export function normalizeManualUrl(input: string): string {
|
|
|
177
230
|
return u.origin.replace(/\/+$/, "");
|
|
178
231
|
}
|
|
179
232
|
|
|
233
|
+
// ─── Shared overlay + server-action helpers ─────────────────────────────────
|
|
234
|
+
|
|
235
|
+
/**
 * Build the minimal `DiscoveredServer` that adapter calls expect from a persisted
 * registry record. Kind, base URL, auth and label are copied across; `confidence`
 * is fixed at 1.
 */
function serverFromRecord(record: ServerRecord): DiscoveredServer {
  const { kind, baseUrl, auth, label } = record;
  return { kind, baseUrl, auth, label, confidence: 1 };
}
|
|
245
|
+
|
|
246
|
+
/**
 * Show a titled single-select list as an overlay (60% width, accent-coloured
 * borders above and below) and resolve with the picked item's value.
 *
 * Resolves to `null` when the list reports a cancel or the user presses Escape.
 * At most 12 rows are visible at once. The model picker and the manage menus all
 * go through this helper so they look the same.
 *
 * @param ctx - command context providing the `ui.custom` overlay API
 * @param title - heading rendered in bold accent above the list
 * @param items - entries to choose from
 * @param hint - dimmed key-binding hint rendered below the list
 */
function selectOverlay(
  ctx: ExtensionCommandContext,
  title: string,
  items: SelectItem[],
  hint: string,
): Promise<string | null> {
  return ctx.ui.custom<string | null>(
    (tui, theme, _kb, done) => {
      const accent = (s: string) => theme.fg("accent", s);

      const frame = new Container();
      frame.addChild(new DynamicBorder(accent));
      frame.addChild(new Text(accent(theme.bold(title))));

      const visibleRows = Math.min(items.length, 12);
      const picker = new SelectList(items, visibleRows, getSelectListTheme());
      picker.onSelect = (picked) => done(picked.value);
      picker.onCancel = () => done(null);

      frame.addChild(picker);
      frame.addChild(new Text(theme.fg("dim", hint)));
      frame.addChild(new DynamicBorder(accent));

      return {
        render: (width: number) => frame.render(width),
        invalidate: () => frame.invalidate(),
        handleInput: (data: string) => {
          // Escape is intercepted here and never forwarded to the list.
          if (matchesKey(data, "escape")) {
            done(null);
            return;
          }
          picker.handleInput(data);
          tui.requestRender();
        },
      };
    },
    { overlay: true, overlayOptions: { width: "60%" } },
  );
}
|
|
287
|
+
|
|
288
|
+
/** Reduce any thrown value to text: an `Error`'s message, otherwise `String(value)`. */
const errMsg = (err: unknown): string => {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
};
|
|
289
|
+
|
|
290
|
+
/**
 * List a registered server's models.
 *
 * Asks the adapter for the live list (5 s default probe timeout). If that call
 * fails, the record's non-empty `lastKnownModels` is returned instead; when there
 * is nothing to fall back on, an error notification is shown and `null` is
 * returned.
 *
 * @returns the live or last-known models, or `null` when neither is available
 */
async function fetchModels(
  ctx: ExtensionCommandContext,
  registry: ServerRegistry,
  record: ServerRecord,
): Promise<ModelDescriptor[] | null> {
  const backend = adapterFor(record.kind);
  const credential = await registry.resolveCredential(record);
  const probe = createProbe(record.baseUrl, { auth: credential, defaultTimeoutMs: 5000 });

  try {
    return await backend.listModels(serverFromRecord(record), credential, probe);
  } catch (err) {
    const cached = record.lastKnownModels;
    const hasCached = Boolean(cached && cached.length > 0);
    if (!hasCached) {
      ctx.ui.notify(`Crossbar: could not list models — ${errMsg(err)}`, "error");
      return null;
    }
    return cached!;
  }
}
|
|
309
|
+
|
|
310
|
+
/**
 * Switch the active model or load a model: let the user pick from the server's
 * non-embedding models, then invoke the matching adapter capability.
 *
 * The capability is verified up front, before any network or UI work, so an
 * adapter that cannot perform the action never shows a picker or a dangling
 * "switching to…/loading…" notice. Embedding models are filtered out before the
 * emptiness check so the picker is never opened with nothing to select.
 *
 * Failures of the adapter call are reported as an error notification; nothing is
 * thrown for them.
 *
 * @param ctx - command context used for the picker overlay and notifications
 * @param registry - registry used to resolve the server's credential
 * @param record - the registered server to act on
 * @param action - "switch" to change the active model, "load" to load one
 */
async function performModelAction(
  ctx: ExtensionCommandContext,
  registry: ServerRegistry,
  record: ServerRecord,
  action: "switch" | "load",
): Promise<void> {
  const adapter = adapterFor(record.kind);
  const supported = action === "switch" ? canSwitch(adapter) : canLoadUnload(adapter);
  if (!supported) return;

  const models = await fetchModels(ctx, registry, record);
  if (!models) return;

  const candidates = models.filter((m) => !m.embeddings);
  if (candidates.length === 0) {
    ctx.ui.notify("Crossbar: server returned no models.", "warning");
    return;
  }

  const title = action === "switch"
    ? `Switch model — ${record.label}`
    : `Load model — ${record.label}`;
  const modelId = await selectOverlay(
    ctx,
    title,
    buildModelItems(candidates),
    "↑↓ navigate · Enter select · Esc cancel",
  );
  if (!modelId) return;

  const cred = await registry.resolveCredential(record);
  // Loads can be slow (cold model into VRAM) — give them a generous budget.
  const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 60_000 });

  ctx.ui.notify(
    `Crossbar: ${action === "switch" ? "switching to" : "loading"} ${modelId}…`,
    "info",
  );
  try {
    // The capability guards are repeated here only to narrow the adapter type;
    // the up-front check above already guarantees the relevant one holds.
    if (action === "switch" && canSwitch(adapter)) {
      await adapter.switchModel(serverFromRecord(record), cred, modelId, probe);
    } else if (action === "load" && canLoadUnload(adapter)) {
      await adapter.loadUnload(serverFromRecord(record), cred, modelId, "load", probe);
    }
    ctx.ui.notify(
      `Crossbar: ${modelId} ${action === "switch" ? "is now active" : "loaded"}.`,
      "info",
    );
  } catch (err) {
    ctx.ui.notify(`Crossbar: ${action} failed — ${errMsg(err)}`, "error");
  }
}
|
|
360
|
+
|
|
361
|
+
/**
 * Unload one of a server's loaded models.
 *
 * Does nothing when the adapter has no load/unload capability. The set of loaded
 * model IDs comes from a live introspection when the adapter supports it, and from
 * the record's `lastKnownLoaded` otherwise (or when introspection fails). The user
 * picks one ID from an overlay and the adapter is asked to unload it; the outcome
 * is reported via notifications.
 */
async function performUnload(
  ctx: ExtensionCommandContext,
  registry: ServerRegistry,
  record: ServerRecord,
): Promise<void> {
  const adapter = adapterFor(record.kind);
  if (!canLoadUnload(adapter)) return;

  const credential = await registry.resolveCredential(record);
  const probe = createProbe(record.baseUrl, { auth: credential, defaultTimeoutMs: 5000 });

  let candidates: string[];
  if (!canIntrospect(adapter)) {
    candidates = record.lastKnownLoaded ?? [];
  } else {
    try {
      const live = await adapter.introspectLoaded(serverFromRecord(record), credential, probe);
      candidates = live.loadedModelIds;
    } catch {
      // Introspection failed — deliberately fall back to the last-known set.
      candidates = record.lastKnownLoaded ?? [];
    }
  }

  if (candidates.length === 0) {
    ctx.ui.notify("Crossbar: no models are currently loaded.", "info");
    return;
  }

  const choices = candidates.map((id) => ({ value: id, label: id }));
  const target = await selectOverlay(
    ctx,
    `Unload model — ${record.label}`,
    choices,
    "↑↓ navigate · Enter select · Esc cancel",
  );
  if (!target) return;

  ctx.ui.notify(`Crossbar: unloading ${target}…`, "info");
  try {
    await adapter.loadUnload(serverFromRecord(record), credential, target, "unload", probe);
    ctx.ui.notify(`Crossbar: ${target} unloaded.`, "info");
  } catch (err) {
    ctx.ui.notify(`Crossbar: unload failed — ${errMsg(err)}`, "error");
  }
}
|
|
402
|
+
|
|
403
|
+
/**
 * Query which models a server currently has loaded and report them in a single
 * notification.
 *
 * Does nothing when the adapter cannot introspect. Each model ID is followed by
 * its context length when the adapter reports one: values of 1000 or more are
 * shown rounded to the nearest thousand with a "k" suffix, smaller values
 * verbatim. A failed introspection is reported as an error notification.
 */
async function performIntrospect(
  ctx: ExtensionCommandContext,
  registry: ServerRegistry,
  record: ServerRecord,
): Promise<void> {
  const adapter = adapterFor(record.kind);
  if (!canIntrospect(adapter)) return;

  const credential = await registry.resolveCredential(record);
  const probe = createProbe(record.baseUrl, { auth: credential, defaultTimeoutMs: 5000 });

  let loaded: LoadedState;
  try {
    loaded = await adapter.introspectLoaded(serverFromRecord(record), credential, probe);
  } catch (err) {
    ctx.ui.notify(`Crossbar: could not read loaded models — ${errMsg(err)}`, "error");
    return;
  }

  if (loaded.loadedModelIds.length === 0) {
    ctx.ui.notify(`Crossbar: ${record.label} has no models loaded.`, "info");
    return;
  }

  // Render one entry: "id" alone, or "id (<ctx> ctx)" when a context length is known.
  const describe = (id: string): string => {
    const ctxLen = loaded.perModel?.[id]?.contextLength;
    if (ctxLen === undefined) return id;
    let shown: string;
    if (ctxLen >= 1000) {
      shown = `${Math.round(ctxLen / 1000)}k`;
    } else {
      shown = `${ctxLen}`;
    }
    return `${id} (${shown} ctx)`;
  };

  const summary = loaded.loadedModelIds.map((id) => describe(id)).join(", ");
  ctx.ui.notify(`Crossbar: ${record.label} loaded — ${summary}`, "info");
}
|
|
435
|
+
|
|
436
|
+
/**
 * Ask for confirmation, then remove a server.
 *
 * Only an explicit "Remove server" answer proceeds; any other result leaves
 * everything untouched. On confirmation the server is unregistered via
 * `unregisterServer`, removed from the registry by ID, and a confirmation notice
 * is shown — in that order.
 */
async function performRemove(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  registry: ServerRegistry,
  record: ServerRecord,
): Promise<void> {
  const removeLabel = "Remove server";
  const answer = await ctx.ui.select(`Remove ${record.label}?`, ["Cancel", removeLabel]);

  if (answer === removeLabel) {
    unregisterServer(pi, record);
    await registry.remove(record.id);
    ctx.ui.notify(`Crossbar: removed ${record.label}.`, "info");
  }
}
|
|
449
|
+
|
|
450
|
+
/**
 * Show the manage menu for an already-registered server and run whichever action
 * the user picks.
 *
 * The menu comes from `buildManageItems` for the record's adapter. Closing the
 * overlay without a choice does nothing; an unrecognised choice is ignored.
 *
 * @param pi - extension API handle, needed only by the remove action
 * @param ctx - command context used for overlays and notifications
 * @param deps - injected dependencies; only `registry` is used here
 * @param record - the registered server being managed
 */
export async function openServerActions(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  deps: OnboardingDeps,
  record: ServerRecord,
): Promise<void> {
  const { registry } = deps;
  const menu = buildManageItems(adapterFor(record.kind));

  const picked = await selectOverlay(
    ctx,
    `Manage — ${record.label}`,
    menu,
    "↑↓ navigate · Enter select · Esc close",
  );
  if (!picked) return;

  if (picked === "switch" || picked === "load") {
    await performModelAction(ctx, registry, record, picked);
  } else if (picked === "unload") {
    await performUnload(ctx, registry, record);
  } else if (picked === "introspect") {
    await performIntrospect(ctx, registry, record);
  } else if (picked === "remove") {
    await performRemove(pi, ctx, registry, record);
  }
}
|
|
489
|
+
|
|
180
490
|
// ─── Overlay flow driver ────────────────────────────────────────────────────
|
|
181
491
|
|
|
182
492
|
export interface OnboardingDeps {
|
|
@@ -203,7 +513,7 @@ export interface OnboardingDeps {
|
|
|
203
513
|
* @param deps - injected registry + discover function (for testability)
|
|
204
514
|
*/
|
|
205
515
|
export async function openOnboarding(
|
|
206
|
-
|
|
516
|
+
pi: ExtensionAPI,
|
|
207
517
|
ctx: ExtensionCommandContext,
|
|
208
518
|
deps: OnboardingDeps,
|
|
209
519
|
): Promise<void> {
|
|
@@ -299,7 +609,15 @@ export async function openOnboarding(
|
|
|
299
609
|
|
|
300
610
|
targetBaseUrl = normalizedUrl;
|
|
301
611
|
} else {
|
|
302
|
-
//
|
|
612
|
+
// Already-registered server (discovered or offline) → open the manage overlay
|
|
613
|
+
// instead of re-running the add flow.
|
|
614
|
+
const existingRecord = registry.list().find((r) => r.baseUrl === chosenBaseUrl);
|
|
615
|
+
if (existingRecord) {
|
|
616
|
+
await openServerActions(pi, ctx, deps, existingRecord);
|
|
617
|
+
return;
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
// New discovered server path
|
|
303
621
|
discoveredServer = discovered.find((s) => s.baseUrl === chosenBaseUrl);
|
|
304
622
|
targetBaseUrl = chosenBaseUrl;
|
|
305
623
|
}
|
|
@@ -368,47 +686,11 @@ export async function openOnboarding(
|
|
|
368
686
|
}
|
|
369
687
|
|
|
370
688
|
// ── Step 5: pick default model ─────────────────────────────────────────────
|
|
371
|
-
const
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
(
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
|
|
378
|
-
container.addChild(
|
|
379
|
-
new Text(theme.fg("accent", theme.bold(`Pick default model — ${discoveredServer!.label}`))),
|
|
380
|
-
);
|
|
381
|
-
|
|
382
|
-
const list = new SelectList(
|
|
383
|
-
modelItems,
|
|
384
|
-
Math.min(modelItems.length, 12),
|
|
385
|
-
getSelectListTheme(),
|
|
386
|
-
);
|
|
387
|
-
|
|
388
|
-
list.onSelect = (item) => done(item.value);
|
|
389
|
-
list.onCancel = () => done(null);
|
|
390
|
-
|
|
391
|
-
container.addChild(list);
|
|
392
|
-
container.addChild(
|
|
393
|
-
new Text(theme.fg("dim", "↑↓ navigate · Enter select · Esc skip")),
|
|
394
|
-
);
|
|
395
|
-
container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
|
|
396
|
-
|
|
397
|
-
return {
|
|
398
|
-
render: (width: number) => container.render(width),
|
|
399
|
-
invalidate: () => container.invalidate(),
|
|
400
|
-
handleInput: (data: string) => {
|
|
401
|
-
// Allow Esc to skip model selection
|
|
402
|
-
if (matchesKey(data, "escape")) {
|
|
403
|
-
done(null);
|
|
404
|
-
return;
|
|
405
|
-
}
|
|
406
|
-
list.handleInput(data);
|
|
407
|
-
_tui.requestRender();
|
|
408
|
-
},
|
|
409
|
-
};
|
|
410
|
-
},
|
|
411
|
-
{ overlay: true, overlayOptions: { width: "60%" } },
|
|
689
|
+
const chosenModelId = await selectOverlay(
|
|
690
|
+
ctx,
|
|
691
|
+
`Pick default model — ${discoveredServer.label}`,
|
|
692
|
+
buildModelItems(models),
|
|
693
|
+
"↑↓ navigate · Enter select · Esc skip",
|
|
412
694
|
);
|
|
413
695
|
|
|
414
696
|
// chosenModelId === null means the user skipped — still register the server
|