@hypabolic/crossbar 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![CI](https://github.com/Hypabolic/Crossbar/actions/workflows/ci.yml/badge.svg)](https://github.com/Hypabolic/Crossbar/actions/workflows/ci.yml)
4
4
  [![npm](https://img.shields.io/npm/v/@hypabolic/crossbar)](https://www.npmjs.com/package/@hypabolic/crossbar)
5
5
 
6
- **The local/self-hosted inference connector Pi should have shipped with.**
6
+ **Effortless local & self-hosted model backends for the Pi coding agent.**
7
7
 
8
8
  Crossbar is an extension for the [Pi coding agent](https://github.com/earendil-works/pi) that makes
9
9
  wiring Pi to *any* local or self-hosted model backend effortless — zero hand-edited JSON, all setup
@@ -12,6 +12,8 @@ in-place model switching.
12
12
 
13
13
  > Built by [Hypabolic](https://github.com/hypabolic).
14
14
 
15
+ ![Crossbar onboarding: discover a server, open the manage menu, switch the active model](docs/onboarding.gif)
16
+
15
17
  ---
16
18
 
17
19
  ## Why Crossbar
@@ -104,23 +106,10 @@ The `BackendAdapter` contract (`src/core/`) is the frozen boundary every adapter
104
106
  conformance suite (`tests/conformance/`) validates every adapter against it, and
105
107
  `tests/integration/` exercises the real discovery path over live sockets.
106
108
 
107
- ### CI / releasing
108
-
109
- - **CI** (`.github/workflows/ci.yml`) runs `tsc --noEmit` + the full test suite on every push and PR
110
- (Node 22 & 24).
111
- - **Releases** (`.github/workflows/release.yml`) publish to npm via **GitHub→npm OIDC trusted
112
- publishing** — no tokens or secrets. [Provenance](https://docs.npmjs.com/generating-provenance-statements)
113
- is attached automatically. Two ways:
114
- 1. **Manual** — GitHub → *Actions → Release → Run workflow* → choose `patch` / `minor` / `major`.
115
- It bumps `package.json`, commits, tags `vX.Y.Z`, and publishes.
116
- 2. **Tag push** — `npm version patch && git push --follow-tags` locally.
117
-
118
- **One-time setup:** on npmjs.com, add a **Trusted Publisher** for `@hypabolic/crossbar`
119
- (*Package settings → Trusted Publisher → GitHub Actions*) pointing at repo **`Hypabolic/Crossbar`**
120
- and workflow **`release.yml`**. The workflow authenticates through the OIDC `id-token` it already
121
- requests — no `NPM_TOKEN` needed.
109
+ ### CI
122
110
 
123
- <!-- TODO: add an onboarding demo GIF (docs/onboarding.gif) recorded against a live Ollama + LM Studio. -->
111
+ CI (`.github/workflows/ci.yml`) runs `tsc --noEmit` + the full test suite on every push and PR
112
+ (Node 22 & 24).
124
113
 
125
114
  ## License
126
115
 
Binary file
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@hypabolic/crossbar",
3
- "version": "0.1.1",
4
- "description": "The local/self-hosted inference connector Pi should have shipped with — multi-backend discovery, model switching, and zero-JSON in-TUI onboarding for the Pi coding agent.",
3
+ "version": "0.2.0",
4
+ "description": "The local/self-hosted inference connector for Pi — multi-backend discovery, model switching, and zero-JSON in-TUI onboarding for the Pi coding agent.",
5
5
  "type": "module",
6
6
  "license": "MIT",
7
7
  "author": "Hypabolic",
@@ -29,10 +29,12 @@
29
29
  "pi": {
30
30
  "extensions": [
31
31
  "./src/index.ts"
32
- ]
32
+ ],
33
+ "image": "https://raw.githubusercontent.com/Hypabolic/Crossbar/main/docs/onboarding.gif"
33
34
  },
34
35
  "files": [
35
36
  "src",
37
+ "docs/onboarding.gif",
36
38
  "RESEARCH.md",
37
39
  "CAPABILITY-MATRIX.md",
38
40
  "ARCHITECTURE.md",
@@ -42,7 +44,9 @@
42
44
  "scripts": {
43
45
  "check": "tsc --noEmit",
44
46
  "test": "vitest run",
45
- "test:watch": "vitest"
47
+ "test:watch": "vitest",
48
+ "demo:lmstudio": "node scripts/fake-lmstudio.mjs",
49
+ "demo:gif": "node scripts/gen-onboarding-gif.mjs"
46
50
  },
47
51
  "peerDependencies": {
48
52
  "@earendil-works/pi-coding-agent": "0.79.9",
@@ -140,9 +140,15 @@ class GenericAdapter implements BackendAdapter {
140
140
  name: model.name,
141
141
  reasoning: model.reasoning ?? false,
142
142
  input: model.input.length > 0 ? model.input : ["text"],
143
+ // Local inference is free → per-token costs are zero, but cache-hit token
144
+ // COUNTS still matter: Pi maps any `usage.prompt_tokens_details.cached_tokens` the
145
+ // backend reports to `Usage.cacheRead` and displays it regardless of cost. The
146
+ // `supportsUsageInStreaming` flag set below only asks for usage in streaming (never fabricates), so it is safe even for
147
+ // unknown OpenAI-compatible servers that may not report cache hits.
143
148
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
144
149
  contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
145
150
  maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
151
+ compat: { supportsUsageInStreaming: true },
146
152
  };
147
153
  }
148
154
 
@@ -197,9 +197,14 @@ class LlamacppAdapter implements BackendAdapter {
197
197
  name: model.name,
198
198
  reasoning: model.reasoning ?? false,
199
199
  input: model.input.length > 0 ? model.input : ["text"],
200
+ // Local inference is free → per-token costs are zero, but cache-hit token
201
+ // COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
202
+ // .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
203
+ // streaming usage reporting on so those prompt-cache hits are recorded.
200
204
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
201
205
  contextWindow: model.contextWindow ?? 8192,
202
206
  maxTokens: model.maxTokens ?? 4096,
207
+ compat: { supportsUsageInStreaming: true },
203
208
  };
204
209
  }
205
210
 
@@ -256,9 +256,14 @@ class LlamaswapAdapter implements BackendAdapter {
256
256
  name: model.name,
257
257
  reasoning: model.reasoning ?? false,
258
258
  input: model.input.length > 0 ? model.input : ["text"],
259
+ // Local inference is free → per-token costs are zero, but cache-hit token
260
+ // COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
261
+ // .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
262
+ // streaming usage reporting on so those prompt-cache hits are recorded.
259
263
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
260
264
  contextWindow: model.contextWindow ?? 8192,
261
265
  maxTokens: model.maxTokens ?? 4096,
266
+ compat: { supportsUsageInStreaming: true },
262
267
  };
263
268
  }
264
269
 
@@ -112,6 +112,7 @@ function hasLmsDiscriminator(json: unknown): boolean {
112
112
  function toDescriptor(m: LmsModelEntry): ModelDescriptor {
113
113
  const isEmbeddings = m.type === "embeddings";
114
114
  const isVlm = m.type === "vlm";
115
+ const isLoaded = m.state === "loaded";
115
116
 
116
117
  const input: ("text" | "image")[] = ["text"];
117
118
  if (isVlm) input.push("image");
@@ -121,11 +122,24 @@ function toDescriptor(m: LmsModelEntry): ModelDescriptor {
121
122
  name: m.id,
122
123
  input,
123
124
  embeddings: isEmbeddings,
124
- loaded: m.state === "loaded",
125
+ loaded: isLoaded,
125
126
  raw: m,
126
127
  };
127
- if (m.max_context_length !== undefined) {
128
- desc.contextWindow = m.max_context_length;
128
+
129
+ // Context window: LM Studio reports both the model ceiling (`max_context_length`)
130
+ // and the window the model was actually loaded with (`loaded_context_length`),
131
+ // which is frequently configured well below the ceiling (e.g. a 128k model loaded
132
+ // at 4096). Register the OPERATIVE window so Pi budgets against what the server
133
+ // will really accept: prefer the loaded length when the model is resident (and
134
+ // non-zero), otherwise fall back to the model max. `loaded_context_length` is 0 or
135
+ // absent while the model is not loaded, so it never masks the ceiling in that case.
136
+ const loadedCtx =
137
+ isLoaded && typeof m.loaded_context_length === "number" && m.loaded_context_length > 0
138
+ ? m.loaded_context_length
139
+ : undefined;
140
+ const ctx = loadedCtx ?? m.max_context_length;
141
+ if (ctx !== undefined) {
142
+ desc.contextWindow = ctx;
129
143
  }
130
144
  return desc;
131
145
  }
@@ -307,9 +321,18 @@ class LmStudioAdapter implements BackendAdapter {
307
321
  name: model.name,
308
322
  reasoning: model.reasoning ?? false,
309
323
  input: model.input.length > 0 ? (model.input as ("text" | "image")[]) : ["text"],
324
+ // Local inference is free, so per-token COSTS are zero. The cache-hit token
325
+ // COUNTS still flow and are worth recording: LM Studio's OpenAI-compatible
326
+ // responses report `usage.prompt_tokens_details.cached_tokens`, which Pi maps to
327
+ // `Usage.cacheRead` and surfaces in the TUI regardless of cost. Keep usage
328
+ // reporting on during streaming so those automatic-prefix-cache hits are
329
+ // recorded. We intentionally do NOT set `cacheControlFormat`: LM Studio (llama.cpp
330
+ // engine) caches matching prefixes automatically, so injecting Anthropic-style
331
+ // `cache_control` markers would be wrong for this OpenAI-completions backend.
310
332
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
311
333
  contextWindow: model.contextWindow ?? 8192,
312
334
  maxTokens: model.maxTokens ?? 4096,
335
+ compat: { supportsUsageInStreaming: true },
313
336
  };
314
337
  }
315
338
 
@@ -320,9 +320,14 @@ class OllamaAdapter implements BackendAdapter {
320
320
  name: model.name,
321
321
  reasoning: model.reasoning ?? false,
322
322
  input: model.input.length > 0 ? model.input : ["text"],
323
+ // Local inference is free → per-token costs are zero, but cache-hit token
324
+ // COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
325
+ // .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
326
+ // streaming usage reporting on so those prompt-cache hits are recorded.
323
327
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
324
328
  contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
325
329
  maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
330
+ compat: { supportsUsageInStreaming: true },
326
331
  };
327
332
  }
328
333
 
@@ -177,9 +177,15 @@ class VllmAdapter implements BackendAdapter {
177
177
  name: model.name,
178
178
  reasoning: model.reasoning ?? false,
179
179
  input: model.input.length > 0 ? model.input : ["text"],
180
+ // Local inference is free → per-token costs are zero, but cache-hit token
181
+ // COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
182
+ // .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. vLLM
183
+ // reports cached tokens from its automatic prefix cache; keep streaming usage
184
+ // reporting on so those hits are recorded.
180
185
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
181
186
  contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
182
187
  maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
188
+ compat: { supportsUsageInStreaming: true },
183
189
  };
184
190
  }
185
191
 
@@ -24,45 +24,56 @@ import { Container, type SelectItem, SelectList, Text, matchesKey } from "@earen
24
24
 
25
25
  import type { BackendAdapter } from "../core/backend-adapter.ts";
26
26
  import { canIntrospect, canLoadUnload, canSwitch } from "../core/backend-adapter.ts";
27
- import type { DiscoveredServer, ModelDescriptor, ServerRecord } from "../core/types.ts";
27
+ import type { DiscoveredServer, LoadedState, ModelDescriptor, ServerRecord } from "../core/types.ts";
28
28
  import type { ServerRegistry } from "../registry/registry.ts";
29
29
  import { serverId } from "../registry/ids.ts";
30
30
  import { adapterFor } from "../adapters/index.ts";
31
+ import { unregisterServer } from "../shim/provider-shim.ts";
31
32
  import { createProbe } from "../discovery/probe.ts";
32
33
 
33
34
  // ─── Pure helpers ────────────────────────────────────────────────────────────
34
35
 
36
+ /** Extract a `host:port` string from a base URL for compact labels. */
37
+ function hostPortOf(baseUrl: string): string {
38
+ try {
39
+ const u = new URL(baseUrl);
40
+ return `${u.hostname}:${u.port || (u.protocol === "https:" ? "443" : "80")}`;
41
+ } catch {
42
+ return baseUrl.replace(/^https?:\/\//, "");
43
+ }
44
+ }
45
+
46
+ /** Capitalise a backend kind for display, e.g. "lmstudio" → "Lmstudio". */
47
+ function kindLabelOf(kind: string): string {
48
+ return kind.charAt(0).toUpperCase() + kind.slice(1);
49
+ }
50
+
35
51
  /**
36
- * Build a `SelectItem[]` representing the discovered servers for the top-level
37
- * onboarding list. Already-registered servers are marked with a "(added)" suffix
38
- * so the user can see what is new vs. what Crossbar already knows about.
52
+ * Build a `SelectItem[]` representing the servers shown in the top-level onboarding
53
+ * list. Three kinds of entry can appear:
54
+ * - discovered servers (in discovery order) — already-registered ones get an
55
+ * "(added)" suffix so the user can tell new from known;
56
+ * - registered servers that are NOT currently discovered (e.g. offline), so they
57
+ * can still be managed/removed;
58
+ * - a sentinel "Add manually" entry, always last.
39
59
  *
40
- * Items are ordered: discovered servers first (in discovery order), then a
41
- * sentinel "Add manually" entry at the end.
60
+ * Selecting any already-registered entry opens the manage overlay; selecting a new
61
+ * discovered entry or the sentinel runs the add flow.
42
62
  */
43
63
  export function buildDiscoveredItems(
44
64
  discovered: DiscoveredServer[],
45
65
  existing: ServerRecord[],
46
66
  ): SelectItem[] {
47
67
  const existingIds = new Set(existing.map((r) => r.id));
68
+ const discoveredUrls = new Set(discovered.map((s) => s.baseUrl));
48
69
 
49
70
  const items: SelectItem[] = discovered.map((server): SelectItem => {
50
71
  const id = serverId(server.kind, server.baseUrl);
51
72
  const isAdded = existingIds.has(id);
52
73
 
53
- // Extract host:port from baseUrl for the label suffix
54
- let hostPort: string;
55
- try {
56
- const u = new URL(server.baseUrl);
57
- hostPort = `${u.hostname}:${u.port || (u.protocol === "https:" ? "443" : "80")}`;
58
- } catch {
59
- hostPort = server.baseUrl.replace(/^https?:\/\//, "");
60
- }
61
-
62
74
  // Compose the label "Kind (host:port)" and pick a status mark: "(added)" or "✓ healthy"
63
- const kindLabel = server.kind.charAt(0).toUpperCase() + server.kind.slice(1);
64
75
  const healthMark = isAdded ? "(added)" : "✓ healthy";
65
- const label = `${kindLabel} (${hostPort})`;
76
+ const label = `${kindLabelOf(server.kind)} (${hostPortOf(server.baseUrl)})`;
66
77
 
67
78
  return {
68
79
  value: server.baseUrl,
@@ -73,6 +84,18 @@ export function buildDiscoveredItems(
73
84
  };
74
85
  });
75
86
 
87
+ // Append registered servers that weren't discovered this scan (offline / not
88
+ // reachable right now) so they remain manageable from the same list.
89
+ for (const record of existing) {
90
+ if (!record.enabled) continue;
91
+ if (discoveredUrls.has(record.baseUrl)) continue;
92
+ items.push({
93
+ value: record.baseUrl,
94
+ label: `${kindLabelOf(record.kind)} (${hostPortOf(record.baseUrl)}) (added)`,
95
+ description: "Registered · not currently discovered",
96
+ });
97
+ }
98
+
76
99
  // Always append the manual-add sentinel
77
100
  items.push({
78
101
  value: "__manual__",
@@ -151,6 +174,36 @@ export function capabilityActions(
151
174
  return actions;
152
175
  }
153
176
 
177
+ /** One-line hints shown under each manage action. */
178
+ const ACTION_DESCRIPTIONS: Record<string, string> = {
179
+ switch: "Make a model the active/served one",
180
+ load: "Load a model into memory",
181
+ unload: "Evict a loaded model from memory",
182
+ introspect: "Show which models are currently loaded",
183
+ remove: "Forget this server and delete its stored key",
184
+ };
185
+
186
+ /**
187
+ * Build the manage-overlay action list for an already-registered server: the
188
+ * adapter's capability-filtered actions (switch / load / unload / introspect) plus
189
+ * a "Remove server" action that is always available. Backends without any local
190
+ * capabilities (vLLM, OpenAI, Anthropic, generic) show only "Remove server".
191
+ */
192
+ export function buildManageItems(adapter: BackendAdapter): SelectItem[] {
193
+ const items: SelectItem[] = capabilityActions(adapter).map((a) => {
194
+ const item: SelectItem = { value: a.value, label: a.label };
195
+ const desc = ACTION_DESCRIPTIONS[a.value];
196
+ if (desc !== undefined) item.description = desc;
197
+ return item;
198
+ });
199
+ items.push({
200
+ value: "remove",
201
+ label: "Remove server",
202
+ description: ACTION_DESCRIPTIONS["remove"]!,
203
+ });
204
+ return items;
205
+ }
206
+
154
207
  /**
155
208
  * Coerce a user-supplied string (which may be bare "host:port", missing a scheme,
156
209
  * or already a valid URL) into a well-formed origin with no trailing slash.
@@ -177,6 +230,263 @@ export function normalizeManualUrl(input: string): string {
177
230
  return u.origin.replace(/\/+$/, "");
178
231
  }
179
232
 
233
+ // ─── Shared overlay + server-action helpers ─────────────────────────────────
234
+
235
+ /** Reconstruct a minimal DiscoveredServer from a persisted record for adapter calls. */
236
+ function serverFromRecord(record: ServerRecord): DiscoveredServer {
237
+ return {
238
+ kind: record.kind,
239
+ baseUrl: record.baseUrl,
240
+ auth: record.auth,
241
+ label: record.label,
242
+ confidence: 1,
243
+ };
244
+ }
245
+
246
+ /**
247
+ * Render a single-select overlay (titled SelectList in an accent border) and resolve
248
+ * to the chosen item value, or `null` on Esc/cancel. Shared by the model picker and
249
+ * the manage menus so they stay visually consistent.
250
+ */
251
+ function selectOverlay(
252
+ ctx: ExtensionCommandContext,
253
+ title: string,
254
+ items: SelectItem[],
255
+ hint: string,
256
+ ): Promise<string | null> {
257
+ return ctx.ui.custom<string | null>(
258
+ (_tui, theme, _kb, done) => {
259
+ const container = new Container();
260
+ container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
261
+ container.addChild(new Text(theme.fg("accent", theme.bold(title))));
262
+
263
+ const list = new SelectList(items, Math.min(items.length, 12), getSelectListTheme());
264
+ list.onSelect = (item) => done(item.value);
265
+ list.onCancel = () => done(null);
266
+
267
+ container.addChild(list);
268
+ container.addChild(new Text(theme.fg("dim", hint)));
269
+ container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
270
+
271
+ return {
272
+ render: (width: number) => container.render(width),
273
+ invalidate: () => container.invalidate(),
274
+ handleInput: (data: string) => {
275
+ if (matchesKey(data, "escape")) {
276
+ done(null);
277
+ return;
278
+ }
279
+ list.handleInput(data);
280
+ _tui.requestRender();
281
+ },
282
+ };
283
+ },
284
+ { overlay: true, overlayOptions: { width: "60%" } },
285
+ );
286
+ }
287
+
288
+ const errMsg = (err: unknown): string => (err instanceof Error ? err.message : String(err));
289
+
290
+ /** Fetch a server's models (live, falling back to last-known on failure). */
291
+ async function fetchModels(
292
+ ctx: ExtensionCommandContext,
293
+ registry: ServerRegistry,
294
+ record: ServerRecord,
295
+ ): Promise<ModelDescriptor[] | null> {
296
+ const adapter = adapterFor(record.kind);
297
+ const cred = await registry.resolveCredential(record);
298
+ const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 5000 });
299
+ try {
300
+ return await adapter.listModels(serverFromRecord(record), cred, probe);
301
+ } catch (err) {
302
+ if (record.lastKnownModels && record.lastKnownModels.length > 0) {
303
+ return record.lastKnownModels;
304
+ }
305
+ ctx.ui.notify(`Crossbar: could not list models — ${errMsg(err)}`, "error");
306
+ return null;
307
+ }
308
+ }
309
+
310
+ /** Switch the active model or load a model: pick from the list, then call the adapter. */
311
+ async function performModelAction(
312
+ ctx: ExtensionCommandContext,
313
+ registry: ServerRegistry,
314
+ record: ServerRecord,
315
+ action: "switch" | "load",
316
+ ): Promise<void> {
317
+ const adapter = adapterFor(record.kind);
318
+ const models = await fetchModels(ctx, registry, record);
319
+ if (!models) return;
320
+ if (models.length === 0) {
321
+ ctx.ui.notify("Crossbar: server returned no models.", "warning");
322
+ return;
323
+ }
324
+
325
+ const title = action === "switch"
326
+ ? `Switch model — ${record.label}`
327
+ : `Load model — ${record.label}`;
328
+ const modelId = await selectOverlay(
329
+ ctx,
330
+ title,
331
+ buildModelItems(models.filter((m) => !m.embeddings)),
332
+ "↑↓ navigate · Enter select · Esc cancel",
333
+ );
334
+ if (!modelId) return;
335
+
336
+ const cred = await registry.resolveCredential(record);
337
+ // Loads can be slow (cold model into VRAM) — give them a generous budget.
338
+ const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 60_000 });
339
+
340
+ ctx.ui.notify(
341
+ `Crossbar: ${action === "switch" ? "switching to" : "loading"} ${modelId}…`,
342
+ "info",
343
+ );
344
+ try {
345
+ if (action === "switch") {
346
+ if (!canSwitch(adapter)) return;
347
+ await adapter.switchModel(serverFromRecord(record), cred, modelId, probe);
348
+ } else {
349
+ if (!canLoadUnload(adapter)) return;
350
+ await adapter.loadUnload(serverFromRecord(record), cred, modelId, "load", probe);
351
+ }
352
+ ctx.ui.notify(
353
+ `Crossbar: ${modelId} ${action === "switch" ? "is now active" : "loaded"}.`,
354
+ "info",
355
+ );
356
+ } catch (err) {
357
+ ctx.ui.notify(`Crossbar: ${action} failed — ${errMsg(err)}`, "error");
358
+ }
359
+ }
360
+
361
+ /** Unload a currently-loaded model: resolve the loaded set, pick one, evict it. */
362
+ async function performUnload(
363
+ ctx: ExtensionCommandContext,
364
+ registry: ServerRegistry,
365
+ record: ServerRecord,
366
+ ): Promise<void> {
367
+ const adapter = adapterFor(record.kind);
368
+ if (!canLoadUnload(adapter)) return;
369
+ const cred = await registry.resolveCredential(record);
370
+ const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 5000 });
371
+
372
+ let loadedIds: string[] = record.lastKnownLoaded ?? [];
373
+ if (canIntrospect(adapter)) {
374
+ try {
375
+ const state = await adapter.introspectLoaded(serverFromRecord(record), cred, probe);
376
+ loadedIds = state.loadedModelIds;
377
+ } catch {
378
+ // Fall back to last-known on a failed introspection.
379
+ }
380
+ }
381
+ if (loadedIds.length === 0) {
382
+ ctx.ui.notify("Crossbar: no models are currently loaded.", "info");
383
+ return;
384
+ }
385
+
386
+ const modelId = await selectOverlay(
387
+ ctx,
388
+ `Unload model — ${record.label}`,
389
+ loadedIds.map((id) => ({ value: id, label: id })),
390
+ "↑↓ navigate · Enter select · Esc cancel",
391
+ );
392
+ if (!modelId) return;
393
+
394
+ ctx.ui.notify(`Crossbar: unloading ${modelId}…`, "info");
395
+ try {
396
+ await adapter.loadUnload(serverFromRecord(record), cred, modelId, "unload", probe);
397
+ ctx.ui.notify(`Crossbar: ${modelId} unloaded.`, "info");
398
+ } catch (err) {
399
+ ctx.ui.notify(`Crossbar: unload failed — ${errMsg(err)}`, "error");
400
+ }
401
+ }
402
+
403
+ /** Read and report the currently-loaded models for a server. */
404
+ async function performIntrospect(
405
+ ctx: ExtensionCommandContext,
406
+ registry: ServerRegistry,
407
+ record: ServerRecord,
408
+ ): Promise<void> {
409
+ const adapter = adapterFor(record.kind);
410
+ if (!canIntrospect(adapter)) return;
411
+ const cred = await registry.resolveCredential(record);
412
+ const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 5000 });
413
+
414
+ let state: LoadedState;
415
+ try {
416
+ state = await adapter.introspectLoaded(serverFromRecord(record), cred, probe);
417
+ } catch (err) {
418
+ ctx.ui.notify(`Crossbar: could not read loaded models — ${errMsg(err)}`, "error");
419
+ return;
420
+ }
421
+ if (state.loadedModelIds.length === 0) {
422
+ ctx.ui.notify(`Crossbar: ${record.label} has no models loaded.`, "info");
423
+ return;
424
+ }
425
+ const summary = state.loadedModelIds
426
+ .map((id) => {
427
+ const ctxLen = state.perModel?.[id]?.contextLength;
428
+ if (ctxLen === undefined) return id;
429
+ const ctxStr = ctxLen >= 1000 ? `${Math.round(ctxLen / 1000)}k` : `${ctxLen}`;
430
+ return `${id} (${ctxStr} ctx)`;
431
+ })
432
+ .join(", ");
433
+ ctx.ui.notify(`Crossbar: ${record.label} loaded — ${summary}`, "info");
434
+ }
435
+
436
+ /** Confirm and remove a server from the registry, auth.json, and Pi. */
437
+ async function performRemove(
438
+ pi: ExtensionAPI,
439
+ ctx: ExtensionCommandContext,
440
+ registry: ServerRegistry,
441
+ record: ServerRecord,
442
+ ): Promise<void> {
443
+ const confirm = await ctx.ui.select(`Remove ${record.label}?`, ["Cancel", "Remove server"]);
444
+ if (confirm !== "Remove server") return;
445
+ unregisterServer(pi, record);
446
+ await registry.remove(record.id);
447
+ ctx.ui.notify(`Crossbar: removed ${record.label}.`, "info");
448
+ }
449
+
450
+ /**
451
+ * Open the manage overlay for an already-registered server: show the
452
+ * capability-filtered action menu and dispatch the chosen action.
453
+ */
454
+ export async function openServerActions(
455
+ pi: ExtensionAPI,
456
+ ctx: ExtensionCommandContext,
457
+ deps: OnboardingDeps,
458
+ record: ServerRecord,
459
+ ): Promise<void> {
460
+ const { registry } = deps;
461
+ const adapter = adapterFor(record.kind);
462
+
463
+ const choice = await selectOverlay(
464
+ ctx,
465
+ `Manage — ${record.label}`,
466
+ buildManageItems(adapter),
467
+ "↑↓ navigate · Enter select · Esc close",
468
+ );
469
+ if (!choice) return;
470
+
471
+ switch (choice) {
472
+ case "switch":
473
+ await performModelAction(ctx, registry, record, "switch");
474
+ break;
475
+ case "load":
476
+ await performModelAction(ctx, registry, record, "load");
477
+ break;
478
+ case "unload":
479
+ await performUnload(ctx, registry, record);
480
+ break;
481
+ case "introspect":
482
+ await performIntrospect(ctx, registry, record);
483
+ break;
484
+ case "remove":
485
+ await performRemove(pi, ctx, registry, record);
486
+ break;
487
+ }
488
+ }
489
+
180
490
  // ─── Overlay flow driver ────────────────────────────────────────────────────
181
491
 
182
492
  export interface OnboardingDeps {
@@ -203,7 +513,7 @@ export interface OnboardingDeps {
203
513
  * @param deps - injected registry + discover function (for testability)
204
514
  */
205
515
  export async function openOnboarding(
206
- _pi: ExtensionAPI,
516
+ pi: ExtensionAPI,
207
517
  ctx: ExtensionCommandContext,
208
518
  deps: OnboardingDeps,
209
519
  ): Promise<void> {
@@ -299,7 +609,15 @@ export async function openOnboarding(
299
609
 
300
610
  targetBaseUrl = normalizedUrl;
301
611
  } else {
302
- // Discovered server path
612
+ // Already-registered server (discovered or offline) → open the manage overlay
613
+ // instead of re-running the add flow.
614
+ const existingRecord = registry.list().find((r) => r.baseUrl === chosenBaseUrl);
615
+ if (existingRecord) {
616
+ await openServerActions(pi, ctx, deps, existingRecord);
617
+ return;
618
+ }
619
+
620
+ // New discovered server path
303
621
  discoveredServer = discovered.find((s) => s.baseUrl === chosenBaseUrl);
304
622
  targetBaseUrl = chosenBaseUrl;
305
623
  }
@@ -368,47 +686,11 @@ export async function openOnboarding(
368
686
  }
369
687
 
370
688
  // ── Step 5: pick default model ─────────────────────────────────────────────
371
- const modelItems = buildModelItems(models);
372
-
373
- const chosenModelId = await ctx.ui.custom<string | null>(
374
- (_tui, theme, _kb, done) => {
375
- const container = new Container();
376
-
377
- container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
378
- container.addChild(
379
- new Text(theme.fg("accent", theme.bold(`Pick default model — ${discoveredServer!.label}`))),
380
- );
381
-
382
- const list = new SelectList(
383
- modelItems,
384
- Math.min(modelItems.length, 12),
385
- getSelectListTheme(),
386
- );
387
-
388
- list.onSelect = (item) => done(item.value);
389
- list.onCancel = () => done(null);
390
-
391
- container.addChild(list);
392
- container.addChild(
393
- new Text(theme.fg("dim", "↑↓ navigate · Enter select · Esc skip")),
394
- );
395
- container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
396
-
397
- return {
398
- render: (width: number) => container.render(width),
399
- invalidate: () => container.invalidate(),
400
- handleInput: (data: string) => {
401
- // Allow Esc to skip model selection
402
- if (matchesKey(data, "escape")) {
403
- done(null);
404
- return;
405
- }
406
- list.handleInput(data);
407
- _tui.requestRender();
408
- },
409
- };
410
- },
411
- { overlay: true, overlayOptions: { width: "60%" } },
689
+ const chosenModelId = await selectOverlay(
690
+ ctx,
691
+ `Pick default model ${discoveredServer.label}`,
692
+ buildModelItems(models),
693
+ "↑↓ navigate · Enter select · Esc skip",
412
694
  );
413
695
 
414
696
  // chosenModelId === null means the user skipped — still register the server