@hypabolic/crossbar 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ adapter registers under (`oai` = `openai-completions`, `ant` = `anthropic-messag
7
7
  | Backend | port | pi api | listModels | introspectLoaded | switchModel | loadUnload | auth | health | perModelCaps | streaming | discovery fingerprint |
8
8
  |---|---|---|---|---|---|---|---|---|---|---|---|
9
9
  | **Ollama** | 11434 | oai | ✅ `/api/tags`,`/v1/models` | ✅ `/api/ps` | ✅ implicit (request id) | ✅ `keep_alive:0` | ◐ none local | ✅ `GET /` text | ✅ `/api/show` caps + ctx | ✅ | `GET /` → `Ollama is running` |
10
- | **LM Studio** | 1234 | oai | ✅ `/api/v0/models`,`/v1/models` | ✅ `state` field | ✅ JIT + `/api/v1/models/load` | ✅ load/unload + `lms` | ◐ Bearer, none default | ◐ infer 200 | ✅ type+`max_context_length` | ✅ | `/api/v0/models` w/ `state`,`compatibility_type` |
10
+ | **LM Studio** | 1234 | oai | ✅ `/api/v1/models` (v0 fallback) | ✅ `state` field | ✅ JIT + `/api/v1/models/load` | ✅ load/unload + `lms` | ◐ Bearer, none default | ◐ infer 200 | ✅ type+`max_context_length` | ✅ | `/api/v1/models` (v0 fallback) w/ `state`,`compatibility_type` |
11
11
  | **llama-server** | 8080 | oai | ✅ `/v1/models` | ◐ `/props`,`/slots` (single) | ❌ (1/instance) | ❌ classic | ◐ none / `--api-key` | ✅ `/health` | ◐ ctx via `/props`,`meta` | ✅ | `/props` w/ `default_generation_settings`+`build_info` |
12
12
  | **llama-swap** | 8080 | oai/ant | ✅ `/v1/models` (all config) | ✅ `/running` | ✅ via `model` → restart upstream | ✅ `/api/models/unload`, ttl | ◐ optional multi-scheme | ✅ `/health`→OK | ◐ via upstream | ✅ | `/` → `/ui/`; `/running`,`/upstream/{model}` |
13
13
  | **vLLM** | 8000 | oai | ✅ `/v1/models` | ◐ `/is_sleeping` (dev) | ❌ base · ◐ LoRA | ◐ sleep/wake + LoRA | ◐ none / `--api-key` | ✅ `/health` | ◐ `max_model_len` only | ✅ | `/version` + `/metrics` `vllm:` + `owned_by:"vllm"` |
@@ -38,7 +38,7 @@ adapter registers under (`oai` = `openai-completions`, `ant` = `anthropic-messag
38
38
 
39
39
  1. `GET /` → `Ollama is running` ⇒ Ollama · redirect `/ui/` ⇒ llama-swap
40
40
  2. `GET /api/extra/version` → `{"result":"KoboldCpp"}` ⇒ KoboldCpp
41
- 3. `GET /api/v0/models` 200 w/ `state`/`compatibility_type` ⇒ LM Studio
41
+ 3. `GET /api/v1/models` (v0 fallback) 200 w/ `state`/`compatibility_type` ⇒ LM Studio
42
42
  4. `GET /props` w/ `default_generation_settings`+`build_info` ⇒ llama-server / llamafile
43
43
  5. `GET /version` + `/metrics` `vllm:` ⇒ vLLM
44
44
  6. `GET /v1/models` shape: `owned_by:"vllm"`⇒vLLM · `meta.n_ctx_train`⇒llama.cpp ·
package/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![CI](https://github.com/Hypabolic/Crossbar/actions/workflows/ci.yml/badge.svg)](https://github.com/Hypabolic/Crossbar/actions/workflows/ci.yml)
4
4
  [![npm](https://img.shields.io/npm/v/@hypabolic/crossbar)](https://www.npmjs.com/package/@hypabolic/crossbar)
5
5
 
6
- **The local/self-hosted inference connector Pi should have shipped with.**
6
+ **Effortless local & self-hosted model backends for the Pi coding agent.**
7
7
 
8
8
  Crossbar is an extension for the [Pi coding agent](https://github.com/earendil-works/pi) that makes
9
9
  wiring Pi to *any* local or self-hosted model backend effortless — zero hand-edited JSON, all setup
@@ -12,6 +12,8 @@ in-place model switching.
12
12
 
13
13
  > Built by [Hypabolic](https://github.com/hypabolic).
14
14
 
15
+ ![Crossbar onboarding: discover a server, open the manage menu, switch the active model](docs/onboarding.gif)
16
+
15
17
  ---
16
18
 
17
19
  ## Why Crossbar
@@ -104,23 +106,10 @@ The `BackendAdapter` contract (`src/core/`) is the frozen boundary every adapter
104
106
  conformance suite (`tests/conformance/`) validates every adapter against it, and
105
107
  `tests/integration/` exercises the real discovery path over live sockets.
106
108
 
107
- ### CI / releasing
108
-
109
- - **CI** (`.github/workflows/ci.yml`) runs `tsc --noEmit` + the full test suite on every push and PR
110
- (Node 22 & 24).
111
- - **Releases** (`.github/workflows/release.yml`) publish to npm via **GitHub→npm OIDC trusted
112
- publishing** — no tokens or secrets. [Provenance](https://docs.npmjs.com/generating-provenance-statements)
113
- is attached automatically. Two ways:
114
- 1. **Manual** — GitHub → *Actions → Release → Run workflow* → choose `patch` / `minor` / `major`.
115
- It bumps `package.json`, commits, tags `vX.Y.Z`, and publishes.
116
- 2. **Tag push** — `npm version patch && git push --follow-tags` locally.
117
-
118
- **One-time setup:** on npmjs.com, add a **Trusted Publisher** for `@hypabolic/crossbar`
119
- (*Package settings → Trusted Publisher → GitHub Actions*) pointing at repo **`Hypabolic/Crossbar`**
120
- and workflow **`release.yml`**. The workflow authenticates through the OIDC `id-token` it already
121
- requests — no `NPM_TOKEN` needed.
109
+ ### CI
122
110
 
123
- <!-- TODO: add an onboarding demo GIF (docs/onboarding.gif) recorded against a live Ollama + LM Studio. -->
111
+ CI (`.github/workflows/ci.yml`) runs `tsc --noEmit` + the full test suite on every push and PR
112
+ (Node 22 & 24).
124
113
 
125
114
  ## License
126
115
 
Binary file
package/package.json CHANGED
@@ -1,11 +1,18 @@
1
1
  {
2
2
  "name": "@hypabolic/crossbar",
3
- "version": "0.1.0",
4
- "description": "The local/self-hosted inference connector Pi should have shipped with — multi-backend discovery, model switching, and zero-JSON in-TUI onboarding for the Pi coding agent.",
3
+ "version": "0.2.0",
4
+ "description": "The local/self-hosted inference connector for Pi — multi-backend discovery, model switching, and zero-JSON in-TUI onboarding for the Pi coding agent.",
5
5
  "type": "module",
6
6
  "license": "MIT",
7
7
  "author": "Hypabolic",
8
- "homepage": "https://github.com/hypabolic/crossbar",
8
+ "homepage": "https://github.com/Hypabolic/Crossbar#readme",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "git+https://github.com/Hypabolic/Crossbar.git"
12
+ },
13
+ "bugs": {
14
+ "url": "https://github.com/Hypabolic/Crossbar/issues"
15
+ },
9
16
  "keywords": [
10
17
  "pi-package",
11
18
  "pi-extension",
@@ -22,10 +29,12 @@
22
29
  "pi": {
23
30
  "extensions": [
24
31
  "./src/index.ts"
25
- ]
32
+ ],
33
+ "image": "https://raw.githubusercontent.com/Hypabolic/Crossbar/main/docs/onboarding.gif"
26
34
  },
27
35
  "files": [
28
36
  "src",
37
+ "docs/onboarding.gif",
29
38
  "RESEARCH.md",
30
39
  "CAPABILITY-MATRIX.md",
31
40
  "ARCHITECTURE.md",
@@ -35,7 +44,9 @@
35
44
  "scripts": {
36
45
  "check": "tsc --noEmit",
37
46
  "test": "vitest run",
38
- "test:watch": "vitest"
47
+ "test:watch": "vitest",
48
+ "demo:lmstudio": "node scripts/fake-lmstudio.mjs",
49
+ "demo:gif": "node scripts/gen-onboarding-gif.mjs"
39
50
  },
40
51
  "peerDependencies": {
41
52
  "@earendil-works/pi-coding-agent": "0.79.9",
@@ -140,9 +140,15 @@ class GenericAdapter implements BackendAdapter {
140
140
  name: model.name,
141
141
  reasoning: model.reasoning ?? false,
142
142
  input: model.input.length > 0 ? model.input : ["text"],
143
+ // Local inference is free → per-token costs are zero, but cache-hit token
144
+ // COUNTS still matter: Pi maps any `usage.prompt_tokens_details.cached_tokens` the
145
+ // backend reports to `Usage.cacheRead` and displays it regardless of cost. The
146
+ // `supportsUsageInStreaming` flag set below only asks for usage in streaming (never fabricates it), so it is safe even for
147
+ // unknown OpenAI-compatible servers that may not report cache hits.
143
148
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
144
149
  contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
145
150
  maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
151
+ compat: { supportsUsageInStreaming: true },
146
152
  };
147
153
  }
148
154
 
@@ -197,9 +197,14 @@ class LlamacppAdapter implements BackendAdapter {
197
197
  name: model.name,
198
198
  reasoning: model.reasoning ?? false,
199
199
  input: model.input.length > 0 ? model.input : ["text"],
200
+ // Local inference is free → per-token costs are zero, but cache-hit token
201
+ // COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
202
+ // .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
203
+ // streaming usage reporting on so those prompt-cache hits are recorded.
200
204
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
201
205
  contextWindow: model.contextWindow ?? 8192,
202
206
  maxTokens: model.maxTokens ?? 4096,
207
+ compat: { supportsUsageInStreaming: true },
203
208
  };
204
209
  }
205
210
 
@@ -256,9 +256,14 @@ class LlamaswapAdapter implements BackendAdapter {
256
256
  name: model.name,
257
257
  reasoning: model.reasoning ?? false,
258
258
  input: model.input.length > 0 ? model.input : ["text"],
259
+ // Local inference is free → per-token costs are zero, but cache-hit token
260
+ // COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
261
+ // .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
262
+ // streaming usage reporting on so those prompt-cache hits are recorded.
259
263
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
260
264
  contextWindow: model.contextWindow ?? 8192,
261
265
  maxTokens: model.maxTokens ?? 4096,
266
+ compat: { supportsUsageInStreaming: true },
262
267
  };
263
268
  }
264
269
 
@@ -2,16 +2,20 @@
2
2
  * LM Studio backend adapter.
3
3
  *
4
4
  * Implements the BackendAdapter contract for LM Studio's local server.
5
- * Uses the LM Studio-native /api/v0/* endpoints for discovery and management,
6
- * and delegates inference to the OpenAI-compatible /v1/* layer.
5
+ * Uses the LM Studio-native REST API for discovery and management, and delegates
6
+ * inference to the OpenAI-compatible /v1/* layer.
7
+ *
8
+ * LM Studio 0.4.0+ ships a native `/api/v1/*` REST API (recommended); the older
9
+ * `/api/v0/*` API carries the same rich model fields and is kept as a fallback for
10
+ * pre-0.4.0 servers. We prefer v1 and fall back to v0 only on a 404.
7
11
  *
8
12
  * Key API endpoints:
9
- * GET /api/v0/models — model list with state, type, context lengths
10
- * POST /api/v1/models/load — load a model by id
11
- * POST /api/v1/models/unload — unload a model by id
13
+ * GET /api/v1/models (→ /api/v0/models fallback) — model list with state, type, context length
14
+ * POST /api/v1/models/load — load a model by id
15
+ * POST /api/v1/models/unload — unload a model by id
12
16
  *
13
17
  * Fingerprint discriminator: data[] entries have both `state` and
14
- * `compatibility_type` fields (unique to LM Studio's v0 API).
18
+ * `compatibility_type` fields (unique to LM Studio's native API).
15
19
  */
16
20
 
17
21
  import { Capability } from "../core/capability.ts";
@@ -24,9 +28,14 @@ import type {
24
28
  ModelDescriptor,
25
29
  PiModelEntry,
26
30
  Probe,
31
+ ProbeResult,
27
32
  ServerCredential,
28
33
  } from "../core/types.ts";
29
34
 
35
+ /** Native model-list endpoints, in preference order (v1 first, v0 fallback for <0.4.0). */
36
+ const MODELS_V1 = "/api/v1/models";
37
+ const MODELS_V0 = "/api/v0/models";
38
+
30
39
  // ---------------------------------------------------------------------------
31
40
  // LM Studio API shapes (narrowed from unknown JSON)
32
41
  // ---------------------------------------------------------------------------
@@ -103,6 +112,7 @@ function hasLmsDiscriminator(json: unknown): boolean {
103
112
  function toDescriptor(m: LmsModelEntry): ModelDescriptor {
104
113
  const isEmbeddings = m.type === "embeddings";
105
114
  const isVlm = m.type === "vlm";
115
+ const isLoaded = m.state === "loaded";
106
116
 
107
117
  const input: ("text" | "image")[] = ["text"];
108
118
  if (isVlm) input.push("image");
@@ -112,11 +122,24 @@ function toDescriptor(m: LmsModelEntry): ModelDescriptor {
112
122
  name: m.id,
113
123
  input,
114
124
  embeddings: isEmbeddings,
115
- loaded: m.state === "loaded",
125
+ loaded: isLoaded,
116
126
  raw: m,
117
127
  };
118
- if (m.max_context_length !== undefined) {
119
- desc.contextWindow = m.max_context_length;
128
+
129
+ // Context window: LM Studio reports both the model ceiling (`max_context_length`)
130
+ // and the window the model was actually loaded with (`loaded_context_length`),
131
+ // which is frequently configured well below the ceiling (e.g. a 128k model loaded
132
+ // at 4096). Register the OPERATIVE window so Pi budgets against what the server
133
+ // will really accept: prefer the loaded length when the model is resident (and
134
+ // non-zero), otherwise fall back to the model max. `loaded_context_length` is 0 or
135
+ // absent while the model is not loaded, so it never masks the ceiling in that case.
136
+ const loadedCtx =
137
+ isLoaded && typeof m.loaded_context_length === "number" && m.loaded_context_length > 0
138
+ ? m.loaded_context_length
139
+ : undefined;
140
+ const ctx = loadedCtx ?? m.max_context_length;
141
+ if (ctx !== undefined) {
142
+ desc.contextWindow = ctx;
120
143
  }
121
144
  return desc;
122
145
  }
@@ -140,10 +163,21 @@ class LmStudioAdapter implements BackendAdapter {
140
163
  Capability.Streaming,
141
164
  ]);
142
165
 
166
+ /**
167
+ * Fetch the native model list, preferring /api/v1/models and falling back to
168
+ * /api/v0/models for LM Studio < 0.4.0 (which only exposes the v0 REST API).
169
+ * Falls back ONLY on a 404 so auth (401) and unreachable (0) errors propagate.
170
+ */
171
+ private async modelsResponse(probe: Probe): Promise<ProbeResult> {
172
+ const v1 = await probe(MODELS_V1);
173
+ if (v1.status === 404) return probe(MODELS_V0);
174
+ return v1;
175
+ }
176
+
143
177
  // --- fingerprint ----------------------------------------------------------
144
178
 
145
179
  async fingerprint(baseUrl: string, probe: Probe): Promise<DiscoveredServer | null> {
146
- const r = await probe("/api/v0/models");
180
+ const r = await this.modelsResponse(probe);
147
181
  if (!r.ok || r.status === 0) return null;
148
182
  if (!hasLmsDiscriminator(r.json)) return null;
149
183
 
@@ -163,7 +197,7 @@ class LmStudioAdapter implements BackendAdapter {
163
197
  _cred: ServerCredential,
164
198
  probe: Probe,
165
199
  ): Promise<HealthStatus> {
166
- const r = await probe("/api/v0/models");
200
+ const r = await this.modelsResponse(probe);
167
201
  if (r.status === 0) return { state: "unreachable" };
168
202
  if (r.status === 401) return { state: "unauthorized" };
169
203
  if (!r.ok) return { state: "degraded" };
@@ -179,7 +213,7 @@ class LmStudioAdapter implements BackendAdapter {
179
213
  _cred: ServerCredential,
180
214
  probe: Probe,
181
215
  ): Promise<ModelDescriptor[]> {
182
- const r = await probe("/api/v0/models");
216
+ const r = await this.modelsResponse(probe);
183
217
  if (!r.ok) {
184
218
  if (r.status === 401) throw new Error("401 Unauthorized");
185
219
  if (r.status === 0) throw new Error("listModels failed: server unreachable");
@@ -197,7 +231,7 @@ class LmStudioAdapter implements BackendAdapter {
197
231
  _cred: ServerCredential,
198
232
  probe: Probe,
199
233
  ): Promise<LoadedState> {
200
- const r = await probe("/api/v0/models");
234
+ const r = await this.modelsResponse(probe);
201
235
  if (!r.ok) {
202
236
  if (r.status === 401) throw new Error("401 Unauthorized");
203
237
  if (r.status === 0) throw new Error("introspectLoaded failed: server unreachable");
@@ -242,7 +276,7 @@ class LmStudioAdapter implements BackendAdapter {
242
276
  }
243
277
 
244
278
  // Step 2: Confirm via model list that the target is now loaded
245
- const r2 = await probe("/api/v0/models");
279
+ const r2 = await this.modelsResponse(probe);
246
280
  if (!r2.ok) {
247
281
  if (r2.status === 0) throw new Error("switchModel confirmation failed: server went down");
248
282
  if (r2.status === 401) throw new Error("401 Unauthorized");
@@ -287,9 +321,18 @@ class LmStudioAdapter implements BackendAdapter {
287
321
  name: model.name,
288
322
  reasoning: model.reasoning ?? false,
289
323
  input: model.input.length > 0 ? (model.input as ("text" | "image")[]) : ["text"],
324
+ // Local inference is free, so per-token COSTS are zero. The cache-hit token
325
+ // COUNTS still flow and are worth recording: LM Studio's OpenAI-compatible
326
+ // responses report `usage.prompt_tokens_details.cached_tokens`, which Pi maps to
327
+ // `Usage.cacheRead` and surfaces in the TUI regardless of cost. Keep usage
328
+ // reporting on during streaming so those automatic-prefix-cache hits are
329
+ // recorded. We intentionally do NOT set `cacheControlFormat`: LM Studio (llama.cpp
330
+ // engine) caches matching prefixes automatically, so injecting Anthropic-style
331
+ // `cache_control` markers would be wrong for this OpenAI-completions backend.
290
332
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
291
333
  contextWindow: model.contextWindow ?? 8192,
292
334
  maxTokens: model.maxTokens ?? 4096,
335
+ compat: { supportsUsageInStreaming: true },
293
336
  };
294
337
  }
295
338
 
@@ -320,9 +320,14 @@ class OllamaAdapter implements BackendAdapter {
320
320
  name: model.name,
321
321
  reasoning: model.reasoning ?? false,
322
322
  input: model.input.length > 0 ? model.input : ["text"],
323
+ // Local inference is free → per-token costs are zero, but cache-hit token
324
+ // COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
325
+ // .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
326
+ // streaming usage reporting on so those prompt-cache hits are recorded.
323
327
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
324
328
  contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
325
329
  maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
330
+ compat: { supportsUsageInStreaming: true },
326
331
  };
327
332
  }
328
333
 
@@ -177,9 +177,15 @@ class VllmAdapter implements BackendAdapter {
177
177
  name: model.name,
178
178
  reasoning: model.reasoning ?? false,
179
179
  input: model.input.length > 0 ? model.input : ["text"],
180
+ // Local inference is free → per-token costs are zero, but cache-hit token
181
+ // COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
182
+ // .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. vLLM
183
+ // reports cached tokens from its automatic prefix cache; keep streaming usage
184
+ // reporting on so those hits are recorded.
180
185
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
181
186
  contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
182
187
  maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
188
+ compat: { supportsUsageInStreaming: true },
183
189
  };
184
190
  }
185
191
 
@@ -24,45 +24,56 @@ import { Container, type SelectItem, SelectList, Text, matchesKey } from "@earen
24
24
 
25
25
  import type { BackendAdapter } from "../core/backend-adapter.ts";
26
26
  import { canIntrospect, canLoadUnload, canSwitch } from "../core/backend-adapter.ts";
27
- import type { DiscoveredServer, ModelDescriptor, ServerRecord } from "../core/types.ts";
27
+ import type { DiscoveredServer, LoadedState, ModelDescriptor, ServerRecord } from "../core/types.ts";
28
28
  import type { ServerRegistry } from "../registry/registry.ts";
29
29
  import { serverId } from "../registry/ids.ts";
30
30
  import { adapterFor } from "../adapters/index.ts";
31
+ import { unregisterServer } from "../shim/provider-shim.ts";
31
32
  import { createProbe } from "../discovery/probe.ts";
32
33
 
33
34
  // ─── Pure helpers ────────────────────────────────────────────────────────────
34
35
 
36
+ /** Extract a `host:port` string from a base URL for compact labels. */
37
+ function hostPortOf(baseUrl: string): string {
38
+ try {
39
+ const u = new URL(baseUrl);
40
+ return `${u.hostname}:${u.port || (u.protocol === "https:" ? "443" : "80")}`;
41
+ } catch {
42
+ return baseUrl.replace(/^https?:\/\//, "");
43
+ }
44
+ }
45
+
46
+ /** Capitalise a backend kind for display, e.g. "lmstudio" → "Lmstudio". */
47
+ function kindLabelOf(kind: string): string {
48
+ return kind.charAt(0).toUpperCase() + kind.slice(1);
49
+ }
50
+
35
51
  /**
36
- * Build a `SelectItem[]` representing the discovered servers for the top-level
37
- * onboarding list. Already-registered servers are marked with a "(added)" suffix
38
- * so the user can see what is new vs. what Crossbar already knows about.
52
+ * Build a `SelectItem[]` representing the servers shown in the top-level onboarding
53
+ * list. Three kinds of entry can appear:
54
+ * - discovered servers (in discovery order) — already-registered ones get an
55
+ * "(added)" suffix so the user can tell new from known;
56
+ * - registered servers that are NOT currently discovered (e.g. offline), so they
57
+ * can still be managed/removed;
58
+ * - a sentinel "Add manually" entry, always last.
39
59
  *
40
- * Items are ordered: discovered servers first (in discovery order), then a
41
- * sentinel "Add manually" entry at the end.
60
+ * Selecting any already-registered entry opens the manage overlay; selecting a new
61
+ * discovered entry or the sentinel runs the add flow.
42
62
  */
43
63
  export function buildDiscoveredItems(
44
64
  discovered: DiscoveredServer[],
45
65
  existing: ServerRecord[],
46
66
  ): SelectItem[] {
47
67
  const existingIds = new Set(existing.map((r) => r.id));
68
+ const discoveredUrls = new Set(discovered.map((s) => s.baseUrl));
48
69
 
49
70
  const items: SelectItem[] = discovered.map((server): SelectItem => {
50
71
  const id = serverId(server.kind, server.baseUrl);
51
72
  const isAdded = existingIds.has(id);
52
73
 
53
- // Extract host:port from baseUrl for the label suffix
54
- let hostPort: string;
55
- try {
56
- const u = new URL(server.baseUrl);
57
- hostPort = `${u.hostname}:${u.port || (u.protocol === "https:" ? "443" : "80")}`;
58
- } catch {
59
- hostPort = server.baseUrl.replace(/^https?:\/\//, "");
60
- }
61
-
62
74
  // Compose the label "Kind (host:port)"; the status mark is "(added)" or "✓ healthy"
63
- const kindLabel = server.kind.charAt(0).toUpperCase() + server.kind.slice(1);
64
75
  const healthMark = isAdded ? "(added)" : "✓ healthy";
65
- const label = `${kindLabel} (${hostPort})`;
76
+ const label = `${kindLabelOf(server.kind)} (${hostPortOf(server.baseUrl)})`;
66
77
 
67
78
  return {
68
79
  value: server.baseUrl,
@@ -73,6 +84,18 @@ export function buildDiscoveredItems(
73
84
  };
74
85
  });
75
86
 
87
+ // Append registered servers that weren't discovered this scan (offline / not
88
+ // reachable right now) so they remain manageable from the same list.
89
+ for (const record of existing) {
90
+ if (!record.enabled) continue;
91
+ if (discoveredUrls.has(record.baseUrl)) continue;
92
+ items.push({
93
+ value: record.baseUrl,
94
+ label: `${kindLabelOf(record.kind)} (${hostPortOf(record.baseUrl)}) (added)`,
95
+ description: "Registered · not currently discovered",
96
+ });
97
+ }
98
+
76
99
  // Always append the manual-add sentinel
77
100
  items.push({
78
101
  value: "__manual__",
@@ -151,6 +174,36 @@ export function capabilityActions(
151
174
  return actions;
152
175
  }
153
176
 
177
+ /** One-line hints shown under each manage action. */
178
+ const ACTION_DESCRIPTIONS: Record<string, string> = {
179
+ switch: "Make a model the active/served one",
180
+ load: "Load a model into memory",
181
+ unload: "Evict a loaded model from memory",
182
+ introspect: "Show which models are currently loaded",
183
+ remove: "Forget this server and delete its stored key",
184
+ };
185
+
186
+ /**
187
+ * Build the manage-overlay action list for an already-registered server: the
188
+ * adapter's capability-filtered actions (switch / load / unload / introspect) plus
189
+ * a "Remove server" action that is always available. Backends without any local
190
+ * capabilities (vLLM, OpenAI, Anthropic, generic) show only "Remove server".
191
+ */
192
+ export function buildManageItems(adapter: BackendAdapter): SelectItem[] {
193
+ const items: SelectItem[] = capabilityActions(adapter).map((a) => {
194
+ const item: SelectItem = { value: a.value, label: a.label };
195
+ const desc = ACTION_DESCRIPTIONS[a.value];
196
+ if (desc !== undefined) item.description = desc;
197
+ return item;
198
+ });
199
+ items.push({
200
+ value: "remove",
201
+ label: "Remove server",
202
+ description: ACTION_DESCRIPTIONS["remove"]!,
203
+ });
204
+ return items;
205
+ }
206
+
154
207
  /**
155
208
  * Coerce a user-supplied string (which may be bare "host:port", missing a scheme,
156
209
  * or already a valid URL) into a well-formed origin with no trailing slash.
@@ -177,6 +230,263 @@ export function normalizeManualUrl(input: string): string {
177
230
  return u.origin.replace(/\/+$/, "");
178
231
  }
179
232
 
233
+ // ─── Shared overlay + server-action helpers ─────────────────────────────────
234
+
235
+ /** Reconstruct a minimal DiscoveredServer from a persisted record for adapter calls. */
236
+ function serverFromRecord(record: ServerRecord): DiscoveredServer {
237
+ return {
238
+ kind: record.kind,
239
+ baseUrl: record.baseUrl,
240
+ auth: record.auth,
241
+ label: record.label,
242
+ confidence: 1,
243
+ };
244
+ }
245
+
246
+ /**
247
+ * Render a single-select overlay (titled SelectList in an accent border) and resolve
248
+ * to the chosen item value, or `null` on Esc/cancel. Shared by the model picker and
249
+ * the manage menus so they stay visually consistent.
250
+ */
251
+ function selectOverlay(
252
+ ctx: ExtensionCommandContext,
253
+ title: string,
254
+ items: SelectItem[],
255
+ hint: string,
256
+ ): Promise<string | null> {
257
+ return ctx.ui.custom<string | null>(
258
+ (_tui, theme, _kb, done) => {
259
+ const container = new Container();
260
+ container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
261
+ container.addChild(new Text(theme.fg("accent", theme.bold(title))));
262
+
263
+ const list = new SelectList(items, Math.min(items.length, 12), getSelectListTheme());
264
+ list.onSelect = (item) => done(item.value);
265
+ list.onCancel = () => done(null);
266
+
267
+ container.addChild(list);
268
+ container.addChild(new Text(theme.fg("dim", hint)));
269
+ container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
270
+
271
+ return {
272
+ render: (width: number) => container.render(width),
273
+ invalidate: () => container.invalidate(),
274
+ handleInput: (data: string) => {
275
+ if (matchesKey(data, "escape")) {
276
+ done(null);
277
+ return;
278
+ }
279
+ list.handleInput(data);
280
+ _tui.requestRender();
281
+ },
282
+ };
283
+ },
284
+ { overlay: true, overlayOptions: { width: "60%" } },
285
+ );
286
+ }
287
+
288
+ const errMsg = (err: unknown): string => (err instanceof Error ? err.message : String(err));
289
+
290
+ /** Fetch a server's models (live, falling back to last-known on failure). */
291
+ async function fetchModels(
292
+ ctx: ExtensionCommandContext,
293
+ registry: ServerRegistry,
294
+ record: ServerRecord,
295
+ ): Promise<ModelDescriptor[] | null> {
296
+ const adapter = adapterFor(record.kind);
297
+ const cred = await registry.resolveCredential(record);
298
+ const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 5000 });
299
+ try {
300
+ return await adapter.listModels(serverFromRecord(record), cred, probe);
301
+ } catch (err) {
302
+ if (record.lastKnownModels && record.lastKnownModels.length > 0) {
303
+ return record.lastKnownModels;
304
+ }
305
+ ctx.ui.notify(`Crossbar: could not list models — ${errMsg(err)}`, "error");
306
+ return null;
307
+ }
308
+ }
309
+
310
+ /** Switch the active model or load a model: pick from the list, then call the adapter. */
311
+ async function performModelAction(
312
+ ctx: ExtensionCommandContext,
313
+ registry: ServerRegistry,
314
+ record: ServerRecord,
315
+ action: "switch" | "load",
316
+ ): Promise<void> {
317
+ const adapter = adapterFor(record.kind);
318
+ const models = await fetchModels(ctx, registry, record);
319
+ if (!models) return;
320
+ if (models.length === 0) {
321
+ ctx.ui.notify("Crossbar: server returned no models.", "warning");
322
+ return;
323
+ }
324
+
325
+ const title = action === "switch"
326
+ ? `Switch model — ${record.label}`
327
+ : `Load model — ${record.label}`;
328
+ const modelId = await selectOverlay(
329
+ ctx,
330
+ title,
331
+ buildModelItems(models.filter((m) => !m.embeddings)),
332
+ "↑↓ navigate · Enter select · Esc cancel",
333
+ );
334
+ if (!modelId) return;
335
+
336
+ const cred = await registry.resolveCredential(record);
337
+ // Loads can be slow (cold model into VRAM) — give them a generous budget.
338
+ const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 60_000 });
339
+
340
+ ctx.ui.notify(
341
+ `Crossbar: ${action === "switch" ? "switching to" : "loading"} ${modelId}…`,
342
+ "info",
343
+ );
344
+ try {
345
+ if (action === "switch") {
346
+ if (!canSwitch(adapter)) return;
347
+ await adapter.switchModel(serverFromRecord(record), cred, modelId, probe);
348
+ } else {
349
+ if (!canLoadUnload(adapter)) return;
350
+ await adapter.loadUnload(serverFromRecord(record), cred, modelId, "load", probe);
351
+ }
352
+ ctx.ui.notify(
353
+ `Crossbar: ${modelId} ${action === "switch" ? "is now active" : "loaded"}.`,
354
+ "info",
355
+ );
356
+ } catch (err) {
357
+ ctx.ui.notify(`Crossbar: ${action} failed — ${errMsg(err)}`, "error");
358
+ }
359
+ }
360
+
361
+ /** Unload a currently-loaded model: resolve the loaded set, pick one, evict it. */
362
+ async function performUnload(
363
+ ctx: ExtensionCommandContext,
364
+ registry: ServerRegistry,
365
+ record: ServerRecord,
366
+ ): Promise<void> {
367
+ const adapter = adapterFor(record.kind);
368
+ if (!canLoadUnload(adapter)) return;
369
+ const cred = await registry.resolveCredential(record);
370
+ const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 5000 });
371
+
372
+ let loadedIds: string[] = record.lastKnownLoaded ?? [];
373
+ if (canIntrospect(adapter)) {
374
+ try {
375
+ const state = await adapter.introspectLoaded(serverFromRecord(record), cred, probe);
376
+ loadedIds = state.loadedModelIds;
377
+ } catch {
378
+ // Fall back to last-known on a failed introspection.
379
+ }
380
+ }
381
+ if (loadedIds.length === 0) {
382
+ ctx.ui.notify("Crossbar: no models are currently loaded.", "info");
383
+ return;
384
+ }
385
+
386
+ const modelId = await selectOverlay(
387
+ ctx,
388
+ `Unload model — ${record.label}`,
389
+ loadedIds.map((id) => ({ value: id, label: id })),
390
+ "↑↓ navigate · Enter select · Esc cancel",
391
+ );
392
+ if (!modelId) return;
393
+
394
+ ctx.ui.notify(`Crossbar: unloading ${modelId}…`, "info");
395
+ try {
396
+ await adapter.loadUnload(serverFromRecord(record), cred, modelId, "unload", probe);
397
+ ctx.ui.notify(`Crossbar: ${modelId} unloaded.`, "info");
398
+ } catch (err) {
399
+ ctx.ui.notify(`Crossbar: unload failed — ${errMsg(err)}`, "error");
400
+ }
401
+ }
402
+
403
+ /** Read and report the currently-loaded models for a server. */
404
+ async function performIntrospect(
405
+ ctx: ExtensionCommandContext,
406
+ registry: ServerRegistry,
407
+ record: ServerRecord,
408
+ ): Promise<void> {
409
+ const adapter = adapterFor(record.kind);
410
+ if (!canIntrospect(adapter)) return;
411
+ const cred = await registry.resolveCredential(record);
412
+ const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 5000 });
413
+
414
+ let state: LoadedState;
415
+ try {
416
+ state = await adapter.introspectLoaded(serverFromRecord(record), cred, probe);
417
+ } catch (err) {
418
+ ctx.ui.notify(`Crossbar: could not read loaded models — ${errMsg(err)}`, "error");
419
+ return;
420
+ }
421
+ if (state.loadedModelIds.length === 0) {
422
+ ctx.ui.notify(`Crossbar: ${record.label} has no models loaded.`, "info");
423
+ return;
424
+ }
425
+ const summary = state.loadedModelIds
426
+ .map((id) => {
427
+ const ctxLen = state.perModel?.[id]?.contextLength;
428
+ if (ctxLen === undefined) return id;
429
+ const ctxStr = ctxLen >= 1000 ? `${Math.round(ctxLen / 1000)}k` : `${ctxLen}`;
430
+ return `${id} (${ctxStr} ctx)`;
431
+ })
432
+ .join(", ");
433
+ ctx.ui.notify(`Crossbar: ${record.label} loaded — ${summary}`, "info");
434
+ }
435
+
436
+ /** Confirm and remove a server from the registry, auth.json, and Pi. */
437
+ async function performRemove(
438
+ pi: ExtensionAPI,
439
+ ctx: ExtensionCommandContext,
440
+ registry: ServerRegistry,
441
+ record: ServerRecord,
442
+ ): Promise<void> {
443
+ const confirm = await ctx.ui.select(`Remove ${record.label}?`, ["Cancel", "Remove server"]);
444
+ if (confirm !== "Remove server") return;
445
+ unregisterServer(pi, record);
446
+ await registry.remove(record.id);
447
+ ctx.ui.notify(`Crossbar: removed ${record.label}.`, "info");
448
+ }
449
+
450
+ /**
451
+ * Open the manage overlay for an already-registered server: show the
452
+ * capability-filtered action menu and dispatch the chosen action.
453
+ */
454
+ export async function openServerActions(
455
+ pi: ExtensionAPI,
456
+ ctx: ExtensionCommandContext,
457
+ deps: OnboardingDeps,
458
+ record: ServerRecord,
459
+ ): Promise<void> {
460
+ const { registry } = deps;
461
+ const adapter = adapterFor(record.kind);
462
+
463
+ const choice = await selectOverlay(
464
+ ctx,
465
+ `Manage — ${record.label}`,
466
+ buildManageItems(adapter),
467
+ "↑↓ navigate · Enter select · Esc close",
468
+ );
469
+ if (!choice) return;
470
+
471
+ switch (choice) {
472
+ case "switch":
473
+ await performModelAction(ctx, registry, record, "switch");
474
+ break;
475
+ case "load":
476
+ await performModelAction(ctx, registry, record, "load");
477
+ break;
478
+ case "unload":
479
+ await performUnload(ctx, registry, record);
480
+ break;
481
+ case "introspect":
482
+ await performIntrospect(ctx, registry, record);
483
+ break;
484
+ case "remove":
485
+ await performRemove(pi, ctx, registry, record);
486
+ break;
487
+ }
488
+ }
489
+
180
490
  // ─── Overlay flow driver ────────────────────────────────────────────────────
181
491
 
182
492
  export interface OnboardingDeps {
@@ -203,7 +513,7 @@ export interface OnboardingDeps {
203
513
  * @param deps - injected registry + discover function (for testability)
204
514
  */
205
515
  export async function openOnboarding(
206
- _pi: ExtensionAPI,
516
+ pi: ExtensionAPI,
207
517
  ctx: ExtensionCommandContext,
208
518
  deps: OnboardingDeps,
209
519
  ): Promise<void> {
@@ -299,7 +609,15 @@ export async function openOnboarding(
299
609
 
300
610
  targetBaseUrl = normalizedUrl;
301
611
  } else {
302
- // Discovered server path
612
+ // Already-registered server (discovered or offline) → open the manage overlay
613
+ // instead of re-running the add flow.
614
+ const existingRecord = registry.list().find((r) => r.baseUrl === chosenBaseUrl);
615
+ if (existingRecord) {
616
+ await openServerActions(pi, ctx, deps, existingRecord);
617
+ return;
618
+ }
619
+
620
+ // New discovered server path
303
621
  discoveredServer = discovered.find((s) => s.baseUrl === chosenBaseUrl);
304
622
  targetBaseUrl = chosenBaseUrl;
305
623
  }
@@ -368,47 +686,11 @@ export async function openOnboarding(
368
686
  }
369
687
 
370
688
  // ── Step 5: pick default model ─────────────────────────────────────────────
371
- const modelItems = buildModelItems(models);
372
-
373
- const chosenModelId = await ctx.ui.custom<string | null>(
374
- (_tui, theme, _kb, done) => {
375
- const container = new Container();
376
-
377
- container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
378
- container.addChild(
379
- new Text(theme.fg("accent", theme.bold(`Pick default model — ${discoveredServer!.label}`))),
380
- );
381
-
382
- const list = new SelectList(
383
- modelItems,
384
- Math.min(modelItems.length, 12),
385
- getSelectListTheme(),
386
- );
387
-
388
- list.onSelect = (item) => done(item.value);
389
- list.onCancel = () => done(null);
390
-
391
- container.addChild(list);
392
- container.addChild(
393
- new Text(theme.fg("dim", "↑↓ navigate · Enter select · Esc skip")),
394
- );
395
- container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
396
-
397
- return {
398
- render: (width: number) => container.render(width),
399
- invalidate: () => container.invalidate(),
400
- handleInput: (data: string) => {
401
- // Allow Esc to skip model selection
402
- if (matchesKey(data, "escape")) {
403
- done(null);
404
- return;
405
- }
406
- list.handleInput(data);
407
- _tui.requestRender();
408
- },
409
- };
410
- },
411
- { overlay: true, overlayOptions: { width: "60%" } },
689
+ const chosenModelId = await selectOverlay(
690
+ ctx,
691
+ `Pick default model ${discoveredServer.label}`,
692
+ buildModelItems(models),
693
+ "↑↓ navigate · Enter select · Esc skip",
412
694
  );
413
695
 
414
696
  // chosenModelId === null means the user skipped — still register the server