@hypabolic/crossbar 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CAPABILITY-MATRIX.md +2 -2
- package/README.md +6 -17
- package/docs/onboarding.gif +0 -0
- package/package.json +16 -5
- package/src/adapters/generic.ts +6 -0
- package/src/adapters/llamacpp.ts +5 -0
- package/src/adapters/llamaswap.ts +5 -0
- package/src/adapters/lmstudio.ts +57 -14
- package/src/adapters/ollama.ts +5 -0
- package/src/adapters/vllm.ts +6 -0
- package/src/ui/onboarding.ts +342 -60
package/CAPABILITY-MATRIX.md
CHANGED
|
@@ -7,7 +7,7 @@ adapter registers under (`oai` = `openai-completions`, `ant` = `anthropic-messag
|
|
|
7
7
|
| Backend | port | pi api | listModels | introspectLoaded | switchModel | loadUnload | auth | health | perModelCaps | streaming | discovery fingerprint |
|
|
8
8
|
|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
9
9
|
| **Ollama** | 11434 | oai | ✅ `/api/tags`,`/v1/models` | ✅ `/api/ps` | ✅ implicit (request id) | ✅ `keep_alive:0` | ◐ none local | ✅ `GET /` text | ✅ `/api/show` caps + ctx | ✅ | `GET /` → `Ollama is running` |
|
|
10
|
-
| **LM Studio** | 1234 | oai | ✅ `/api/
|
|
10
|
+
| **LM Studio** | 1234 | oai | ✅ `/api/v1/models` (v0 fallback) | ✅ `state` field | ✅ JIT + `/api/v1/models/load` | ✅ load/unload + `lms` | ◐ Bearer, none default | ◐ infer 200 | ✅ type+`max_context_length` | ✅ | `/api/v1/models` (v0 fallback) w/ `state`,`compatibility_type` |
|
|
11
11
|
| **llama-server** | 8080 | oai | ✅ `/v1/models` | ◐ `/props`,`/slots` (single) | ❌ (1/instance) | ❌ classic | ◐ none / `--api-key` | ✅ `/health` | ◐ ctx via `/props`,`meta` | ✅ | `/props` w/ `default_generation_settings`+`build_info` |
|
|
12
12
|
| **llama-swap** | 8080 | oai/ant | ✅ `/v1/models` (all config) | ✅ `/running` | ✅ via `model` → restart upstream | ✅ `/api/models/unload`, ttl | ◐ optional multi-scheme | ✅ `/health`→OK | ◐ via upstream | ✅ | `/` → `/ui/`; `/running`,`/upstream/{model}` |
|
|
13
13
|
| **vLLM** | 8000 | oai | ✅ `/v1/models` | ◐ `/is_sleeping` (dev) | ❌ base · ◐ LoRA | ◐ sleep/wake + LoRA | ◐ none / `--api-key` | ✅ `/health` | ◐ `max_model_len` only | ✅ | `/version` + `/metrics` `vllm:` + `owned_by:"vllm"` |
|
|
@@ -38,7 +38,7 @@ adapter registers under (`oai` = `openai-completions`, `ant` = `anthropic-messag
|
|
|
38
38
|
|
|
39
39
|
1. `GET /` → `Ollama is running` ⇒ Ollama · redirect `/ui/` ⇒ llama-swap
|
|
40
40
|
2. `GET /api/extra/version` → `{"result":"KoboldCpp"}` ⇒ KoboldCpp
|
|
41
|
-
3. `GET /api/
|
|
41
|
+
3. `GET /api/v1/models` (v0 fallback) 200 w/ `state`/`compatibility_type` ⇒ LM Studio
|
|
42
42
|
4. `GET /props` w/ `default_generation_settings`+`build_info` ⇒ llama-server / llamafile
|
|
43
43
|
5. `GET /version` + `/metrics` `vllm:` ⇒ vLLM
|
|
44
44
|
6. `GET /v1/models` shape: `owned_by:"vllm"`⇒vLLM · `meta.n_ctx_train`⇒llama.cpp ·
|
package/README.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
[CI](https://github.com/Hypabolic/Crossbar/actions/workflows/ci.yml)
|
|
4
4
|
[npm](https://www.npmjs.com/package/@hypabolic/crossbar)
|
|
5
5
|
|
|
6
|
-
**
|
|
6
|
+
**Effortless local & self-hosted model backends for the Pi coding agent.**
|
|
7
7
|
|
|
8
8
|
Crossbar is an extension for the [Pi coding agent](https://github.com/earendil-works/pi) that makes
|
|
9
9
|
wiring Pi to *any* local or self-hosted model backend effortless — zero hand-edited JSON, all setup
|
|
@@ -12,6 +12,8 @@ in-place model switching.
|
|
|
12
12
|
|
|
13
13
|
> Built by [Hypabolic](https://github.com/hypabolic).
|
|
14
14
|
|
|
15
|
+

|
|
16
|
+
|
|
15
17
|
---
|
|
16
18
|
|
|
17
19
|
## Why Crossbar
|
|
@@ -104,23 +106,10 @@ The `BackendAdapter` contract (`src/core/`) is the frozen boundary every adapter
|
|
|
104
106
|
conformance suite (`tests/conformance/`) validates every adapter against it, and
|
|
105
107
|
`tests/integration/` exercises the real discovery path over live sockets.
|
|
106
108
|
|
|
107
|
-
### CI
|
|
108
|
-
|
|
109
|
-
- **CI** (`.github/workflows/ci.yml`) runs `tsc --noEmit` + the full test suite on every push and PR
|
|
110
|
-
(Node 22 & 24).
|
|
111
|
-
- **Releases** (`.github/workflows/release.yml`) publish to npm via **GitHub→npm OIDC trusted
|
|
112
|
-
publishing** — no tokens or secrets. [Provenance](https://docs.npmjs.com/generating-provenance-statements)
|
|
113
|
-
is attached automatically. Two ways:
|
|
114
|
-
1. **Manual** — GitHub → *Actions → Release → Run workflow* → choose `patch` / `minor` / `major`.
|
|
115
|
-
It bumps `package.json`, commits, tags `vX.Y.Z`, and publishes.
|
|
116
|
-
2. **Tag push** — `npm version patch && git push --follow-tags` locally.
|
|
117
|
-
|
|
118
|
-
**One-time setup:** on npmjs.com, add a **Trusted Publisher** for `@hypabolic/crossbar`
|
|
119
|
-
(*Package settings → Trusted Publisher → GitHub Actions*) pointing at repo **`Hypabolic/Crossbar`**
|
|
120
|
-
and workflow **`release.yml`**. The workflow authenticates through the OIDC `id-token` it already
|
|
121
|
-
requests — no `NPM_TOKEN` needed.
|
|
109
|
+
### CI
|
|
122
110
|
|
|
123
|
-
|
|
111
|
+
CI (`.github/workflows/ci.yml`) runs `tsc --noEmit` + the full test suite on every push and PR
|
|
112
|
+
(Node 22 & 24).
|
|
124
113
|
|
|
125
114
|
## License
|
|
126
115
|
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,11 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hypabolic/crossbar",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "The local/self-hosted inference connector Pi
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "The local/self-hosted inference connector for Pi — multi-backend discovery, model switching, and zero-JSON in-TUI onboarding for the Pi coding agent.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"author": "Hypabolic",
|
|
8
|
-
"homepage": "https://github.com/
|
|
8
|
+
"homepage": "https://github.com/Hypabolic/Crossbar#readme",
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "git+https://github.com/Hypabolic/Crossbar.git"
|
|
12
|
+
},
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/Hypabolic/Crossbar/issues"
|
|
15
|
+
},
|
|
9
16
|
"keywords": [
|
|
10
17
|
"pi-package",
|
|
11
18
|
"pi-extension",
|
|
@@ -22,10 +29,12 @@
|
|
|
22
29
|
"pi": {
|
|
23
30
|
"extensions": [
|
|
24
31
|
"./src/index.ts"
|
|
25
|
-
]
|
|
32
|
+
],
|
|
33
|
+
"image": "https://raw.githubusercontent.com/Hypabolic/Crossbar/main/docs/onboarding.gif"
|
|
26
34
|
},
|
|
27
35
|
"files": [
|
|
28
36
|
"src",
|
|
37
|
+
"docs/onboarding.gif",
|
|
29
38
|
"RESEARCH.md",
|
|
30
39
|
"CAPABILITY-MATRIX.md",
|
|
31
40
|
"ARCHITECTURE.md",
|
|
@@ -35,7 +44,9 @@
|
|
|
35
44
|
"scripts": {
|
|
36
45
|
"check": "tsc --noEmit",
|
|
37
46
|
"test": "vitest run",
|
|
38
|
-
"test:watch": "vitest"
|
|
47
|
+
"test:watch": "vitest",
|
|
48
|
+
"demo:lmstudio": "node scripts/fake-lmstudio.mjs",
|
|
49
|
+
"demo:gif": "node scripts/gen-onboarding-gif.mjs"
|
|
39
50
|
},
|
|
40
51
|
"peerDependencies": {
|
|
41
52
|
"@earendil-works/pi-coding-agent": "0.79.9",
|
package/src/adapters/generic.ts
CHANGED
|
@@ -140,9 +140,15 @@ class GenericAdapter implements BackendAdapter {
|
|
|
140
140
|
name: model.name,
|
|
141
141
|
reasoning: model.reasoning ?? false,
|
|
142
142
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
143
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
144
|
+
// COUNTS still matter: Pi maps any `usage.prompt_tokens_details.cached_tokens` the
|
|
145
|
+
// backend reports to `Usage.cacheRead` and displays it regardless of cost. The
|
|
146
|
+
// flag only asks for usage in streaming (never fabricates), so it is safe even for
|
|
147
|
+
// unknown OpenAI-compatible servers that may not report cache hits.
|
|
143
148
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
144
149
|
contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
|
|
145
150
|
maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
151
|
+
compat: { supportsUsageInStreaming: true },
|
|
146
152
|
};
|
|
147
153
|
}
|
|
148
154
|
|
package/src/adapters/llamacpp.ts
CHANGED
|
@@ -197,9 +197,14 @@ class LlamacppAdapter implements BackendAdapter {
|
|
|
197
197
|
name: model.name,
|
|
198
198
|
reasoning: model.reasoning ?? false,
|
|
199
199
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
200
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
201
|
+
// COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
|
|
202
|
+
// .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
|
|
203
|
+
// streaming usage reporting on so those prompt-cache hits are recorded.
|
|
200
204
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
201
205
|
contextWindow: model.contextWindow ?? 8192,
|
|
202
206
|
maxTokens: model.maxTokens ?? 4096,
|
|
207
|
+
compat: { supportsUsageInStreaming: true },
|
|
203
208
|
};
|
|
204
209
|
}
|
|
205
210
|
|
|
@@ -256,9 +256,14 @@ class LlamaswapAdapter implements BackendAdapter {
|
|
|
256
256
|
name: model.name,
|
|
257
257
|
reasoning: model.reasoning ?? false,
|
|
258
258
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
259
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
260
|
+
// COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
|
|
261
|
+
// .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
|
|
262
|
+
// streaming usage reporting on so those prompt-cache hits are recorded.
|
|
259
263
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
260
264
|
contextWindow: model.contextWindow ?? 8192,
|
|
261
265
|
maxTokens: model.maxTokens ?? 4096,
|
|
266
|
+
compat: { supportsUsageInStreaming: true },
|
|
262
267
|
};
|
|
263
268
|
}
|
|
264
269
|
|
package/src/adapters/lmstudio.ts
CHANGED
|
@@ -2,16 +2,20 @@
|
|
|
2
2
|
* LM Studio backend adapter.
|
|
3
3
|
*
|
|
4
4
|
* Implements the BackendAdapter contract for LM Studio's local server.
|
|
5
|
-
* Uses the LM Studio-native
|
|
6
|
-
*
|
|
5
|
+
* Uses the LM Studio-native REST API for discovery and management, and delegates
|
|
6
|
+
* inference to the OpenAI-compatible /v1/* layer.
|
|
7
|
+
*
|
|
8
|
+
* LM Studio 0.4.0+ ships a native `/api/v1/*` REST API (recommended); the older
|
|
9
|
+
* `/api/v0/*` API carries the same rich model fields and is kept as a fallback for
|
|
10
|
+
* pre-0.4.0 servers. We prefer v1 and fall back to v0 only on a 404.
|
|
7
11
|
*
|
|
8
12
|
* Key API endpoints:
|
|
9
|
-
* GET /api/v0/models
|
|
10
|
-
* POST /api/v1/models/load
|
|
11
|
-
* POST /api/v1/models/unload
|
|
13
|
+
* GET /api/v1/models (→ /api/v0/models fallback) — model list with state, type, context length
|
|
14
|
+
* POST /api/v1/models/load — load a model by id
|
|
15
|
+
* POST /api/v1/models/unload — unload a model by id
|
|
12
16
|
*
|
|
13
17
|
* Fingerprint discriminator: data[] entries have both `state` and
|
|
14
|
-
* `compatibility_type` fields (unique to LM Studio's
|
|
18
|
+
* `compatibility_type` fields (unique to LM Studio's native API).
|
|
15
19
|
*/
|
|
16
20
|
|
|
17
21
|
import { Capability } from "../core/capability.ts";
|
|
@@ -24,9 +28,14 @@ import type {
|
|
|
24
28
|
ModelDescriptor,
|
|
25
29
|
PiModelEntry,
|
|
26
30
|
Probe,
|
|
31
|
+
ProbeResult,
|
|
27
32
|
ServerCredential,
|
|
28
33
|
} from "../core/types.ts";
|
|
29
34
|
|
|
35
|
+
/** Native model-list endpoints, in preference order (v1 first, v0 fallback for <0.4.0). */
|
|
36
|
+
const MODELS_V1 = "/api/v1/models";
|
|
37
|
+
const MODELS_V0 = "/api/v0/models";
|
|
38
|
+
|
|
30
39
|
// ---------------------------------------------------------------------------
|
|
31
40
|
// LM Studio API shapes (narrowed from unknown JSON)
|
|
32
41
|
// ---------------------------------------------------------------------------
|
|
@@ -103,6 +112,7 @@ function hasLmsDiscriminator(json: unknown): boolean {
|
|
|
103
112
|
function toDescriptor(m: LmsModelEntry): ModelDescriptor {
|
|
104
113
|
const isEmbeddings = m.type === "embeddings";
|
|
105
114
|
const isVlm = m.type === "vlm";
|
|
115
|
+
const isLoaded = m.state === "loaded";
|
|
106
116
|
|
|
107
117
|
const input: ("text" | "image")[] = ["text"];
|
|
108
118
|
if (isVlm) input.push("image");
|
|
@@ -112,11 +122,24 @@ function toDescriptor(m: LmsModelEntry): ModelDescriptor {
|
|
|
112
122
|
name: m.id,
|
|
113
123
|
input,
|
|
114
124
|
embeddings: isEmbeddings,
|
|
115
|
-
loaded:
|
|
125
|
+
loaded: isLoaded,
|
|
116
126
|
raw: m,
|
|
117
127
|
};
|
|
118
|
-
|
|
119
|
-
|
|
128
|
+
|
|
129
|
+
// Context window: LM Studio reports both the model ceiling (`max_context_length`)
|
|
130
|
+
// and the window the model was actually loaded with (`loaded_context_length`),
|
|
131
|
+
// which is frequently configured well below the ceiling (e.g. a 128k model loaded
|
|
132
|
+
// at 4096). Register the OPERATIVE window so Pi budgets against what the server
|
|
133
|
+
// will really accept: prefer the loaded length when the model is resident (and
|
|
134
|
+
// non-zero), otherwise fall back to the model max. `loaded_context_length` is 0 or
|
|
135
|
+
// absent while the model is not loaded, so it never masks the ceiling in that case.
|
|
136
|
+
const loadedCtx =
|
|
137
|
+
isLoaded && typeof m.loaded_context_length === "number" && m.loaded_context_length > 0
|
|
138
|
+
? m.loaded_context_length
|
|
139
|
+
: undefined;
|
|
140
|
+
const ctx = loadedCtx ?? m.max_context_length;
|
|
141
|
+
if (ctx !== undefined) {
|
|
142
|
+
desc.contextWindow = ctx;
|
|
120
143
|
}
|
|
121
144
|
return desc;
|
|
122
145
|
}
|
|
@@ -140,10 +163,21 @@ class LmStudioAdapter implements BackendAdapter {
|
|
|
140
163
|
Capability.Streaming,
|
|
141
164
|
]);
|
|
142
165
|
|
|
166
|
+
/**
|
|
167
|
+
* Fetch the native model list, preferring /api/v1/models and falling back to
|
|
168
|
+
* /api/v0/models for LM Studio < 0.4.0 (which only exposes the v0 REST API).
|
|
169
|
+
* Falls back ONLY on a 404 so auth (401) and unreachable (0) errors propagate.
|
|
170
|
+
*/
|
|
171
|
+
private async modelsResponse(probe: Probe): Promise<ProbeResult> {
|
|
172
|
+
const v1 = await probe(MODELS_V1);
|
|
173
|
+
if (v1.status === 404) return probe(MODELS_V0);
|
|
174
|
+
return v1;
|
|
175
|
+
}
|
|
176
|
+
|
|
143
177
|
// --- fingerprint ----------------------------------------------------------
|
|
144
178
|
|
|
145
179
|
async fingerprint(baseUrl: string, probe: Probe): Promise<DiscoveredServer | null> {
|
|
146
|
-
const r = await probe
|
|
180
|
+
const r = await this.modelsResponse(probe);
|
|
147
181
|
if (!r.ok || r.status === 0) return null;
|
|
148
182
|
if (!hasLmsDiscriminator(r.json)) return null;
|
|
149
183
|
|
|
@@ -163,7 +197,7 @@ class LmStudioAdapter implements BackendAdapter {
|
|
|
163
197
|
_cred: ServerCredential,
|
|
164
198
|
probe: Probe,
|
|
165
199
|
): Promise<HealthStatus> {
|
|
166
|
-
const r = await probe
|
|
200
|
+
const r = await this.modelsResponse(probe);
|
|
167
201
|
if (r.status === 0) return { state: "unreachable" };
|
|
168
202
|
if (r.status === 401) return { state: "unauthorized" };
|
|
169
203
|
if (!r.ok) return { state: "degraded" };
|
|
@@ -179,7 +213,7 @@ class LmStudioAdapter implements BackendAdapter {
|
|
|
179
213
|
_cred: ServerCredential,
|
|
180
214
|
probe: Probe,
|
|
181
215
|
): Promise<ModelDescriptor[]> {
|
|
182
|
-
const r = await probe
|
|
216
|
+
const r = await this.modelsResponse(probe);
|
|
183
217
|
if (!r.ok) {
|
|
184
218
|
if (r.status === 401) throw new Error("401 Unauthorized");
|
|
185
219
|
if (r.status === 0) throw new Error("listModels failed: server unreachable");
|
|
@@ -197,7 +231,7 @@ class LmStudioAdapter implements BackendAdapter {
|
|
|
197
231
|
_cred: ServerCredential,
|
|
198
232
|
probe: Probe,
|
|
199
233
|
): Promise<LoadedState> {
|
|
200
|
-
const r = await probe
|
|
234
|
+
const r = await this.modelsResponse(probe);
|
|
201
235
|
if (!r.ok) {
|
|
202
236
|
if (r.status === 401) throw new Error("401 Unauthorized");
|
|
203
237
|
if (r.status === 0) throw new Error("introspectLoaded failed: server unreachable");
|
|
@@ -242,7 +276,7 @@ class LmStudioAdapter implements BackendAdapter {
|
|
|
242
276
|
}
|
|
243
277
|
|
|
244
278
|
// Step 2: Confirm via model list that the target is now loaded
|
|
245
|
-
const r2 = await probe
|
|
279
|
+
const r2 = await this.modelsResponse(probe);
|
|
246
280
|
if (!r2.ok) {
|
|
247
281
|
if (r2.status === 0) throw new Error("switchModel confirmation failed: server went down");
|
|
248
282
|
if (r2.status === 401) throw new Error("401 Unauthorized");
|
|
@@ -287,9 +321,18 @@ class LmStudioAdapter implements BackendAdapter {
|
|
|
287
321
|
name: model.name,
|
|
288
322
|
reasoning: model.reasoning ?? false,
|
|
289
323
|
input: model.input.length > 0 ? (model.input as ("text" | "image")[]) : ["text"],
|
|
324
|
+
// Local inference is free, so per-token COSTS are zero. The cache-hit token
|
|
325
|
+
// COUNTS still flow and are worth recording: LM Studio's OpenAI-compatible
|
|
326
|
+
// responses report `usage.prompt_tokens_details.cached_tokens`, which Pi maps to
|
|
327
|
+
// `Usage.cacheRead` and surfaces in the TUI regardless of cost. Keep usage
|
|
328
|
+
// reporting on during streaming so those automatic-prefix-cache hits are
|
|
329
|
+
// recorded. We intentionally do NOT set `cacheControlFormat`: LM Studio (llama.cpp
|
|
330
|
+
// engine) caches matching prefixes automatically, so injecting Anthropic-style
|
|
331
|
+
// `cache_control` markers would be wrong for this OpenAI-completions backend.
|
|
290
332
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
291
333
|
contextWindow: model.contextWindow ?? 8192,
|
|
292
334
|
maxTokens: model.maxTokens ?? 4096,
|
|
335
|
+
compat: { supportsUsageInStreaming: true },
|
|
293
336
|
};
|
|
294
337
|
}
|
|
295
338
|
|
package/src/adapters/ollama.ts
CHANGED
|
@@ -320,9 +320,14 @@ class OllamaAdapter implements BackendAdapter {
|
|
|
320
320
|
name: model.name,
|
|
321
321
|
reasoning: model.reasoning ?? false,
|
|
322
322
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
323
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
324
|
+
// COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
|
|
325
|
+
// .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. Keep
|
|
326
|
+
// streaming usage reporting on so those prompt-cache hits are recorded.
|
|
323
327
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
324
328
|
contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
|
|
325
329
|
maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
330
|
+
compat: { supportsUsageInStreaming: true },
|
|
326
331
|
};
|
|
327
332
|
}
|
|
328
333
|
|
package/src/adapters/vllm.ts
CHANGED
|
@@ -177,9 +177,15 @@ class VllmAdapter implements BackendAdapter {
|
|
|
177
177
|
name: model.name,
|
|
178
178
|
reasoning: model.reasoning ?? false,
|
|
179
179
|
input: model.input.length > 0 ? model.input : ["text"],
|
|
180
|
+
// Local inference is free → per-token costs are zero, but cache-hit token
|
|
181
|
+
// COUNTS still matter: Pi maps the backend's `usage.prompt_tokens_details
|
|
182
|
+
// .cached_tokens` to `Usage.cacheRead` and displays it regardless of cost. vLLM
|
|
183
|
+
// reports cached tokens from its automatic prefix cache; keep streaming usage
|
|
184
|
+
// reporting on so those hits are recorded.
|
|
180
185
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
181
186
|
contextWindow: model.contextWindow ?? DEFAULT_CONTEXT_WINDOW,
|
|
182
187
|
maxTokens: model.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
188
|
+
compat: { supportsUsageInStreaming: true },
|
|
183
189
|
};
|
|
184
190
|
}
|
|
185
191
|
|
package/src/ui/onboarding.ts
CHANGED
|
@@ -24,45 +24,56 @@ import { Container, type SelectItem, SelectList, Text, matchesKey } from "@earen
|
|
|
24
24
|
|
|
25
25
|
import type { BackendAdapter } from "../core/backend-adapter.ts";
|
|
26
26
|
import { canIntrospect, canLoadUnload, canSwitch } from "../core/backend-adapter.ts";
|
|
27
|
-
import type { DiscoveredServer, ModelDescriptor, ServerRecord } from "../core/types.ts";
|
|
27
|
+
import type { DiscoveredServer, LoadedState, ModelDescriptor, ServerRecord } from "../core/types.ts";
|
|
28
28
|
import type { ServerRegistry } from "../registry/registry.ts";
|
|
29
29
|
import { serverId } from "../registry/ids.ts";
|
|
30
30
|
import { adapterFor } from "../adapters/index.ts";
|
|
31
|
+
import { unregisterServer } from "../shim/provider-shim.ts";
|
|
31
32
|
import { createProbe } from "../discovery/probe.ts";
|
|
32
33
|
|
|
33
34
|
// ─── Pure helpers ────────────────────────────────────────────────────────────
|
|
34
35
|
|
|
36
|
+
/** Extract a `host:port` string from a base URL for compact labels. */
|
|
37
|
+
function hostPortOf(baseUrl: string): string {
|
|
38
|
+
try {
|
|
39
|
+
const u = new URL(baseUrl);
|
|
40
|
+
return `${u.hostname}:${u.port || (u.protocol === "https:" ? "443" : "80")}`;
|
|
41
|
+
} catch {
|
|
42
|
+
return baseUrl.replace(/^https?:\/\//, "");
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/** Capitalise a backend kind for display, e.g. "lmstudio" → "Lmstudio". */
|
|
47
|
+
function kindLabelOf(kind: string): string {
|
|
48
|
+
return kind.charAt(0).toUpperCase() + kind.slice(1);
|
|
49
|
+
}
|
|
50
|
+
|
|
35
51
|
/**
|
|
36
|
-
* Build a `SelectItem[]` representing the
|
|
37
|
-
*
|
|
38
|
-
*
|
|
52
|
+
* Build a `SelectItem[]` representing the servers shown in the top-level onboarding
|
|
53
|
+
* list. Three kinds of entry can appear:
|
|
54
|
+
* - discovered servers (in discovery order) — already-registered ones get an
|
|
55
|
+
* "(added)" suffix so the user can tell new from known;
|
|
56
|
+
* - registered servers that are NOT currently discovered (e.g. offline), so they
|
|
57
|
+
* can still be managed/removed;
|
|
58
|
+
* - a sentinel "Add manually" entry, always last.
|
|
39
59
|
*
|
|
40
|
-
*
|
|
41
|
-
*
|
|
60
|
+
* Selecting any already-registered entry opens the manage overlay; selecting a new
|
|
61
|
+
* discovered entry or the sentinel runs the add flow.
|
|
42
62
|
*/
|
|
43
63
|
export function buildDiscoveredItems(
|
|
44
64
|
discovered: DiscoveredServer[],
|
|
45
65
|
existing: ServerRecord[],
|
|
46
66
|
): SelectItem[] {
|
|
47
67
|
const existingIds = new Set(existing.map((r) => r.id));
|
|
68
|
+
const discoveredUrls = new Set(discovered.map((s) => s.baseUrl));
|
|
48
69
|
|
|
49
70
|
const items: SelectItem[] = discovered.map((server): SelectItem => {
|
|
50
71
|
const id = serverId(server.kind, server.baseUrl);
|
|
51
72
|
const isAdded = existingIds.has(id);
|
|
52
73
|
|
|
53
|
-
// Extract host:port from baseUrl for the label suffix
|
|
54
|
-
let hostPort: string;
|
|
55
|
-
try {
|
|
56
|
-
const u = new URL(server.baseUrl);
|
|
57
|
-
hostPort = `${u.hostname}:${u.port || (u.protocol === "https:" ? "443" : "80")}`;
|
|
58
|
-
} catch {
|
|
59
|
-
hostPort = server.baseUrl.replace(/^https?:\/\//, "");
|
|
60
|
-
}
|
|
61
|
-
|
|
62
74
|
// Compose a label: "[kind] host:port ✓ healthy" or "(added)"
|
|
63
|
-
const kindLabel = server.kind.charAt(0).toUpperCase() + server.kind.slice(1);
|
|
64
75
|
const healthMark = isAdded ? "(added)" : "✓ healthy";
|
|
65
|
-
const label = `${
|
|
76
|
+
const label = `${kindLabelOf(server.kind)} (${hostPortOf(server.baseUrl)})`;
|
|
66
77
|
|
|
67
78
|
return {
|
|
68
79
|
value: server.baseUrl,
|
|
@@ -73,6 +84,18 @@ export function buildDiscoveredItems(
|
|
|
73
84
|
};
|
|
74
85
|
});
|
|
75
86
|
|
|
87
|
+
// Append registered servers that weren't discovered this scan (offline / not
|
|
88
|
+
// reachable right now) so they remain manageable from the same list.
|
|
89
|
+
for (const record of existing) {
|
|
90
|
+
if (!record.enabled) continue;
|
|
91
|
+
if (discoveredUrls.has(record.baseUrl)) continue;
|
|
92
|
+
items.push({
|
|
93
|
+
value: record.baseUrl,
|
|
94
|
+
label: `${kindLabelOf(record.kind)} (${hostPortOf(record.baseUrl)}) (added)`,
|
|
95
|
+
description: "Registered · not currently discovered",
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
|
|
76
99
|
// Always append the manual-add sentinel
|
|
77
100
|
items.push({
|
|
78
101
|
value: "__manual__",
|
|
@@ -151,6 +174,36 @@ export function capabilityActions(
|
|
|
151
174
|
return actions;
|
|
152
175
|
}
|
|
153
176
|
|
|
177
|
+
/** One-line hints shown under each manage action. */
|
|
178
|
+
const ACTION_DESCRIPTIONS: Record<string, string> = {
|
|
179
|
+
switch: "Make a model the active/served one",
|
|
180
|
+
load: "Load a model into memory",
|
|
181
|
+
unload: "Evict a loaded model from memory",
|
|
182
|
+
introspect: "Show which models are currently loaded",
|
|
183
|
+
remove: "Forget this server and delete its stored key",
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Build the manage-overlay action list for an already-registered server: the
|
|
188
|
+
* adapter's capability-filtered actions (switch / load / unload / introspect) plus
|
|
189
|
+
* a "Remove server" action that is always available. Backends without any local
|
|
190
|
+
* capabilities (vLLM, OpenAI, Anthropic, generic) show only "Remove server".
|
|
191
|
+
*/
|
|
192
|
+
export function buildManageItems(adapter: BackendAdapter): SelectItem[] {
|
|
193
|
+
const items: SelectItem[] = capabilityActions(adapter).map((a) => {
|
|
194
|
+
const item: SelectItem = { value: a.value, label: a.label };
|
|
195
|
+
const desc = ACTION_DESCRIPTIONS[a.value];
|
|
196
|
+
if (desc !== undefined) item.description = desc;
|
|
197
|
+
return item;
|
|
198
|
+
});
|
|
199
|
+
items.push({
|
|
200
|
+
value: "remove",
|
|
201
|
+
label: "Remove server",
|
|
202
|
+
description: ACTION_DESCRIPTIONS["remove"]!,
|
|
203
|
+
});
|
|
204
|
+
return items;
|
|
205
|
+
}
|
|
206
|
+
|
|
154
207
|
/**
|
|
155
208
|
* Coerce a user-supplied string (which may be bare "host:port", missing a scheme,
|
|
156
209
|
* or already a valid URL) into a well-formed origin with no trailing slash.
|
|
@@ -177,6 +230,263 @@ export function normalizeManualUrl(input: string): string {
|
|
|
177
230
|
return u.origin.replace(/\/+$/, "");
|
|
178
231
|
}
|
|
179
232
|
|
|
233
|
+
// ─── Shared overlay + server-action helpers ─────────────────────────────────
|
|
234
|
+
|
|
235
|
+
/** Reconstruct a minimal DiscoveredServer from a persisted record for adapter calls. */
|
|
236
|
+
function serverFromRecord(record: ServerRecord): DiscoveredServer {
|
|
237
|
+
return {
|
|
238
|
+
kind: record.kind,
|
|
239
|
+
baseUrl: record.baseUrl,
|
|
240
|
+
auth: record.auth,
|
|
241
|
+
label: record.label,
|
|
242
|
+
confidence: 1,
|
|
243
|
+
};
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
/**
|
|
247
|
+
* Render a single-select overlay (titled SelectList in an accent border) and resolve
|
|
248
|
+
* to the chosen item value, or `null` on Esc/cancel. Shared by the model picker and
|
|
249
|
+
* the manage menus so they stay visually consistent.
|
|
250
|
+
*/
|
|
251
|
+
function selectOverlay(
|
|
252
|
+
ctx: ExtensionCommandContext,
|
|
253
|
+
title: string,
|
|
254
|
+
items: SelectItem[],
|
|
255
|
+
hint: string,
|
|
256
|
+
): Promise<string | null> {
|
|
257
|
+
return ctx.ui.custom<string | null>(
|
|
258
|
+
(_tui, theme, _kb, done) => {
|
|
259
|
+
const container = new Container();
|
|
260
|
+
container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
|
|
261
|
+
container.addChild(new Text(theme.fg("accent", theme.bold(title))));
|
|
262
|
+
|
|
263
|
+
const list = new SelectList(items, Math.min(items.length, 12), getSelectListTheme());
|
|
264
|
+
list.onSelect = (item) => done(item.value);
|
|
265
|
+
list.onCancel = () => done(null);
|
|
266
|
+
|
|
267
|
+
container.addChild(list);
|
|
268
|
+
container.addChild(new Text(theme.fg("dim", hint)));
|
|
269
|
+
container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
|
|
270
|
+
|
|
271
|
+
return {
|
|
272
|
+
render: (width: number) => container.render(width),
|
|
273
|
+
invalidate: () => container.invalidate(),
|
|
274
|
+
handleInput: (data: string) => {
|
|
275
|
+
if (matchesKey(data, "escape")) {
|
|
276
|
+
done(null);
|
|
277
|
+
return;
|
|
278
|
+
}
|
|
279
|
+
list.handleInput(data);
|
|
280
|
+
_tui.requestRender();
|
|
281
|
+
},
|
|
282
|
+
};
|
|
283
|
+
},
|
|
284
|
+
{ overlay: true, overlayOptions: { width: "60%" } },
|
|
285
|
+
);
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
const errMsg = (err: unknown): string => (err instanceof Error ? err.message : String(err));
|
|
289
|
+
|
|
290
|
+
/** Fetch a server's models (live, falling back to last-known on failure). */
|
|
291
|
+
async function fetchModels(
|
|
292
|
+
ctx: ExtensionCommandContext,
|
|
293
|
+
registry: ServerRegistry,
|
|
294
|
+
record: ServerRecord,
|
|
295
|
+
): Promise<ModelDescriptor[] | null> {
|
|
296
|
+
const adapter = adapterFor(record.kind);
|
|
297
|
+
const cred = await registry.resolveCredential(record);
|
|
298
|
+
const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 5000 });
|
|
299
|
+
try {
|
|
300
|
+
return await adapter.listModels(serverFromRecord(record), cred, probe);
|
|
301
|
+
} catch (err) {
|
|
302
|
+
if (record.lastKnownModels && record.lastKnownModels.length > 0) {
|
|
303
|
+
return record.lastKnownModels;
|
|
304
|
+
}
|
|
305
|
+
ctx.ui.notify(`Crossbar: could not list models — ${errMsg(err)}`, "error");
|
|
306
|
+
return null;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
/** Switch the active model or load a model: pick from the list, then call the adapter. */
|
|
311
|
+
async function performModelAction(
|
|
312
|
+
ctx: ExtensionCommandContext,
|
|
313
|
+
registry: ServerRegistry,
|
|
314
|
+
record: ServerRecord,
|
|
315
|
+
action: "switch" | "load",
|
|
316
|
+
): Promise<void> {
|
|
317
|
+
const adapter = adapterFor(record.kind);
|
|
318
|
+
const models = await fetchModels(ctx, registry, record);
|
|
319
|
+
if (!models) return;
|
|
320
|
+
if (models.length === 0) {
|
|
321
|
+
ctx.ui.notify("Crossbar: server returned no models.", "warning");
|
|
322
|
+
return;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
const title = action === "switch"
|
|
326
|
+
? `Switch model — ${record.label}`
|
|
327
|
+
: `Load model — ${record.label}`;
|
|
328
|
+
const modelId = await selectOverlay(
|
|
329
|
+
ctx,
|
|
330
|
+
title,
|
|
331
|
+
buildModelItems(models.filter((m) => !m.embeddings)),
|
|
332
|
+
"↑↓ navigate · Enter select · Esc cancel",
|
|
333
|
+
);
|
|
334
|
+
if (!modelId) return;
|
|
335
|
+
|
|
336
|
+
const cred = await registry.resolveCredential(record);
|
|
337
|
+
// Loads can be slow (cold model into VRAM) — give them a generous budget.
|
|
338
|
+
const probe = createProbe(record.baseUrl, { auth: cred, defaultTimeoutMs: 60_000 });
|
|
339
|
+
|
|
340
|
+
ctx.ui.notify(
|
|
341
|
+
`Crossbar: ${action === "switch" ? "switching to" : "loading"} ${modelId}…`,
|
|
342
|
+
"info",
|
|
343
|
+
);
|
|
344
|
+
try {
|
|
345
|
+
if (action === "switch") {
|
|
346
|
+
if (!canSwitch(adapter)) return;
|
|
347
|
+
await adapter.switchModel(serverFromRecord(record), cred, modelId, probe);
|
|
348
|
+
} else {
|
|
349
|
+
if (!canLoadUnload(adapter)) return;
|
|
350
|
+
await adapter.loadUnload(serverFromRecord(record), cred, modelId, "load", probe);
|
|
351
|
+
}
|
|
352
|
+
ctx.ui.notify(
|
|
353
|
+
`Crossbar: ${modelId} ${action === "switch" ? "is now active" : "loaded"}.`,
|
|
354
|
+
"info",
|
|
355
|
+
);
|
|
356
|
+
} catch (err) {
|
|
357
|
+
ctx.ui.notify(`Crossbar: ${action} failed — ${errMsg(err)}`, "error");
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
/**
 * Evict one loaded model from a server.
 *
 * Works out which models are loaded, lets the user pick one in an overlay, and
 * asks the adapter to unload it. Progress, success and failure are reported via
 * `ctx.ui.notify`; nothing is thrown for a failed unload.
 *
 * @param ctx - command context used for the overlay and notifications
 * @param registry - registry used to resolve the server's credential
 * @param record - the registered server to operate on
 */
async function performUnload(
  ctx: ExtensionCommandContext,
  registry: ServerRegistry,
  record: ServerRecord,
): Promise<void> {
  const backend = adapterFor(record.kind);
  // Adapters without load/unload support have nothing to do here.
  if (!canLoadUnload(backend)) return;

  const credential = await registry.resolveCredential(record);
  const probe = createProbe(record.baseUrl, { auth: credential, defaultTimeoutMs: 5000 });

  // Start from the cached list; a successful live introspection replaces it.
  let loaded: string[] = record.lastKnownLoaded ?? [];
  if (canIntrospect(backend)) {
    try {
      const live = await backend.introspectLoaded(serverFromRecord(record), credential, probe);
      loaded = live.loadedModelIds;
    } catch {
      // Introspection failed — keep the last-known list.
    }
  }

  if (loaded.length === 0) {
    ctx.ui.notify("Crossbar: no models are currently loaded.", "info");
    return;
  }

  // Each loaded id is shown verbatim and doubles as the selection value.
  const choices = loaded.map((id) => ({ value: id, label: id }));
  const modelId = await selectOverlay(
    ctx,
    `Unload model — ${record.label}`,
    choices,
    "↑↓ navigate · Enter select · Esc cancel",
  );
  // A falsy result means the overlay was dismissed without a selection.
  if (!modelId) return;

  ctx.ui.notify(`Crossbar: unloading ${modelId}…`, "info");
  try {
    await backend.loadUnload(serverFromRecord(record), credential, modelId, "unload", probe);
    ctx.ui.notify(`Crossbar: ${modelId} unloaded.`, "info");
  } catch (err) {
    ctx.ui.notify(`Crossbar: unload failed — ${errMsg(err)}`, "error");
  }
}
|
|
402
|
+
|
|
403
|
+
/**
 * Query a server for its currently-loaded models and show them in a notification.
 *
 * Each model is listed by id; when the introspection result carries a context
 * length for it, that is appended as e.g. `(8k ctx)`. Values of 1000 or more are
 * shown in thousands (rounded), smaller values as-is.
 *
 * @param ctx - command context used for notifications
 * @param registry - registry used to resolve the server's credential
 * @param record - the registered server to inspect
 */
async function performIntrospect(
  ctx: ExtensionCommandContext,
  registry: ServerRegistry,
  record: ServerRecord,
): Promise<void> {
  const backend = adapterFor(record.kind);
  // Nothing to report for adapters that cannot introspect.
  if (!canIntrospect(backend)) return;

  const credential = await registry.resolveCredential(record);
  const probe = createProbe(record.baseUrl, { auth: credential, defaultTimeoutMs: 5000 });

  let loaded: LoadedState;
  try {
    loaded = await backend.introspectLoaded(serverFromRecord(record), credential, probe);
  } catch (err) {
    ctx.ui.notify(`Crossbar: could not read loaded models — ${errMsg(err)}`, "error");
    return;
  }

  const ids = loaded.loadedModelIds;
  if (ids.length === 0) {
    ctx.ui.notify(`Crossbar: ${record.label} has no models loaded.`, "info");
    return;
  }

  // Render one model as "id" or "id (<ctx> ctx)" depending on available metadata.
  const describe = (id: string): string => {
    const contextLength = loaded.perModel?.[id]?.contextLength;
    if (contextLength === undefined) return id;
    let shown: string;
    if (contextLength >= 1000) {
      shown = `${Math.round(contextLength / 1000)}k`;
    } else {
      shown = `${contextLength}`;
    }
    return `${id} (${shown} ctx)`;
  };

  const summary = ids.map((id) => describe(id)).join(", ");
  ctx.ui.notify(`Crossbar: ${record.label} loaded — ${summary}`, "info");
}
|
|
435
|
+
|
|
436
|
+
/**
 * Confirm with the user, then remove a server.
 *
 * After the user picks "Remove server", the server is unregistered from Pi
 * (`unregisterServer`) and deleted from the registry (`registry.remove`).
 * NOTE(review): credential / auth.json cleanup is presumably handled inside
 * `registry.remove` — confirm against the registry implementation.
 *
 * A failure in either step is reported as an error notification instead of
 * escaping as a rejection, matching the other action handlers in this file.
 *
 * @param pi - extension API the server was registered with
 * @param ctx - command context used for the confirmation prompt and notifications
 * @param registry - registry holding the server record
 * @param record - the registered server to remove
 */
async function performRemove(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  registry: ServerRegistry,
  record: ServerRecord,
): Promise<void> {
  const confirm = await ctx.ui.select(`Remove ${record.label}?`, ["Cancel", "Remove server"]);
  // Anything other than the explicit confirmation (including dismissal) cancels.
  if (confirm !== "Remove server") return;

  try {
    unregisterServer(pi, record);
    await registry.remove(record.id);
  } catch (err) {
    // Tell the user instead of reporting nothing and rejecting.
    ctx.ui.notify(`Crossbar: remove failed — ${errMsg(err)}`, "error");
    return;
  }
  ctx.ui.notify(`Crossbar: removed ${record.label}.`, "info");
}
|
|
449
|
+
|
|
450
|
+
/**
 * Show the manage overlay for a server that is already registered and run the
 * action the user picks.
 *
 * The menu entries come from `buildManageItems` for the server's adapter.
 * Dismissing the overlay, or picking a value with no handler, does nothing.
 *
 * @param pi - extension API, needed by the remove action
 * @param ctx - command context used for overlays and notifications
 * @param deps - injected dependencies; only `registry` is used here
 * @param record - the registered server being managed
 */
export async function openServerActions(
  pi: ExtensionAPI,
  ctx: ExtensionCommandContext,
  deps: OnboardingDeps,
  record: ServerRecord,
): Promise<void> {
  const registry = deps.registry;
  const backend = adapterFor(record.kind);

  const picked = await selectOverlay(
    ctx,
    `Manage — ${record.label}`,
    buildManageItems(backend),
    "↑↓ navigate · Enter select · Esc close",
  );
  if (!picked) return;

  if (picked === "switch") {
    await performModelAction(ctx, registry, record, "switch");
  } else if (picked === "load") {
    await performModelAction(ctx, registry, record, "load");
  } else if (picked === "unload") {
    await performUnload(ctx, registry, record);
  } else if (picked === "introspect") {
    await performIntrospect(ctx, registry, record);
  } else if (picked === "remove") {
    await performRemove(pi, ctx, registry, record);
  }
}
|
|
489
|
+
|
|
180
490
|
// ─── Overlay flow driver ────────────────────────────────────────────────────
|
|
181
491
|
|
|
182
492
|
export interface OnboardingDeps {
|
|
@@ -203,7 +513,7 @@ export interface OnboardingDeps {
|
|
|
203
513
|
* @param deps - injected registry + discover function (for testability)
|
|
204
514
|
*/
|
|
205
515
|
export async function openOnboarding(
|
|
206
|
-
|
|
516
|
+
pi: ExtensionAPI,
|
|
207
517
|
ctx: ExtensionCommandContext,
|
|
208
518
|
deps: OnboardingDeps,
|
|
209
519
|
): Promise<void> {
|
|
@@ -299,7 +609,15 @@ export async function openOnboarding(
|
|
|
299
609
|
|
|
300
610
|
targetBaseUrl = normalizedUrl;
|
|
301
611
|
} else {
|
|
302
|
-
//
|
|
612
|
+
// Already-registered server (discovered or offline) → open the manage overlay
|
|
613
|
+
// instead of re-running the add flow.
|
|
614
|
+
const existingRecord = registry.list().find((r) => r.baseUrl === chosenBaseUrl);
|
|
615
|
+
if (existingRecord) {
|
|
616
|
+
await openServerActions(pi, ctx, deps, existingRecord);
|
|
617
|
+
return;
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
// New discovered server path
|
|
303
621
|
discoveredServer = discovered.find((s) => s.baseUrl === chosenBaseUrl);
|
|
304
622
|
targetBaseUrl = chosenBaseUrl;
|
|
305
623
|
}
|
|
@@ -368,47 +686,11 @@ export async function openOnboarding(
|
|
|
368
686
|
}
|
|
369
687
|
|
|
370
688
|
// ── Step 5: pick default model ─────────────────────────────────────────────
|
|
371
|
-
const
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
(
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
|
|
378
|
-
container.addChild(
|
|
379
|
-
new Text(theme.fg("accent", theme.bold(`Pick default model — ${discoveredServer!.label}`))),
|
|
380
|
-
);
|
|
381
|
-
|
|
382
|
-
const list = new SelectList(
|
|
383
|
-
modelItems,
|
|
384
|
-
Math.min(modelItems.length, 12),
|
|
385
|
-
getSelectListTheme(),
|
|
386
|
-
);
|
|
387
|
-
|
|
388
|
-
list.onSelect = (item) => done(item.value);
|
|
389
|
-
list.onCancel = () => done(null);
|
|
390
|
-
|
|
391
|
-
container.addChild(list);
|
|
392
|
-
container.addChild(
|
|
393
|
-
new Text(theme.fg("dim", "↑↓ navigate · Enter select · Esc skip")),
|
|
394
|
-
);
|
|
395
|
-
container.addChild(new DynamicBorder((s) => theme.fg("accent", s)));
|
|
396
|
-
|
|
397
|
-
return {
|
|
398
|
-
render: (width: number) => container.render(width),
|
|
399
|
-
invalidate: () => container.invalidate(),
|
|
400
|
-
handleInput: (data: string) => {
|
|
401
|
-
// Allow Esc to skip model selection
|
|
402
|
-
if (matchesKey(data, "escape")) {
|
|
403
|
-
done(null);
|
|
404
|
-
return;
|
|
405
|
-
}
|
|
406
|
-
list.handleInput(data);
|
|
407
|
-
_tui.requestRender();
|
|
408
|
-
},
|
|
409
|
-
};
|
|
410
|
-
},
|
|
411
|
-
{ overlay: true, overlayOptions: { width: "60%" } },
|
|
689
|
+
const chosenModelId = await selectOverlay(
|
|
690
|
+
ctx,
|
|
691
|
+
`Pick default model — ${discoveredServer.label}`,
|
|
692
|
+
buildModelItems(models),
|
|
693
|
+
"↑↓ navigate · Enter select · Esc skip",
|
|
412
694
|
);
|
|
413
695
|
|
|
414
696
|
// chosenModelId === null means the user skipped — still register the server
|