pi-llama-cpp 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -27
- package/package.json +5 -4
- package/src/constants.ts +9 -4
- package/src/enums/action.ts +3 -2
- package/src/enums/mode.ts +1 -0
- package/src/enums/status.ts +1 -0
- package/src/index.ts +33 -28
- package/src/interfaces/auth.ts +1 -5
- package/src/interfaces/endpoints/props.ts +1 -0
- package/src/managers/command.ts +290 -0
- package/src/managers/events.ts +63 -0
- package/src/managers/server.ts +71 -0
- package/src/models/baseModel.ts +68 -20
- package/src/models/legacyModel.ts +45 -0
- package/src/models/routerModel.ts +7 -30
- package/src/models/singleModel.ts +9 -6
- package/src/resolver.ts +123 -0
- package/src/server.ts +171 -0
- package/tests/commandManager.test.ts +182 -133
- package/tests/legacyModel.test.ts +112 -0
- package/tests/mocks.ts +97 -0
- package/tests/resolver.test.ts +163 -104
- package/tests/routerModel.test.ts +46 -68
- package/tests/server.test.ts +175 -0
- package/tests/serverManager.test.ts +117 -0
- package/tests/singleModel.test.ts +21 -29
- package/src/commands/models.ts +0 -228
- package/src/events.ts +0 -26
- package/src/manager.ts +0 -96
- package/src/tools/resolver.ts +0 -136
- package/src/tools/retriever.ts +0 -71
- package/tests/handlers.test.ts +0 -164
- package/tests/modelsCommand.test.ts +0 -270
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# pi-llama-cpp
|
|
2
2
|
|
|
3
|
-
A [Pi Coding Agent](https://pi.dev/) extension that integrates with
|
|
3
|
+
A [Pi Coding Agent](https://pi.dev/) extension that integrates with running [llama.cpp servers](https://github.com/ggml-org/llama.cpp) to provide live model browsing, loading, and switching directly from Pi.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
@@ -10,20 +10,25 @@ A [Pi Coding Agent](https://pi.dev/) extension that integrates with a running [l
|
|
|
10
10
|
- **Multi-model router support** — works with both single-model and multi-model llama.cpp server configurations
|
|
11
11
|
- **Image capabilities detection** — detects multimodal models automatically
|
|
12
12
|
- **Flexible URL resolution** — configures the server URL via project config, environment variable, or global settings
|
|
13
|
+
- **Auth support** — lets you log in to a llama.cpp server that is secured with an API key
|
|
14
|
+
- **Multiple server support** — connect to multiple llama.cpp servers simultaneously by separating URLs with semicolons
|
|
13
15
|
|
|
14
16
|
### Status Indicators
|
|
15
17
|
|
|
16
|
-
| Icon | Status
|
|
17
|
-
|
|
18
|
-
| 🟢
|
|
19
|
-
| 🟡
|
|
20
|
-
| 🔴
|
|
21
|
-
| 🔵
|
|
22
|
-
| ⚪
|
|
18
|
+
| Icon | Status | Description |
|
|
19
|
+
| ---- | ------------ | -------------------------------------- |
|
|
20
|
+
| 🟢 | Loaded | Model is active and ready to use |
|
|
21
|
+
| 🟡 | Loading | Model is currently being loaded |
|
|
22
|
+
| 🔴 | Failed | Model failed to load |
|
|
23
|
+
| 🔵 | Sleeping | Model is available, but inactive |
|
|
24
|
+
| ⚪ | Unloaded | Model is not loaded on the server |
|
|
25
|
+
| ⛔ | Unauthorized | Model can't be used (API key required) |
|
|
23
26
|
|
|
24
27
|
> **Note**: The `Sleeping` status only shows when you start your server with `llama-server --sleep-idle-seconds <n> ...`.
|
|
25
|
-
This is a **llama.cpp server flag** that tells the server to put idle models to sleep after `n` seconds.
|
|
26
|
-
The model awakens automatically when you send a message.
|
|
28
|
+
> This is a **llama.cpp server flag** that tells the server to put idle models to sleep after `n` seconds.
|
|
29
|
+
> The model awakens automatically when you send a message.
|
|
30
|
+
|
|
31
|
+
> **Note:** You can enable API-key authentication on your server with `llama-server --api-key <your key> ...`.
|
|
27
32
|
|
|
28
33
|
## Installation
|
|
29
34
|
|
|
@@ -41,7 +46,7 @@ pi install https://github.com/gsanhueza/pi-llama-cpp
|
|
|
41
46
|
|
|
42
47
|
## Configuration
|
|
43
48
|
|
|
44
|
-
The extension resolves the llama.cpp server URL using the following priority order:
|
|
49
|
+
The extension resolves the llama.cpp server URL(s) using the following priority order:
|
|
45
50
|
|
|
46
51
|
1. **Per-project config** — `.pi/llama-server.json` in your project root:
|
|
47
52
|
|
|
@@ -63,19 +68,33 @@ The extension resolves the llama.cpp server URL using the following priority ord
|
|
|
63
68
|
|
|
64
69
|
4. **Default** — `http://127.0.0.1:8080`
|
|
65
70
|
|
|
66
|
-
###
|
|
71
|
+
### Multiple Servers
|
|
72
|
+
|
|
73
|
+
To connect to multiple llama.cpp servers simultaneously, provide your URLs as a single string **separated by semicolons** using any of the configuration methods above:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Example for env, but you can use any of the other methods
|
|
77
|
+
LLAMA_SERVER_URL="http://127.0.0.1:8080;http://127.0.0.1:8081;http://10.0.0.5:8080"
|
|
78
|
+
```
|
|
67
79
|
|
|
68
|
-
|
|
80
|
+
Each server gets its own provider (e.g., **Llama.cpp (http://127.0.0.1:8080)**) and its own set of models. The `/models` command lists all models from all servers, labeled with their server URL.
|
|
81
|
+
|
|
82
|
+
### API Key
|
|
69
83
|
|
|
70
|
-
|
|
84
|
+
If your llama.cpp server requires authentication, use `/login` in Pi, select the "API key" option, and choose the provider that corresponds to the server requiring the API key.
|
|
71
85
|
|
|
72
|
-
|
|
86
|
+
Alternatively, configure the API key directly in `~/.pi/agent/auth.json`.
|
|
87
|
+
Use the provider ID `llama-server=<url>`:
|
|
73
88
|
|
|
74
89
|
```json
|
|
75
90
|
{
|
|
76
|
-
"llama-server": {
|
|
91
|
+
"llama-server=http://127.0.0.1:8080": {
|
|
77
92
|
"type": "api_key",
|
|
78
|
-
"key": "<
|
|
93
|
+
"key": "<key-for-server-1>"
|
|
94
|
+
},
|
|
95
|
+
"llama-server=https://some-url-for-llama-cpp": {
|
|
96
|
+
"type": "api_key",
|
|
97
|
+
"key": "<key-for-server-2>"
|
|
79
98
|
}
|
|
80
99
|
}
|
|
81
100
|
```
|
|
@@ -98,22 +117,32 @@ llama-server --models-preset path/to/presets.ini ...
|
|
|
98
117
|
llama-server --model path/to/model.gguf ...
|
|
99
118
|
```
|
|
100
119
|
|
|
120
|
+
- For legacy-model mode (e.g., [ik_llama.cpp](https://github.com/ikawrakow/ik_llama.cpp)), the extension auto-detects and handles it transparently.
|
|
121
|
+
|
|
122
|
+
> **Note:** This extension is focused on llama.cpp, not on ik_llama.cpp. Nonetheless, since I found a way to make it work with this extension, I added the option.
|
|
123
|
+
|
|
124
|
+
> **Note:** The ik_llama.cpp fork is not legacy at all, but it uses an old way of describing models compared to llama.cpp.
|
|
125
|
+
|
|
101
126
|
The extension determines the context size as follows:
|
|
127
|
+
|
|
102
128
|
- **Router mode**
|
|
103
129
|
- When loaded, reads `meta.n_ctx` from the `/models` endpoint
|
|
104
130
|
- When not loaded, reads `--ctx-size` and/or `--fit-ctx` from the server arguments, or `ctx-size` and/or `fit-ctx` keys from the **presets.ini** file.
|
|
105
131
|
- **Single mode** — reads `meta.n_ctx` from the `/models` endpoint
|
|
132
|
+
- **Legacy mode** — reads `max_model_len` from `/models`, falling back to `n_ctx` from `/props`
|
|
106
133
|
- Falls back to `128000` if not available
|
|
107
134
|
|
|
108
135
|
### Commands
|
|
109
136
|
|
|
110
|
-
| Command | Description
|
|
111
|
-
| ---------------- |
|
|
112
|
-
| `/models` | Browse your models with live status. Select a model to load, switch, or unload it.
|
|
113
|
-
| `/models info` | Show detailed information for all available models at once.
|
|
114
|
-
| `/models unload` | Unload all loaded models at once
|
|
137
|
+
| Command | Description |
|
|
138
|
+
| ---------------- | ---------------------------------------------------------------------------------- |
|
|
139
|
+
| `/models` | Browse your models with live status. Select a model to load, switch, or unload it. |
|
|
140
|
+
| `/models info` | Show detailed information for all available models at once. |
|
|
141
|
+
| `/models unload` | Unload all loaded models at once. |
|
|
142
|
+
|
|
143
|
+
> **Note:** When a llama.cpp server is unreachable, `/models` displays an error notification with the configured server URL, but healthy servers continue to show their models.
|
|
115
144
|
|
|
116
|
-
> **Note:**
|
|
145
|
+
> **Note:** The `/models unload` command only makes sense in router mode.
|
|
117
146
|
|
|
118
147
|
### Model Actions
|
|
119
148
|
|
|
@@ -126,7 +155,7 @@ When browsing models via the `/models` command, you can:
|
|
|
126
155
|
- **Info** — View model details (ID, capabilities, context size)
|
|
127
156
|
- **Cancel** — Cancel the current operation
|
|
128
157
|
|
|
129
|
-
> **Note:** In single-model mode,
|
|
158
|
+
> **Note:** In single-model and legacy-model modes, **Unload** is not available, since there is only one model on the server.
|
|
130
159
|
|
|
131
160
|
### Model Selection Event
|
|
132
161
|
|
|
@@ -137,6 +166,7 @@ This keeps the server in sync with the active model in Pi, regardless of how the
|
|
|
137
166
|
### Loading Models
|
|
138
167
|
|
|
139
168
|
When you trigger a load, switch, or retry action, the extension polls the server to track progress. If a model takes longer than **60 seconds** to load, the polling times out with an error.
|
|
169
|
+
|
|
140
170
|
> **Note:** The timeout is only for the polling. The model might still be loading.
|
|
141
171
|
|
|
142
172
|
### Model Configuration
|
|
@@ -149,6 +179,7 @@ Each model exposed to Pi includes the following defaults:
|
|
|
149
179
|
|
|
150
180
|
## Dependencies
|
|
151
181
|
|
|
152
|
-
|
|
|
153
|
-
| --------------------------------- |
|
|
154
|
-
| `@earendil-works/pi-coding-agent` | Pi Coding Agent SDK
|
|
182
|
+
| Peer dependency | Purpose |
|
|
183
|
+
| --------------------------------- | ------------------- |
|
|
184
|
+
| `@earendil-works/pi-coding-agent` | Pi Coding Agent SDK |
|
|
185
|
+
| `@earendil-works/pi-tui` | Pi TUI SDK |
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-llama-cpp",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "Pi extension for llama.cpp integration. Supports
|
|
3
|
+
"version": "0.6.0",
|
|
4
|
+
"description": "Pi extension for llama.cpp integration. Supports router, single and legacy models. Supports multiple servers.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi",
|
|
7
7
|
"pi-package",
|
|
@@ -32,11 +32,12 @@
|
|
|
32
32
|
]
|
|
33
33
|
},
|
|
34
34
|
"peerDependencies": {
|
|
35
|
-
"@earendil-works/pi-coding-agent": "*"
|
|
35
|
+
"@earendil-works/pi-coding-agent": "*",
|
|
36
|
+
"@earendil-works/pi-tui": "*"
|
|
36
37
|
},
|
|
37
38
|
"devDependencies": {
|
|
38
39
|
"@types/node": "^25.9.1",
|
|
39
40
|
"prettier-plugin-organize-imports": "^4.3.0",
|
|
40
|
-
"vitest": "^4.1.
|
|
41
|
+
"vitest": "^4.1.8"
|
|
41
42
|
}
|
|
42
43
|
}
|
package/src/constants.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* This provider's
|
|
2
|
+
* This provider's base ID
|
|
3
3
|
*/
|
|
4
|
-
export const
|
|
4
|
+
export const PROVIDER_PREFIX = "llama-server";
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* This provider's name
|
|
@@ -9,15 +9,20 @@ export const PROVIDER_ID = "llama-server";
|
|
|
9
9
|
export const PROVIDER_NAME = "Llama.cpp";
|
|
10
10
|
|
|
11
11
|
/**
|
|
12
|
-
* The default
|
|
12
|
+
* The default API type used in Pi
|
|
13
13
|
*/
|
|
14
|
-
export const
|
|
14
|
+
export const API_TYPE = "openai-completions";
|
|
15
15
|
|
|
16
16
|
/**
|
|
17
17
|
* The placeholder api-key if it couldn't be resolved
|
|
18
18
|
*/
|
|
19
19
|
export const API_KEY_PLACEHOLDER = "sk-placeholder";
|
|
20
20
|
|
|
21
|
+
/**
|
|
22
|
+
* The default URL if the resolver couldn't find it
|
|
23
|
+
*/
|
|
24
|
+
export const DEFAULT_LLAMA_SERVER_URL = "http://127.0.0.1:8080";
|
|
25
|
+
|
|
21
26
|
/**
|
|
22
27
|
* The default context if the server didn't expose it
|
|
23
28
|
*/
|
package/src/enums/action.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
/** The possible actions for the /models command */
|
|
2
2
|
export enum Action {
|
|
3
|
+
LOAD_AND_SWITCH = "Load & switch",
|
|
3
4
|
SWITCH = "Switch model",
|
|
4
|
-
|
|
5
|
-
LOAD = "Load & switch",
|
|
5
|
+
LOAD = "Load only",
|
|
6
6
|
UNLOAD = "Unload",
|
|
7
|
+
RETRY = "Retry",
|
|
7
8
|
INFO = "Info",
|
|
8
9
|
CANCEL = "Cancel",
|
|
9
10
|
}
|
package/src/enums/mode.ts
CHANGED
package/src/enums/status.ts
CHANGED
package/src/index.ts
CHANGED
|
@@ -1,42 +1,47 @@
|
|
|
1
1
|
import type {
|
|
2
2
|
ExtensionAPI,
|
|
3
3
|
ExtensionCommandContext,
|
|
4
|
+
ExtensionContext,
|
|
5
|
+
SessionBeforeSwitchEvent,
|
|
4
6
|
} from "@earendil-works/pi-coding-agent";
|
|
5
|
-
import type { AutocompleteItem } from "@earendil-works/pi-tui";
|
|
6
|
-
import { onSessionBeforeSwitch } from "./commands/models";
|
|
7
7
|
import { PROVIDER_NAME } from "./constants";
|
|
8
|
-
import {
|
|
9
|
-
import { CommandManager } from "./
|
|
8
|
+
import { ModelSelectEvent } from "./interfaces/events";
|
|
9
|
+
import { CommandManager } from "./managers/command";
|
|
10
|
+
import { EventManager } from "./managers/events";
|
|
11
|
+
import { ServerManager } from "./managers/server";
|
|
12
|
+
import { ConfigResolver } from "./resolver";
|
|
13
|
+
import { Server } from "./server";
|
|
10
14
|
|
|
11
15
|
export default async function (pi: ExtensionAPI) {
|
|
12
|
-
const
|
|
13
|
-
await
|
|
16
|
+
const resolver = new ConfigResolver();
|
|
17
|
+
const urls = await resolver.resolveUrls(process.cwd());
|
|
18
|
+
const servers = urls.map((url) => new Server(url));
|
|
14
19
|
|
|
15
|
-
|
|
20
|
+
const eventManager = new EventManager(servers);
|
|
21
|
+
const serverManager = new ServerManager(servers);
|
|
22
|
+
const commandManager = new CommandManager(serverManager);
|
|
23
|
+
|
|
24
|
+
// Register providers once at startup
|
|
25
|
+
await serverManager.registerAllProviders(pi);
|
|
26
|
+
|
|
27
|
+
// Single global /models command
|
|
16
28
|
pi.registerCommand("models", {
|
|
17
29
|
description: `Browse ${PROVIDER_NAME} models`,
|
|
18
|
-
getArgumentCompletions:
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
value: "info",
|
|
22
|
-
label: "info",
|
|
23
|
-
description: "Show information of all models",
|
|
24
|
-
},
|
|
25
|
-
{
|
|
26
|
-
value: "unload",
|
|
27
|
-
label: "unload",
|
|
28
|
-
description: "Unload all models",
|
|
29
|
-
},
|
|
30
|
-
];
|
|
31
|
-
|
|
32
|
-
const filtered = available.filter((a) => a.value.startsWith(prefix));
|
|
33
|
-
return filtered.length > 0 ? filtered : null;
|
|
30
|
+
getArgumentCompletions: commandManager.getArgumentCompletions,
|
|
31
|
+
handler: async (args: string, ctx: ExtensionCommandContext) => {
|
|
32
|
+
await commandManager.handleCommand(args, ctx, pi);
|
|
34
33
|
},
|
|
35
|
-
handler: async (args: string, ctx: ExtensionCommandContext) =>
|
|
36
|
-
await manager.run(args, ctx),
|
|
37
34
|
});
|
|
38
35
|
|
|
39
|
-
// Events
|
|
40
|
-
pi.on(
|
|
41
|
-
|
|
36
|
+
// Events
|
|
37
|
+
pi.on(
|
|
38
|
+
"model_select",
|
|
39
|
+
async (event: ModelSelectEvent, ctx: ExtensionContext) =>
|
|
40
|
+
await eventManager.onModelSelect(event, ctx),
|
|
41
|
+
);
|
|
42
|
+
pi.on(
|
|
43
|
+
"session_before_switch",
|
|
44
|
+
async (_: SessionBeforeSwitchEvent, ctx: ExtensionContext) =>
|
|
45
|
+
await eventManager.onSessionBeforeSwitch(ctx),
|
|
46
|
+
);
|
|
42
47
|
}
|
package/src/interfaces/auth.ts
CHANGED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
ExtensionAPI,
|
|
3
|
+
ExtensionCommandContext,
|
|
4
|
+
} from "@earendil-works/pi-coding-agent";
|
|
5
|
+
import { AutocompleteItem } from "@earendil-works/pi-tui";
|
|
6
|
+
import { PROVIDER_NAME } from "../constants";
|
|
7
|
+
import { Action } from "../enums/action";
|
|
8
|
+
import { Mode } from "../enums/mode";
|
|
9
|
+
import { Status } from "../enums/status";
|
|
10
|
+
import { BaseModel } from "../models/baseModel";
|
|
11
|
+
import { EventManager } from "./events";
|
|
12
|
+
import { ServerManager } from "./server";
|
|
13
|
+
|
|
14
|
+
export class CommandManager {
|
|
15
|
+
constructor(private readonly serverManager: ServerManager) {}
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Sets up the argument completions for the `/models` command
|
|
19
|
+
*
|
|
20
|
+
* @param prefix Prefix written by the user
|
|
21
|
+
* @returns Completions with that prefix
|
|
22
|
+
*/
|
|
23
|
+
getArgumentCompletions(prefix: string): AutocompleteItem[] | null {
|
|
24
|
+
const available = [
|
|
25
|
+
{
|
|
26
|
+
value: "info",
|
|
27
|
+
label: "info",
|
|
28
|
+
description: "Show information of all models",
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
value: "unload",
|
|
32
|
+
label: "unload",
|
|
33
|
+
description: "Unload all models",
|
|
34
|
+
},
|
|
35
|
+
];
|
|
36
|
+
const filtered = available.filter((a) => a.value.startsWith(prefix));
|
|
37
|
+
return filtered.length > 0 ? filtered : null;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Executes the action for the `/models` command
|
|
42
|
+
*
|
|
43
|
+
* @param args Arguments of the command
|
|
44
|
+
* @param ctx The context used by Pi
|
|
45
|
+
* @param pi The Pi extension
|
|
46
|
+
*/
|
|
47
|
+
async handleCommand(
|
|
48
|
+
args: string,
|
|
49
|
+
ctx: ExtensionCommandContext,
|
|
50
|
+
pi: ExtensionAPI,
|
|
51
|
+
) {
|
|
52
|
+
// Re-register providers so Pi sees updated model states
|
|
53
|
+
await this.serverManager.registerAllProviders(pi);
|
|
54
|
+
|
|
55
|
+
// Notify about unreachable servers
|
|
56
|
+
for (const url of this.serverManager.failedUrls) {
|
|
57
|
+
this.notifyNotFound(ctx, url);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
if (args === "unload") {
|
|
61
|
+
await Promise.all(
|
|
62
|
+
this.serverManager.getAllModels().map((model) => model.unload()),
|
|
63
|
+
);
|
|
64
|
+
ctx.ui.notify(`Unloaded all ${PROVIDER_NAME} models`, "info");
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
if (args === "info") {
|
|
69
|
+
const infos = await Promise.all(
|
|
70
|
+
this.serverManager.getAllModels().map((model) => model.getInfo()),
|
|
71
|
+
);
|
|
72
|
+
ctx.ui.notify(ctx.ui.theme.fg("accent", infos.join("\n")), "info");
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Interactive menu: show <name> (<server_url>)
|
|
77
|
+
await this.runModelsMenu(ctx, pi);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Notifies the user that a server is unreachable.
|
|
82
|
+
*/
|
|
83
|
+
private notifyNotFound(ctx: ExtensionCommandContext, url: string): void {
|
|
84
|
+
ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* Runs the interactive model selection menu.
|
|
89
|
+
*/
|
|
90
|
+
private async runModelsMenu(
|
|
91
|
+
ctx: ExtensionCommandContext,
|
|
92
|
+
pi: ExtensionAPI,
|
|
93
|
+
): Promise<void> {
|
|
94
|
+
const event = await this.modelSelectionHandler(
|
|
95
|
+
ctx,
|
|
96
|
+
this.serverManager.getAllModels(),
|
|
97
|
+
);
|
|
98
|
+
|
|
99
|
+
if (!event) return;
|
|
100
|
+
const { action, model } = event;
|
|
101
|
+
|
|
102
|
+
// Action: Cancel
|
|
103
|
+
if (!action || action === Action.CANCEL) return;
|
|
104
|
+
|
|
105
|
+
// Action: Info
|
|
106
|
+
if (action === Action.INFO) {
|
|
107
|
+
const info = await model.getInfo();
|
|
108
|
+
ctx.ui.notify(`${info}`, "info");
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Action: Unload
|
|
113
|
+
if (action === Action.UNLOAD) {
|
|
114
|
+
await model.unload();
|
|
115
|
+
ctx.ui.notify(`Unloaded ${model.name}`, "info");
|
|
116
|
+
return;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
// Action: Switch
|
|
120
|
+
if (action === Action.SWITCH) {
|
|
121
|
+
const { serverId } = model;
|
|
122
|
+
const piModel = ctx.modelRegistry.find(serverId, model.id);
|
|
123
|
+
if (!piModel)
|
|
124
|
+
throw new Error(`Cannot find model ${model.name} in pi registry`);
|
|
125
|
+
|
|
126
|
+
await pi.setModel(piModel);
|
|
127
|
+
ctx.ui.notify(`Model ${model.name} ready`, "info");
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// Actions: Load / Load & Switch / Retry
|
|
132
|
+
const loadActions = [Action.LOAD, Action.LOAD_AND_SWITCH, Action.RETRY];
|
|
133
|
+
if (loadActions.includes(action)) {
|
|
134
|
+
ctx.ui.notify(`Loading ${model.name}...`, "info");
|
|
135
|
+
EventManager.inflightModel = model;
|
|
136
|
+
|
|
137
|
+
const onSuccess = async () => {
|
|
138
|
+
const { serverId } = model;
|
|
139
|
+
const piModel = ctx.modelRegistry.find(serverId, model.id);
|
|
140
|
+
if (!piModel)
|
|
141
|
+
throw new Error(`Cannot find model ${model.name} in pi registry`);
|
|
142
|
+
|
|
143
|
+
// Verify auth
|
|
144
|
+
if ((await model.getStatus()) === Status.UNAUTHORIZED)
|
|
145
|
+
throw new Error(
|
|
146
|
+
`Unauthorized for ${model.name}. Use /login and add your API key.`,
|
|
147
|
+
);
|
|
148
|
+
|
|
149
|
+
// Verify failure
|
|
150
|
+
if ((await model.getStatus()) === Status.FAILED)
|
|
151
|
+
throw new Error(`Failed to load model ${model.name}`);
|
|
152
|
+
|
|
153
|
+
// Select the model if asked
|
|
154
|
+
if (action === Action.LOAD_AND_SWITCH) await pi.setModel(piModel);
|
|
155
|
+
|
|
156
|
+
ctx.ui.notify(`Model ${model.name} ready`, "info");
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
const onFailure = (err: any) => {
|
|
160
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
161
|
+
|
|
162
|
+
try {
|
|
163
|
+
ctx.ui.notify(message, "error");
|
|
164
|
+
} catch {
|
|
165
|
+
// ctx went stale between error and notification
|
|
166
|
+
}
|
|
167
|
+
};
|
|
168
|
+
|
|
169
|
+
// Load the model without blocking the UI
|
|
170
|
+
model
|
|
171
|
+
.load()
|
|
172
|
+
.then(onSuccess)
|
|
173
|
+
.catch(onFailure)
|
|
174
|
+
.finally(EventManager.resetInflightModel);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Handles the menu for model selection.
|
|
180
|
+
* Loops: select model → select action → handle action.
|
|
181
|
+
*
|
|
182
|
+
* Escape on actions menu goes back to model selection.
|
|
183
|
+
* Escape on model selection exits.
|
|
184
|
+
*
|
|
185
|
+
* @returns The selected action and model
|
|
186
|
+
*/
|
|
187
|
+
private async modelSelectionHandler(
|
|
188
|
+
ctx: ExtensionCommandContext,
|
|
189
|
+
models: BaseModel[],
|
|
190
|
+
): Promise<{ action: Action; model: BaseModel } | null> {
|
|
191
|
+
while (true) {
|
|
192
|
+
// Select the model
|
|
193
|
+
const model = await this.selectModel(ctx, models);
|
|
194
|
+
if (!model) return null;
|
|
195
|
+
|
|
196
|
+
// Select the action
|
|
197
|
+
const actions = await this.getActionsForModel(model);
|
|
198
|
+
const action = await this.selectAction(ctx, model, actions);
|
|
199
|
+
if (action === null) {
|
|
200
|
+
// Escape key pressed => back to model selection
|
|
201
|
+
continue;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// Return the selected action and model
|
|
205
|
+
return { action, model };
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
/**
|
|
210
|
+
* Select a model from the list. Returns null if user cancels.
|
|
211
|
+
*
|
|
212
|
+
* @returns The model selected by the user
|
|
213
|
+
*/
|
|
214
|
+
private async selectModel(
|
|
215
|
+
ctx: ExtensionCommandContext,
|
|
216
|
+
models: BaseModel[],
|
|
217
|
+
): Promise<BaseModel | null> {
|
|
218
|
+
const labels = await Promise.all(
|
|
219
|
+
models.map(async (model) => ({
|
|
220
|
+
label: (await model.getLabel()).trim(),
|
|
221
|
+
serverUrl: model.serverUrl,
|
|
222
|
+
})),
|
|
223
|
+
);
|
|
224
|
+
|
|
225
|
+
// Count grapheme clusters (not UTF-16 code units) so emoji padding aligns visually
|
|
226
|
+
const graphemeLength = (str: string) =>
|
|
227
|
+
[...new Intl.Segmenter().segment(str)].length;
|
|
228
|
+
|
|
229
|
+
// Decorate the label so the spacing makes it seem more like a table
|
|
230
|
+
const maxLength = Math.max(
|
|
231
|
+
...labels.map(({ label }) => graphemeLength(label)),
|
|
232
|
+
);
|
|
233
|
+
const choices = labels.map(({ label, serverUrl }) => {
|
|
234
|
+
const extraPadding = 2;
|
|
235
|
+
const padLen = maxLength - graphemeLength(label) + extraPadding;
|
|
236
|
+
return `${label}${" ".repeat(padLen)} [Server: ${serverUrl}]`;
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
const choice = await ctx.ui.select(`${PROVIDER_NAME} models:`, choices);
|
|
240
|
+
if (!choice) return null;
|
|
241
|
+
const idx = choices.indexOf(choice);
|
|
242
|
+
|
|
243
|
+
return models[idx];
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
/**
|
|
247
|
+
* Get available actions for a model based on its mode and status.
|
|
248
|
+
*
|
|
249
|
+
* @returns A mapping of actions for each status
|
|
250
|
+
*/
|
|
251
|
+
private async getActionsForModel(model: BaseModel): Promise<Array<Action>> {
|
|
252
|
+
const allActions: Record<Status, Array<Action>> = {
|
|
253
|
+
[Status.LOADED]:
|
|
254
|
+
model.mode === Mode.ROUTER
|
|
255
|
+
? [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL]
|
|
256
|
+
: [Action.SWITCH, Action.INFO, Action.CANCEL],
|
|
257
|
+
[Status.LOADING]: [Action.INFO, Action.CANCEL],
|
|
258
|
+
[Status.FAILED]: [Action.RETRY, Action.CANCEL],
|
|
259
|
+
[Status.SLEEPING]: [
|
|
260
|
+
Action.SWITCH,
|
|
261
|
+
Action.UNLOAD,
|
|
262
|
+
Action.INFO,
|
|
263
|
+
Action.CANCEL,
|
|
264
|
+
],
|
|
265
|
+
[Status.UNLOADED]: [Action.LOAD_AND_SWITCH, Action.LOAD, Action.CANCEL],
|
|
266
|
+
[Status.UNAUTHORIZED]: [Action.INFO, Action.CANCEL],
|
|
267
|
+
};
|
|
268
|
+
|
|
269
|
+
const status = await model.getStatus();
|
|
270
|
+
return allActions[status];
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
/**
|
|
274
|
+
* Selects an action for a model.
|
|
275
|
+
*
|
|
276
|
+
* @returns The selected action
|
|
277
|
+
*/
|
|
278
|
+
private async selectAction(
|
|
279
|
+
ctx: ExtensionCommandContext,
|
|
280
|
+
model: BaseModel,
|
|
281
|
+
actions: Array<Action>,
|
|
282
|
+
): Promise<Action | null> {
|
|
283
|
+
const labels = actions.map((a) => String(a));
|
|
284
|
+
const choice = await ctx.ui.select(`${model.name}`, labels);
|
|
285
|
+
if (!choice) return null;
|
|
286
|
+
|
|
287
|
+
const idx = labels.indexOf(choice);
|
|
288
|
+
return actions[idx];
|
|
289
|
+
}
|
|
290
|
+
}
|