pi-llama-cpp 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,290 @@
1
+ import type {
2
+ ExtensionAPI,
3
+ ExtensionCommandContext,
4
+ } from "@earendil-works/pi-coding-agent";
5
+ import { AutocompleteItem } from "@earendil-works/pi-tui";
6
+ import { PROVIDER_NAME } from "../constants";
7
+ import { Action } from "../enums/action";
8
+ import { Mode } from "../enums/mode";
9
+ import { Status } from "../enums/status";
10
+ import { BaseModel } from "../models/baseModel";
11
+ import { EventManager } from "./events";
12
+ import { ServerManager } from "./server";
13
+
14
/**
 * Implements the `/models` command: argument completion, the `info` and
 * `unload` sub-commands, and the interactive model/action menu.
 */
export class CommandManager {
  constructor(private readonly serverManager: ServerManager) {}

  /**
   * Provides the argument completions for the `/models` command.
   *
   * @param prefix Prefix written by the user
   * @returns The sub-commands starting with `prefix`, or `null` when none match
   */
  getArgumentCompletions(prefix: string): AutocompleteItem[] | null {
    const available = [
      {
        value: "info",
        label: "info",
        description: "Show information of all models",
      },
      {
        value: "unload",
        label: "unload",
        description: "Unload all models",
      },
    ];
    const filtered = available.filter((a) => a.value.startsWith(prefix));
    return filtered.length > 0 ? filtered : null;
  }

  /**
   * Executes the action for the `/models` command.
   *
   * `unload` unloads every model, `info` prints the info of every model and
   * anything else opens the interactive menu.
   *
   * @param args Arguments of the command (surrounding whitespace is ignored)
   * @param ctx The context used by Pi
   * @param pi The Pi extension
   */
  async handleCommand(
    args: string,
    ctx: ExtensionCommandContext,
    pi: ExtensionAPI,
  ): Promise<void> {
    // Re-register providers so Pi sees updated model states
    await this.serverManager.update(pi);

    // Notify about unreachable servers
    for (const url of this.serverManager.failedUrls) {
      this.notifyNotFound(ctx, url);
    }

    // Tolerate stray whitespace such as "/models unload "
    const subcommand = args.trim();

    if (subcommand === "unload") {
      await Promise.all(
        this.serverManager.getAllModels().map((model) => model.unload()),
      );
      ctx.ui.notify(`Unloaded all ${PROVIDER_NAME} models`, "info");
      return;
    }

    if (subcommand === "info") {
      const infos = await Promise.all(
        this.serverManager.getAllModels().map((model) => model.getInfo()),
      );
      ctx.ui.notify(ctx.ui.theme.fg("accent", infos.join("\n")), "info");
      return;
    }

    // Interactive menu: show <name> (<server_url>)
    await this.runModelsMenu(ctx, pi);
  }

  /**
   * Notifies the user that a server is unreachable.
   */
  private notifyNotFound(ctx: ExtensionCommandContext, url: string): void {
    ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
  }

  /**
   * Looks up the Pi registry entry that corresponds to `model`.
   *
   * @throws Error when the registry has no entry for the model
   */
  private requirePiModel(ctx: ExtensionCommandContext, model: BaseModel) {
    const piModel = ctx.modelRegistry.find(model.serverId, model.id);
    if (!piModel)
      throw new Error(`Cannot find model ${model.name} in pi registry`);

    return piModel;
  }

  /**
   * Runs the interactive model selection menu and performs the chosen action.
   *
   * Load-type actions are started in the background; this method returns as
   * soon as the load has been kicked off.
   */
  private async runModelsMenu(
    ctx: ExtensionCommandContext,
    pi: ExtensionAPI,
  ): Promise<void> {
    const event = await this.modelSelectionHandler(
      ctx,
      this.serverManager.getAllModels(),
    );

    if (!event) return;
    const { action, model } = event;

    // Action: Cancel
    if (!action || action === Action.CANCEL) return;

    // Action: Info
    if (action === Action.INFO) {
      const info = await model.getInfo();
      ctx.ui.notify(`${info}`, "info");
      return;
    }

    // Action: Unload
    if (action === Action.UNLOAD) {
      await model.unload();
      ctx.ui.notify(`Unloaded ${model.name}`, "info");
      return;
    }

    // Action: Switch
    if (action === Action.SWITCH) {
      await pi.setModel(this.requirePiModel(ctx, model));
      ctx.ui.notify(`Model ${model.name} ready`, "info");
      return;
    }

    // Actions: Load / Load & Switch / Retry
    const loadActions: readonly Action[] = [
      Action.LOAD,
      Action.LOAD_AND_SWITCH,
      Action.RETRY,
    ];
    if (!loadActions.includes(action)) return;

    ctx.ui.notify(`Loading ${model.name}...`, "info");
    EventManager.inflightModel = model;

    const onSuccess = async () => {
      const piModel = this.requirePiModel(ctx, model);

      // Query the status once so both checks look at the same snapshot
      const status = await model.getStatus();

      // Verify auth
      if (status === Status.UNAUTHORIZED)
        throw new Error(
          `Unauthorized for ${model.name}. Use /login and add your API key.`,
        );

      // Verify failure
      if (status === Status.FAILED)
        throw new Error(`Failed to load model ${model.name}`);

      // Select the model if asked
      if (action === Action.LOAD_AND_SWITCH) await pi.setModel(piModel);

      ctx.ui.notify(`Model ${model.name} ready`, "info");
    };

    const onFailure = (err: unknown) => {
      const message = err instanceof Error ? err.message : String(err);

      try {
        ctx.ui.notify(message, "error");
      } catch {
        // ctx went stale between error and notification
      }
    };

    const onSettled = () => {
      // A newer load may have replaced the marker meanwhile; only clear our own
      if (EventManager.inflightModel === model)
        EventManager.resetInflightModel();
    };

    // Load the model without blocking the UI
    void model.load().then(onSuccess).catch(onFailure).finally(onSettled);
  }

  /**
   * Handles the menu for model selection.
   * Loops: select model → select action.
   *
   * Escape on actions menu goes back to model selection.
   * Escape on model selection exits.
   *
   * @returns The selected action and model, or `null` when the user exits
   */
  private async modelSelectionHandler(
    ctx: ExtensionCommandContext,
    models: BaseModel[],
  ): Promise<{ action: Action; model: BaseModel } | null> {
    for (;;) {
      // Select the model
      const model = await this.selectModel(ctx, models);
      if (!model) return null;

      // Select the action
      const actions = await this.getActionsForModel(model);
      const action = await this.selectAction(ctx, model, actions);

      // null => Escape key pressed, go back to model selection
      if (action !== null) return { action, model };
    }
  }

  /**
   * Select a model from the list. Returns null if user cancels or if there
   * are no models to choose from.
   *
   * @returns The model selected by the user
   */
  private async selectModel(
    ctx: ExtensionCommandContext,
    models: BaseModel[],
  ): Promise<BaseModel | null> {
    // Nothing to pick from: avoid Math.max() of nothing and an empty picker
    if (models.length === 0) {
      ctx.ui.notify(`No ${PROVIDER_NAME} models available`, "warning");
      return null;
    }

    // Count grapheme clusters (not UTF-16 code units) so emoji padding aligns visually
    const segmenter = new Intl.Segmenter();
    const graphemeLength = (str: string) =>
      [...segmenter.segment(str)].length;

    const entries = await Promise.all(
      models.map(async (model) => {
        const label = (await model.getLabel()).trim();
        return {
          label,
          serverUrl: model.serverUrl,
          width: graphemeLength(label),
        };
      }),
    );

    // Decorate the label so the spacing makes it seem more like a table
    const extraPadding = 2;
    const maxLength = Math.max(...entries.map(({ width }) => width));
    const choices = entries.map(({ label, serverUrl, width }) => {
      const padLen = maxLength - width + extraPadding;
      return `${label}${" ".repeat(padLen)} [Server: ${serverUrl}]`;
    });

    const choice = await ctx.ui.select(`${PROVIDER_NAME} models:`, choices);
    if (!choice) return null;

    // choices[i] was built from models[i]
    return models[choices.indexOf(choice)] ?? null;
  }

  /**
   * Get available actions for a model based on its mode and status.
   *
   * @returns The actions offered for the model's current status; only
   *          "cancel" when the status has no mapping
   */
  private async getActionsForModel(model: BaseModel): Promise<Array<Action>> {
    const allActions: Record<Status, Array<Action>> = {
      [Status.LOADED]:
        model.mode === Mode.ROUTER
          ? [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL]
          : [Action.SWITCH, Action.INFO, Action.CANCEL],
      [Status.LOADING]: [Action.INFO, Action.CANCEL],
      [Status.FAILED]: [Action.RETRY, Action.CANCEL],
      [Status.SLEEPING]: [
        Action.SWITCH,
        Action.UNLOAD,
        Action.INFO,
        Action.CANCEL,
      ],
      [Status.UNLOADED]: [Action.LOAD_AND_SWITCH, Action.LOAD, Action.CANCEL],
      [Status.UNAUTHORIZED]: [Action.INFO, Action.CANCEL],
    };

    const status = await model.getStatus();
    // A status missing from the table must not crash the action menu
    return allActions[status] ?? [Action.CANCEL];
  }

  /**
   * Selects an action for a model.
   *
   * @returns The selected action, or `null` when the user dismisses the menu
   */
  private async selectAction(
    ctx: ExtensionCommandContext,
    model: BaseModel,
    actions: Array<Action>,
  ): Promise<Action | null> {
    const labels = actions.map((a) => String(a));
    const choice = await ctx.ui.select(`${model.name}`, labels);
    if (!choice) return null;

    return actions[labels.indexOf(choice)] ?? null;
  }
}
@@ -0,0 +1,101 @@
1
+ import {
2
+ type BeforeProviderRequestEvent,
3
+ type ExtensionContext,
4
+ } from "@earendil-works/pi-coding-agent";
5
+ import { READABLE_TIMEOUT } from "../constants";
6
+ import { ModelSelectEvent } from "../interfaces/events";
7
+ import { BaseModel } from "../models/baseModel";
8
+ import { ConfigResolver } from "../resolver";
9
+ import { Server } from "../server";
10
+
11
/**
 * Handlers for the Pi events this extension reacts to: model selection,
 * session switches and outgoing provider requests.
 */
export class EventManager {
  /** Model whose background load is still running, if any (shared by all instances). */
  static inflightModel: BaseModel | null = null;
  private readonly resolver = new ConfigResolver();

  constructor(private readonly servers: Server[]) {}

  /**
   * Resets the in-flight model reference.
   */
  static resetInflightModel() {
    EventManager.inflightModel = null;
  }

  /**
   * Reacts to a new model event triggered by Pi.
   *
   * Loads the selected model when it belongs to one of the known servers and
   * reports the outcome to the user; other models are ignored.
   *
   * @param event Model selection event
   * @param ctx Pi context
   */
  async onModelSelect(event: ModelSelectEvent, ctx: ExtensionContext) {
    for (const { providerId, models } of this.servers) {
      if (event.model.provider !== providerId) continue;

      const model = models.find((m) => m.id === event.model.id);
      if (!model) continue;

      ctx.ui.notify(`Loading ${model.name}...`, "info");
      try {
        await model.load();
      } catch (err: unknown) {
        // Surface the reason instead of dropping it
        const reason = err instanceof Error ? err.message : String(err);
        ctx.ui.notify(
          `Failed to load model ${model.name}: ${reason}`,
          "error",
        );
        return;
      }

      ctx.ui.notify(`Model ${model.name} ready`, "info");
      return;
    }
  }

  /**
   * Session-switch handler. Registered once at extension init.
   * Only notifies if a model load is actually in-flight.
   *
   * @param ctx Pi context
   */
  async onSessionBeforeSwitch(ctx: ExtensionContext) {
    // Read the shared marker once; it can be cleared by a finishing load
    const inflight = EventManager.inflightModel;
    if (!inflight) return;

    const messages = [
      `Session change detected while model '${inflight.name}' was still loading.`,
      "Model load will continue in the background, but UI might not update.",
      "",
      "Verify that your new model is loaded, or use /models to re-select it afterwards.",
    ];
    ctx.ui.notify(messages.join("\n"), "warning");

    // Show the notification for a reasonable amount of time
    await new Promise((r) => setTimeout(r, READABLE_TIMEOUT));
  }

  /**
   * Intercepts the request to add extra information, useful to llama.cpp.
   * Adds a custom thinking budget to the request payload.
   *
   * The payload is returned untouched when it names no model, when the model
   * does not belong to one of our servers, when the thinking level is
   * "xhigh", or when no budget is configured for the current level. With
   * level "off", thinking is disabled through `chat_template_kwargs`.
   *
   * @param event Request event
   * @returns Updated payload
   */
  async onBeforeProviderRequest(event: BeforeProviderRequestEvent) {
    const payload = event.payload as { model?: string };
    // Optional chaining guards against a nullish payload at runtime
    const model = payload?.model;
    if (!model) return payload;

    // Check if this model belongs to one of our servers
    const isLlamaCpp = this.servers.some((s) =>
      s.models.some((m) => m.id === model),
    );

    if (!isLlamaCpp) return payload;

    // Retrieve pi's current thinking level, so we can setup a budget
    const level = this.resolver.resolveThinkingLevel() ?? "medium";

    // Setup payload
    if (level === "off")
      return { ...payload, chat_template_kwargs: { enable_thinking: false } };

    if (level === "xhigh") return payload;

    // Budgets are only needed from here on
    const thinking_budget_tokens = this.resolver.resolveThinkingBudgets()[level];

    // Never send an explicit `undefined` budget
    if (thinking_budget_tokens == null) return payload;

    return { ...payload, thinking_budget_tokens };
  }
}
@@ -0,0 +1,136 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { API_TYPE, PROVIDER_NAME, SERVER_TIMEOUT } from "../constants";
3
+ import { ServerStatus } from "../enums/serverStatus";
4
+ import { BaseModel } from "../models/baseModel";
5
+ import { Server } from "../server";
6
+
7
/**
 * Keeps track of the configured servers: health-checks them, registers one
 * Pi provider per server and remembers which URLs failed.
 */
export class ServerManager {
  /** Base URLs of the servers that failed during the most recent `update`. */
  readonly failedUrls: string[] = [];
  private readonly warnings: string[] = [];

  constructor(private readonly servers: Server[]) {}

  /**
   * Verifies reachability of servers and registers the providers
   *
   * @param pi The Pi extension API
   */
  async initialize(pi: ExtensionAPI) {
    // Register the providers with a timeout first
    await this.update(pi, SERVER_TIMEOUT);
  }

  /**
   * Registers one provider per server in Pi with their model configurations.
   * The manual awaiting per-server is deliberate (we want them in order)
   *
   * `failedUrls` is reset at the start of every call. When a timeout is
   * given, only servers that pass the health check are registered.
   *
   * @param pi The Pi extension API
   * @param timeout (Optional) Timeout before assuming server has failed
   */
  async update(pi: ExtensionAPI, timeout?: number) {
    this.failedUrls.length = 0;

    const registrableServers = timeout
      ? await this.findRegistrableServers(timeout)
      : this.servers;

    // Initialization and registration
    for (const server of registrableServers) {
      try {
        await server.initialize();
        await this.registerProvider(server, pi);
      } catch {
        // Best effort: remember the failure and move on to the next server
        this.failedUrls.push(server.baseUrl);
      }
    }
  }

  /**
   * Runs concurrent health checks and returns only healthy servers.
   *
   * Every other server gets a warning queued and its URL added to
   * `failedUrls`. A health check that throws is treated as unreachable so one
   * broken server cannot abort the checks of the others.
   *
   * @param timeout Maximum time to wait for each server
   * @returns Array of servers that passed the health check
   */
  private async findRegistrableServers(timeout: number): Promise<Server[]> {
    const healthResults = await Promise.all(
      this.servers.map(async (server) => {
        try {
          return { server, status: await server.isReady(timeout) };
        } catch {
          return { server, status: undefined };
        }
      }),
    );

    const response: Server[] = [];
    for (const { server, status } of healthResults) {
      if (status === ServerStatus.READY) {
        response.push(server);
        continue;
      }

      const lines =
        status === ServerStatus.TIMEOUT
          ? [
              // Report the timeout that was actually applied
              `${PROVIDER_NAME} server initialization for '${server.baseUrl}' took more than ${timeout} ms, so it has been skipped.`,
              "Run `/models` to retry without timeout and see all models.",
            ]
          : [
              `${PROVIDER_NAME} server at '${server.baseUrl}' is unreachable.`,
              "Check the URL and try again. Run `/models` to retry.",
            ];

      this.warnings.push(["[pi-llama-cpp]", ...lines].join("\n"));
      this.failedUrls.push(server.baseUrl);
    }

    return response;
  }

  /**
   * Creates a Pi provider for the given server
   *
   * @param server The server
   * @param pi The Pi extension API
   */
  private async registerProvider(server: Server, pi: ExtensionAPI) {
    const { baseUrl, models, providerId, providerName } = server;
    const apiKey = await server.getApiKey();
    const modelConfigs = await Promise.all(
      models.map((m) => m.toProviderConfig()),
    );

    pi.registerProvider(providerId, {
      name: providerName,
      baseUrl,
      api: API_TYPE,
      apiKey,
      models: modelConfigs,
    });
  }

  /**
   * Returns the warnings collected by the health checks and clears them, so
   * each warning is handed out only once.
   */
  getWarnings(): string[] {
    const warnings = [...this.warnings];
    this.warnings.length = 0;

    return warnings;
  }

  /**
   * Returns all models from all servers.
   *
   * @returns Flat array of all models across all servers
   */
  getAllModels(): BaseModel[] {
    return this.servers.flatMap(({ models }) => models);
  }
}