pi-llama-cpp 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,63 @@
1
+ import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
2
+ import { READABLE_TIMEOUT } from "../constants";
3
+ import { ModelSelectEvent } from "../interfaces/events";
4
+ import { BaseModel } from "../models/baseModel";
5
+ import { Server } from "../server";
6
+
7
export class EventManager {
  /**
   * Model whose load is considered in-flight, or `null` when none is tracked.
   * Within this class it is only ever cleared (see {@link resetInflightModel}).
   */
  static inflightModel: BaseModel | null = null;

  constructor(private readonly servers: Server[]) {}

  /**
   * Handles a model-selection event coming from Pi.
   *
   * Searches the servers whose provider id matches the event for the selected
   * model, loads the first match and reports the outcome through the UI.
   * Does nothing when no server owns the selected model.
   *
   * @param event Model selection event
   * @param ctx Pi context
   */
  async onModelSelect(event: ModelSelectEvent, ctx: ExtensionContext) {
    const selected = this.servers
      .filter((server) => server.providerId === event.model.provider)
      .map((server) =>
        server.models.find((candidate) => candidate.id === event.model.id),
      )
      .find((candidate) => Boolean(candidate));
    if (!selected) return;

    ctx.ui.notify(`Loading ${selected.name}...`, "info");
    try {
      await selected.load();
      ctx.ui.notify(`Model ${selected.name} ready`, "info");
    } catch {
      ctx.ui.notify(`Failed to load model ${selected.name}`, "error");
    }
  }

  /**
   * Handler invoked before a session switch.
   * Warns the user only when a model load is currently tracked as in-flight,
   * then waits READABLE_TIMEOUT milliseconds before resolving.
   *
   * @param ctx Pi context
   */
  async onSessionBeforeSwitch(ctx: ExtensionContext) {
    const pending = EventManager.inflightModel;
    if (!pending) return;

    const lines = [
      `Session change detected while model '${pending.name}' was still loading.`,
      "Model load will continue in the background, but UI might not update.",
      "",
      "Verify that your new model is loaded, or use /models to re-select it afterwards.",
    ];
    ctx.ui.notify(lines.join("\n"), "warning");

    // Keep the warning on screen long enough to be read
    await new Promise((resolve) => setTimeout(resolve, READABLE_TIMEOUT));
  }

  /**
   * Clears the in-flight model reference.
   */
  static resetInflightModel() {
    EventManager.inflightModel = null;
  }
}
@@ -0,0 +1,71 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { API_TYPE } from "../constants";
3
+ import { BaseModel } from "../models/baseModel";
4
+ import { Server } from "../server";
5
+
6
export class ServerManager {
  /** Base URLs of the servers whose initialization failed during the last registration run. */
  readonly failedUrls: string[] = [];

  constructor(private readonly servers: Server[]) {}

  /**
   * Registers one Pi provider per server, together with its model configurations.
   * The list of failed URLs is emptied first. Servers are processed strictly
   * one after another so that providers are registered in order.
   *
   * @param pi The Pi extension
   */
  async registerAllProviders(pi: ExtensionAPI) {
    this.failedUrls.splice(0, this.failedUrls.length);

    for (const current of this.servers) {
      await this.registerProvider(current, pi);
    }
  }

  /**
   * Initializes the given server and registers it as a Pi provider.
   * When initialization throws, the server's base URL is recorded in
   * `failedUrls` and no provider is registered.
   *
   * @param server The server
   * @param pi The Pi extension
   */
  private async registerProvider(server: Server, pi: ExtensionAPI) {
    try {
      await server.initialize();
    } catch {
      this.failedUrls.push(server.baseUrl);
      return;
    }

    // Gather everything the Pi registration needs
    const {
      baseUrl,
      models: serverModels,
      providerId,
      providerName: name,
    } = server;
    const apiKey = await server.getApiKey();
    const models = await Promise.all(
      serverModels.map((entry) => entry.toProviderConfig()),
    );

    pi.registerProvider(providerId, {
      name,
      baseUrl,
      api: API_TYPE,
      apiKey,
      models,
    });
  }

  /**
   * Collects the models of every server.
   *
   * @returns Flat array of all models across all servers
   */
  getAllModels(): BaseModel[] {
    return this.servers.flatMap((server) => server.models);
  }
}
@@ -1,10 +1,9 @@
1
1
  import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
2
- import { POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
2
+ import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
3
3
  import { Mode } from "../enums/mode";
4
4
  import { Status } from "../enums/status";
5
- import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
6
- import { PropsEndpoint } from "../interfaces/endpoints/props";
7
- import { rpc } from "../tools/retriever";
5
+ import { DataProperty } from "../interfaces/endpoints/models";
6
+ import { Server } from "../server";
8
7
 
9
8
  /**
10
9
  * Abstract base class for llama-server models.
@@ -12,7 +11,10 @@ import { rpc } from "../tools/retriever";
12
11
  * loading/unloading, and configuration conversion.
13
12
  */
14
13
  export abstract class BaseModel {
15
- constructor(protected readonly model: DataProperty) {}
14
+ constructor(
15
+ protected readonly model: DataProperty,
16
+ protected readonly server: Server,
17
+ ) {}
16
18
 
17
19
  protected readonly statusMapper: Record<string, Status> = {
18
20
  loaded: Status.LOADED,
@@ -28,20 +30,44 @@ export abstract class BaseModel {
28
30
  [Status.FAILED]: "🔴",
29
31
  [Status.SLEEPING]: "🔵",
30
32
  [Status.UNLOADED]: "⚪",
33
+ [Status.UNAUTHORIZED]: "⛔",
31
34
  };
32
35
 
33
36
  abstract get mode(): Mode;
34
37
 
38
+ /**
39
+ * Returns the server URL associated with this model
40
+ */
41
+ get serverUrl(): string {
42
+ return this.server.baseUrl;
43
+ }
44
+
45
+ /**
46
+ * Returns the provider id associated with this model
47
+ */
48
+ get serverId(): string {
49
+ return this.server.providerId;
50
+ }
51
+
52
+ /**
53
+ * Returns the model's unique identifier
54
+ */
35
55
  get id(): string {
36
56
  return this.model.id;
37
57
  }
38
58
 
59
+ /**
60
+ * Returns the model's display name (first alias, or id as fallback)
61
+ */
39
62
  get name(): string {
40
63
  return this.model.aliases?.[0] || this.model.id;
41
64
  }
42
65
 
66
+ /**
67
+ * Whether the model is a reasoning model.
68
+ * Currently always returns true since there's no way to detect this from llama-server.
69
+ */
43
70
  get reasoning(): boolean {
44
- // We don't have a way to detect this, so we'll fallback to true
45
71
  return true;
46
72
  }
47
73
 
@@ -50,21 +76,38 @@ export abstract class BaseModel {
50
76
  *
51
77
  * @returns An array of capabilities, as expected by Pi
52
78
  */
53
- abstract getCapabilities(): Promise<("text" | "image")[]>;
79
+ async getCapabilities(): Promise<("text" | "image")[]> {
80
+ try {
81
+ // When loaded, this works alright
82
+ const { modalities } = await this.server.fetchModelProps(this.id);
83
+ return modalities.vision ? ["text", "image"] : ["text"];
84
+ } catch {
85
+ // Otherwise, we have to search for it ourselves
86
+ const { data } = await this.server.fetchModels();
87
+ const model = data.find((d) => d.id === this.id);
88
+ if (!model) return ["text"];
89
+
90
+ const { input_modalities } = model.architecture!;
91
+ const response = input_modalities.filter(
92
+ (mod) => mod === "text" || mod === "image",
93
+ );
94
+
95
+ return response;
96
+ }
97
+ }
54
98
 
55
99
  /**
56
100
  * Gets the load status of the model
57
101
  *
58
- * @returns The current status
102
+ * @returns The current {@link Status}
59
103
  */
60
104
  public async getStatus(): Promise<Status> {
61
105
  try {
62
- const { is_sleeping, error } = await rpc<PropsEndpoint>(
63
- `/props?model=${this.id}&autoload=false`,
64
- );
106
+ const { is_sleeping, error } = await this.server.fetchModelProps(this.id);
65
107
 
66
108
  if (is_sleeping) return Status.SLEEPING;
67
109
  if (!error) return Status.LOADED;
110
+ if (error.code === 401) return Status.UNAUTHORIZED;
68
111
  if (error.code === 503) return Status.LOADING;
69
112
  if (error.code === 400 && error.message === "model is not loaded")
70
113
  return Status.UNLOADED;
@@ -76,19 +119,23 @@ export abstract class BaseModel {
76
119
  }
77
120
 
78
121
  /**
79
- * Gets the context size of a particular model
122
+ * Gets the context size of a particular model.
80
123
  *
81
- * @returns The detected context size
124
+ * @returns The context size in tokens
82
125
  */
83
126
  async getContextSize(): Promise<number> {
84
- const { data } = await rpc<ModelsEndpoint>("/models");
85
- const { n_ctx } = data.find((m) => m.id === this.id)?.meta!;
127
+ try {
128
+ const { data } = await this.server.fetchModels();
129
+ const { n_ctx } = data.find((m) => m.id === this.id)?.meta!;
86
130
 
87
- return n_ctx;
131
+ return n_ctx ?? DEFAULT_CTX;
132
+ } catch {
133
+ return DEFAULT_CTX;
134
+ }
88
135
  }
89
136
 
90
137
  /**
91
- * Sets up a label for the model selection screen
138
+ * Returns a label for the model selection screen
92
139
  * @returns A label structured as "<icon> <name>"
93
140
  */
94
141
  async getLabel(): Promise<string> {
@@ -97,11 +144,12 @@ export abstract class BaseModel {
97
144
  }
98
145
 
99
146
  /**
100
- * Returns a human-readable information about the model
147
+ * Returns human-readable information about the model
101
148
  * @returns A string with the model information
102
149
  */
103
150
  async getInfo(): Promise<string> {
104
151
  const messages = [
152
+ `Server : ${this.serverUrl}`,
105
153
  `ID : ${this.id}`,
106
154
  `Model : ${this.name}`,
107
155
  `Reasoning : ${this.reasoning}`,
@@ -140,7 +188,7 @@ export abstract class BaseModel {
140
188
  const status = await this.getStatus();
141
189
  if (status === Status.LOADED || status === Status.SLEEPING) return;
142
190
 
143
- await rpc("/models/load", { model: this.id });
191
+ await this.server.postRequest("load", this.id);
144
192
  await this.pollStatus();
145
193
  }
146
194
 
@@ -148,7 +196,7 @@ export abstract class BaseModel {
148
196
  * Unloads the model from llama-server
149
197
  */
150
198
  async unload(): Promise<void> {
151
- await rpc("/models/unload", { model: this.id });
199
+ await this.server.postRequest("unload", this.id);
152
200
  }
153
201
 
154
202
  /**
@@ -0,0 +1,45 @@
1
+ import { DEFAULT_CTX } from "../constants";
2
+ import { Mode } from "../enums/mode";
3
+ import { SingleModel } from "./singleModel";
4
+
5
export class LegacyModel extends SingleModel {
  /** Identifies this model as running in legacy mode. */
  get mode(): Mode {
    return Mode.LEGACY;
  }

  /**
   * Retrieves the context size when the user is running
   * a server that uses legacy models, such as ik_llama.cpp
   *
   * The first model's `max_model_len` wins when it is a positive number;
   * otherwise the `n_ctx` reported by the props endpoint is used. When
   * neither request succeeds or neither value is usable, DEFAULT_CTX is
   * returned instead of throwing, matching the fallback of the base class.
   *
   * @returns The context size in tokens
   */
  async getContextSize(): Promise<number> {
    // A failed request yields `undefined` so the other source can still be used
    const attempt = async <T>(
      request: () => Promise<T>,
    ): Promise<T | undefined> => {
      try {
        return await request();
      } catch {
        return undefined;
      }
    };

    const [props, models] = await Promise.all([
      attempt(() => this.server.fetchModelProps(this.id)),
      attempt(() => this.server.fetchModels()),
    ]);

    // Legacy payloads carry fields that the typed endpoints do not declare
    const nCtx = (props as unknown as { n_ctx?: unknown } | undefined)?.n_ctx;
    const maxModelLen = (
      models as unknown as
        | { data?: ({ max_model_len?: unknown } | null)[] }
        | undefined
    )?.data?.[0]?.max_model_len;

    const isUsable = (value: unknown): value is number =>
      typeof value === "number" && Number.isFinite(value) && value > 0;

    // A max_model_len of 0 (or a missing one) means "not reported": use n_ctx
    if (isUsable(maxModelLen)) return maxModelLen;
    if (isUsable(nCtx)) return nCtx;
    return DEFAULT_CTX;
  }

  /**
   * Detects the capabilities of the model when the user is running
   * a server that uses legacy models, such as ik_llama.cpp
   *
   * @returns An array of capabilities, as expected by Pi
   */
  async getCapabilities(): Promise<("text" | "image")[]> {
    try {
      return await super.getCapabilities();
    } catch {
      // When auth is wrong in a legacy model, we simply can't detect the real capabilities
      return ["text"];
    }
  }
}
@@ -1,9 +1,6 @@
1
1
  import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
2
2
  import { Mode } from "../enums/mode";
3
3
  import { Status } from "../enums/status";
4
- import { ModelsEndpoint } from "../interfaces/endpoints/models";
5
- import { PropsEndpoint } from "../interfaces/endpoints/props";
6
- import { rpc } from "../tools/retriever";
7
4
  import { BaseModel } from "./baseModel";
8
5
 
9
6
  /**
@@ -16,19 +13,6 @@ export class RouterModel extends BaseModel {
16
13
  return Mode.ROUTER;
17
14
  }
18
15
 
19
- async getStatus(): Promise<Status> {
20
- const { data } = await rpc<ModelsEndpoint>("/models");
21
- const model = data.find((m) => m.id === this.id);
22
- if (!model) return Status.FAILED;
23
-
24
- const status = this.statusMapper[model.status!.value];
25
- if (status === Status.UNLOADED || status === Status.LOADING) {
26
- return super.getStatus();
27
- }
28
-
29
- return status;
30
- }
31
-
32
16
  /**
33
17
  * Workaround for the currently-bugged /models status detection
34
18
  * (I suspect it was introduced in PR #22683 of llama.cpp)
@@ -50,7 +34,7 @@ export class RouterModel extends BaseModel {
50
34
  // Grab the glitch
51
35
  while (Date.now() - startTime <= limit) {
52
36
  try {
53
- await rpc<PropsEndpoint>(`/props?model=${this.id}&autoload=false`);
37
+ await this.server.fetchModelProps(this.id);
54
38
  break;
55
39
  } catch {
56
40
  elapsed += POLLING_INTERVAL;
@@ -62,19 +46,12 @@ export class RouterModel extends BaseModel {
62
46
  return await super.pollStatus(startTime, timeout);
63
47
  }
64
48
 
65
- async getCapabilities(): Promise<("text" | "image")[]> {
66
- const { data } = await rpc<ModelsEndpoint>(`/models`);
67
- const model = data.find((d) => d.id === this.id);
68
- if (!model) return ["text"];
69
-
70
- const { input_modalities } = model.architecture!;
71
- const response = input_modalities.filter(
72
- (mod) => mod === "text" || mod === "image",
73
- );
74
-
75
- return response;
76
- }
77
-
49
+ /**
50
+ * Gets the context size of a particular model.
51
+ * In router mode, falls back to parsing CLI args when the model is unloaded.
52
+ *
53
+ * @returns The context size in tokens
54
+ */
78
55
  async getContextSize(): Promise<number> {
79
56
  // We can get a more accurate context size if the model is already loaded
80
57
  if ((await this.getStatus()) === Status.LOADED) {
@@ -1,6 +1,4 @@
1
1
  import { Mode } from "../enums/mode";
2
- import { ModelsEndpoint } from "../interfaces/endpoints/models";
3
- import { rpc } from "../tools/retriever";
4
2
  import { BaseModel } from "./baseModel";
5
3
 
6
4
  export class SingleModel extends BaseModel {
@@ -9,10 +7,15 @@ export class SingleModel extends BaseModel {
9
7
  }
10
8
 
11
9
  async getCapabilities(): Promise<("text" | "image")[]> {
12
- const { models } = await rpc<ModelsEndpoint>(`/models`);
13
- const [model] = models!;
10
+ try {
11
+ return await super.getCapabilities();
12
+ } catch {
13
+ // This is required when auth is wrong
14
+ const { models } = await this.server.fetchModels();
15
+ const [{ capabilities }] = models!;
14
16
 
15
- const hasImage = model.capabilities.includes("multimodal");
16
- return hasImage ? ["text", "image"] : ["text"];
17
+ const hasImage = capabilities.includes("multimodal");
18
+ return hasImage ? ["text", "image"] : ["text"];
19
+ }
17
20
  }
18
21
  }
@@ -0,0 +1,123 @@
1
+ import { getAgentDir } from "@earendil-works/pi-coding-agent";
2
+ import { access, constants, readFile } from "node:fs/promises";
3
+ import { join } from "node:path";
4
+ import { API_KEY_PLACEHOLDER, DEFAULT_LLAMA_SERVER_URL } from "./constants";
5
+ import { AuthFile } from "./interfaces/auth";
6
+
7
export class ConfigResolver {
  /** Resolved URL lists, keyed by the working directory they were resolved for. */
  private readonly cachedUrls = new Map<string, string[]>();

  /**
   * Detects if a particular file is present
   */
  private async fileExists(filePath: string): Promise<boolean> {
    try {
      await access(filePath, constants.F_OK);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Reads and parses the contents of a file as JSON
   *
   * @returns The parsed content, or `null` when the file cannot be read or
   * does not contain valid JSON
   */
  private async readJson<T>(filePath: string): Promise<T | null> {
    try {
      // Reading happens inside the try: the file may vanish or be unreadable
      // even after an existence check
      const raw = await readFile(filePath, "utf-8");
      return JSON.parse(raw) as T;
    } catch {
      return null;
    }
  }

  /**
   * Reads a value from a JSON config file by key
   */
  private async readConfigValue<T>(
    filePath: string,
    key: keyof T,
  ): Promise<T[keyof T] | null> {
    const cfg = await this.readJson<T>(filePath);
    return cfg?.[key] ?? null;
  }

  /**
   * Reads a config value and keeps it only when it is a string, so that a
   * malformed config (number, array, object...) is treated as "not set".
   */
  private async readStringValue(
    filePath: string,
    key: string,
  ): Promise<string | null> {
    if (!(await this.fileExists(filePath))) return null;

    const value = await this.readConfigValue<Record<string, unknown>>(
      filePath,
      key,
    );
    return typeof value === "string" ? value : null;
  }

  /**
   * Splits a ";"-separated URL list into trimmed URLs without trailing
   * slashes. Empty segments (including ones that consist only of slashes)
   * and duplicates are dropped; the original order is preserved.
   */
  private parseUrls(raw: string): string[] {
    const urls = raw
      .split(";")
      .map((u) => u.trim().replace(/\/+$/, ""))
      .filter((u) => u.length > 0);

    return [...new Set(urls)];
  }

  /**
   * Resolves the llama-server URL by searching in the global settings.json
   */
  private async resolveGlobalUrl(): Promise<string | null> {
    const globalPath = join(getAgentDir(), "settings.json");
    return this.readStringValue(globalPath, "llamaServerUrl");
  }

  /**
   * Resolves the llama-server URL by searching in the project's .pi/llama-server.json
   */
  private async resolveProjectUrl(cwd: string): Promise<string | null> {
    const projectPath = join(cwd, ".pi", "llama-server.json");
    return this.readStringValue(projectPath, "url");
  }

  /**
   * Resolves the llama-server URL from the environment
   */
  private async resolveEnvUrl(): Promise<string | null> {
    return process.env.LLAMA_SERVER_URL ?? null;
  }

  /**
   * Tries all possible ways to retrieve the llama-server URL(s).
   * Sources are consulted lazily in priority order; a source is skipped when
   * its value does not contain at least one usable URL.
   */
  private async extractJoinedUrls(cwd: string): Promise<string> {
    const sources: (() => Promise<string | null>)[] = [
      () => this.resolveProjectUrl(cwd), // 1. per-project config
      () => this.resolveEnvUrl(), // 2. env
      () => this.resolveGlobalUrl(), // 3. global settings
    ];

    for (const resolve of sources) {
      const candidate = await resolve();
      if (candidate && this.parseUrls(candidate).length > 0) return candidate;
    }

    // 4. default
    return DEFAULT_LLAMA_SERVER_URL;
  }

  /**
   * Resolves URLs where llama-servers are running (cached per working directory)
   *
   * @param cwd Working directory whose project config takes precedence
   * @returns The normalized server URLs
   */
  async resolveUrls(cwd: string): Promise<string[]> {
    const cached = this.cachedUrls.get(cwd);
    if (cached) return cached;

    const urls = this.parseUrls(await this.extractJoinedUrls(cwd));

    // Only non-empty results are cached, so a later call can retry
    if (urls.length > 0) this.cachedUrls.set(cwd, urls);
    return urls;
  }

  /**
   * Resolves API key for the provider ID using Pi's auth.json
   * Deliberately not cached, to react to changes in the file
   *
   * @returns The configured key, or API_KEY_PLACEHOLDER when the file, the
   * provider entry or a string key is missing
   */
  async resolveApiKey(providerId: string): Promise<string> {
    const authPath = join(getAgentDir(), "auth.json");
    if (!(await this.fileExists(authPath))) return API_KEY_PLACEHOLDER;

    const auth = await this.readJson<AuthFile>(authPath);
    const apiKey: unknown = auth?.[providerId]?.key;

    return typeof apiKey === "string" ? apiKey : API_KEY_PLACEHOLDER;
  }
}