pi-llama-cpp 0.3.1 → 0.3.3

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-llama-cpp",
3
- "version": "0.3.1",
3
+ "version": "0.3.3",
4
4
  "description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
5
5
  "keywords": [
6
6
  "pi",
@@ -38,7 +38,12 @@ const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
38
38
  [Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
39
39
  [Status.LOADING]: [Action.INFO, Action.CANCEL],
40
40
  [Status.FAILED]: [Action.RETRY, Action.CANCEL],
41
- [Status.SLEEPING]: [Action.UNLOAD, Action.INFO, Action.CANCEL],
41
+ [Status.SLEEPING]: [
42
+ Action.SWITCH,
43
+ Action.UNLOAD,
44
+ Action.INFO,
45
+ Action.CANCEL,
46
+ ],
42
47
  [Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
43
48
  };
44
49
 
@@ -1,11 +1,8 @@
1
-
2
1
  /**
3
2
  * The structure of llama-server's /props endpoint
4
- *
5
- * In single mode, applies to /props
6
- * In router mode, applies to /props?model=<id>
7
3
  */
8
4
  export interface PropsEndpoint {
5
+ error?: PropsError;
9
6
  default_generation_settings: Record<string, any>;
10
7
  total_slots: number;
11
8
  model_alias: string;
@@ -27,3 +24,9 @@ export interface PropsEndpoint {
27
24
  build_info: string;
28
25
  is_sleeping: boolean;
29
26
  }
27
+
28
+ export interface PropsError {
29
+ code: number;
30
+ message: string;
31
+ type: string;
32
+ }
@@ -69,11 +69,31 @@ export abstract class BaseModel {
69
69
 
70
70
  /**
71
71
  * Gets the load status of the model
72
+ *
73
+ * @returns The current status
72
74
  */
73
- abstract getStatus(): Promise<Status>;
75
+ public async getStatus(): Promise<Status> {
76
+ try {
77
+ const { is_sleeping, error } = await rpc<PropsEndpoint>(
78
+ `/props?model=${this.id}`,
79
+ );
80
+
81
+ if (is_sleeping) return Status.SLEEPING;
82
+ if (!error) return Status.LOADED;
83
+ if (error.code === 503) return Status.LOADING;
84
+ if (error.code === 400 && error.message === "model is not loaded")
85
+ return Status.UNLOADED;
86
+
87
+ return Status.FAILED;
88
+ } catch (err) {
89
+ return Status.FAILED;
90
+ }
91
+ }
74
92
 
75
93
  /**
76
94
  * Gets the context size of a particular model
95
+ *
96
+ * @returns The detected context size
77
97
  */
78
98
  async getContextSize(): Promise<number> {
79
99
  try {
@@ -116,6 +136,7 @@ export abstract class BaseModel {
116
136
 
117
137
  /**
118
138
  * Converts the llama-server model into a configuration object used by Pi
139
+ *
119
140
  * @returns A Pi configuration object
120
141
  */
121
142
  async toProviderConfig(): Promise<ProviderModelConfig> {
@@ -153,15 +174,21 @@ export abstract class BaseModel {
153
174
  * Polls llama-server to check when the model is loaded
154
175
  *
155
176
  * @param startTime The initial polling timestamp
177
+ * @param timeout The maximum amount of ms before timeout. Defaults to POLLING_TIMEOUT
178
+ * @param interval The polling interval. Defaults to POLLING_INTERVAL
156
179
  */
157
- async pollStatus(startTime = Date.now()): Promise<void> {
180
+ async pollStatus(
181
+ startTime: number = Date.now(),
182
+ timeout: number = POLLING_TIMEOUT,
183
+ interval: number = POLLING_INTERVAL,
184
+ ): Promise<void> {
158
185
  while ((await this.getStatus()) === Status.LOADING) {
159
186
  // Force a timeout if we wasted too much time polling
160
- if (Date.now() - startTime > POLLING_TIMEOUT) {
161
- const message = `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`;
187
+ if (Date.now() - startTime > timeout) {
188
+ const message = `Model loading timed out after ${timeout} ms: ${this.id}`;
162
189
  throw new Error(message);
163
190
  }
164
- await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
191
+ await new Promise((r) => setTimeout(r, interval));
165
192
  }
166
193
  }
167
194
  }
@@ -1,7 +1,8 @@
1
- import { DEFAULT_CTX } from "../constants";
1
+ import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
2
2
  import { Mode } from "../enums/mode";
3
3
  import { Status } from "../enums/status";
4
4
  import { ModelsEndpoint } from "../interfaces/endpoints/models";
5
+ import { PropsEndpoint } from "../interfaces/endpoints/props";
5
6
  import { rpc } from "../tools/retriever";
6
7
  import { BaseModel } from "./baseModel";
7
8
 
@@ -21,15 +22,46 @@ export class RouterModel extends BaseModel {
21
22
  if (!model) return Status.FAILED;
22
23
 
23
24
  const status = this.statusMapper[model.status!.value];
24
- if (status === Status.UNLOADED) {
25
- if (this.model.status!.failed) return Status.FAILED;
26
-
27
- return Status.UNLOADED;
25
+ if (status === Status.UNLOADED || status === Status.LOADING) {
26
+ return super.getStatus();
28
27
  }
29
28
 
30
29
  return status;
31
30
  }
32
31
 
32
+ /**
33
+ * Workaround for the currently-bugged /models status detection
34
+ * (I suspect it was introduced in PR #22683 of llama.cpp)
35
+ *
36
+ * When a model is loaded for the very first time,
37
+ * this workaround will try to poll to /props instead of /models
38
+ * for up to 5 seconds to try to detect if the model is really loading,
39
+ * or if it definitely failed.
40
+ *
41
+ * The tradeoff is that we'll have to wait for 5 seconds
42
+ * while the model is "loading", while not really loading.
43
+ *
44
+ * In exchange, it will allow unloaded models to be correctly shown as "unloaded".
45
+ */
46
+ async pollStatus(startTime = Date.now()): Promise<void> {
47
+ let elapsed = 0;
48
+ const limit = 5000;
49
+
50
+ // Grab the glitch
51
+ while (Date.now() - startTime <= limit) {
52
+ try {
53
+ await rpc<PropsEndpoint>(`/props?model=${this.id}`);
54
+ break;
55
+ } catch {
56
+ elapsed += POLLING_INTERVAL;
57
+ await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
58
+ }
59
+ }
60
+
61
+ const timeout = POLLING_TIMEOUT - elapsed;
62
+ return await super.pollStatus(startTime, timeout);
63
+ }
64
+
33
65
  async getCapabilities(): Promise<["text"] | ["image"]> {
34
66
  // We can get the real capabilities if the model is already loaded
35
67
  if ((await this.getStatus()) === Status.LOADED) {
@@ -1,5 +1,5 @@
1
+ import { DEFAULT_CTX } from "../constants";
1
2
  import { Mode } from "../enums/mode";
2
- import { Status } from "../enums/status";
3
3
  import { PropsEndpoint } from "../interfaces/endpoints/props";
4
4
  import { rpc } from "../tools/retriever";
5
5
  import { BaseModel } from "./baseModel";
@@ -9,11 +9,15 @@ export class SingleModel extends BaseModel {
9
9
  return Mode.SINGLE;
10
10
  }
11
11
 
12
- async getStatus(): Promise<Status> {
13
- // In single-mode, the extension will only work when the model is fully loaded
14
- const { is_sleeping } = await rpc<PropsEndpoint>("/props");
15
- if (is_sleeping) return Status.SLEEPING;
16
-
17
- return Status.LOADED;
12
+ async getContextSize(): Promise<number> {
13
+ try {
14
+ const { default_generation_settings } = await rpc<PropsEndpoint>(
15
+ `/props?model=${this.id}`,
16
+ );
17
+ const { n_ctx } = default_generation_settings;
18
+ return n_ctx;
19
+ } catch {
20
+ return DEFAULT_CTX;
21
+ }
18
22
  }
19
23
  }
@@ -115,7 +115,18 @@ describe("RouterModel context size extraction", () => {
115
115
  data: [
116
116
  {
117
117
  id: "test-model",
118
- status: { value: "loaded", args: ["--model", "gguf", "--ctx-size", "4096", "--fit-ctx", "8192"], preset: "default" },
118
+ status: {
119
+ value: "loaded",
120
+ args: [
121
+ "--model",
122
+ "gguf",
123
+ "--ctx-size",
124
+ "4096",
125
+ "--fit-ctx",
126
+ "8192",
127
+ ],
128
+ preset: "default",
129
+ },
119
130
  },
120
131
  ],
121
132
  });
@@ -149,7 +160,11 @@ describe("RouterModel context size extraction", () => {
149
160
  data: [
150
161
  {
151
162
  id: "test-model",
152
- status: { value: "loaded", args: ["--model", "gguf"], preset: "default" },
163
+ status: {
164
+ value: "loaded",
165
+ args: ["--model", "gguf"],
166
+ preset: "default",
167
+ },
153
168
  },
154
169
  ],
155
170
  });
@@ -186,7 +201,12 @@ describe("RouterModel capabilities detection", () => {
186
201
  data: [
187
202
  {
188
203
  id: "test-model",
189
- status: { value: "loaded", args: [], preset: "default", failed: false },
204
+ status: {
205
+ value: "loaded",
206
+ args: [],
207
+ preset: "default",
208
+ failed: false,
209
+ },
190
210
  },
191
211
  ],
192
212
  });
@@ -206,7 +226,12 @@ describe("RouterModel capabilities detection", () => {
206
226
  data: [
207
227
  {
208
228
  id: "test-model",
209
- status: { value: "loaded", args: [], preset: "default", failed: false },
229
+ status: {
230
+ value: "loaded",
231
+ args: [],
232
+ preset: "default",
233
+ failed: false,
234
+ },
210
235
  },
211
236
  ],
212
237
  });
@@ -225,7 +250,12 @@ describe("RouterModel capabilities detection", () => {
225
250
  data: [
226
251
  {
227
252
  id: "test-model",
228
- status: { value: "loaded", args: [], preset: "default", failed: false },
253
+ status: {
254
+ value: "loaded",
255
+ args: [],
256
+ preset: "default",
257
+ failed: false,
258
+ },
229
259
  },
230
260
  ],
231
261
  });
@@ -244,14 +274,24 @@ describe("RouterModel capabilities detection", () => {
244
274
  data: [
245
275
  {
246
276
  id: "test-model",
247
- status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
277
+ status: {
278
+ value: "unloaded",
279
+ args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
280
+ preset: "default",
281
+ failed: false,
282
+ },
248
283
  },
249
284
  ],
250
285
  });
251
286
 
252
287
  const model = new RouterModel(
253
288
  createModel({
254
- status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
289
+ status: {
290
+ value: "unloaded",
291
+ args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
292
+ preset: "default",
293
+ failed: false,
294
+ },
255
295
  }),
256
296
  );
257
297
  const capabilities = await model.getCapabilities();
@@ -265,14 +305,24 @@ describe("RouterModel capabilities detection", () => {
265
305
  data: [
266
306
  {
267
307
  id: "test-model",
268
- status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
308
+ status: {
309
+ value: "unloaded",
310
+ args: ["--model", "gguf"],
311
+ preset: "default",
312
+ failed: false,
313
+ },
269
314
  },
270
315
  ],
271
316
  });
272
317
 
273
318
  const model = new RouterModel(
274
319
  createModel({
275
- status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
320
+ status: {
321
+ value: "unloaded",
322
+ args: ["--model", "gguf"],
323
+ preset: "default",
324
+ failed: false,
325
+ },
276
326
  }),
277
327
  );
278
328
  const capabilities = await model.getCapabilities();
@@ -18,36 +18,13 @@ beforeEach(() => {
18
18
  });
19
19
 
20
20
  const createModel = (extra: Partial<ModelProperty> = {}): SingleModel =>
21
- new SingleModel(
22
- {
23
- id: "test",
24
- tags: [],
25
- object: "model",
26
- owned_by: "test",
27
- created: Date.now(),
28
- },
29
- {
30
- name: "test",
31
- model: "test.gguf",
32
- modified_at: new Date().toISOString(),
33
- size: "1B",
34
- digest: "abc123",
35
- type: "model",
36
- description: "test",
37
- tags: [],
38
- capabilities: [],
39
- parameters: "",
40
- details: {
41
- parent_model: "",
42
- format: "",
43
- family: "",
44
- families: [],
45
- parameter_size: "",
46
- quantization_level: "",
47
- },
48
- ...extra,
49
- },
50
- );
21
+ new SingleModel({
22
+ id: "test",
23
+ tags: [],
24
+ object: "model",
25
+ owned_by: "test",
26
+ created: Date.now(),
27
+ });
51
28
 
52
29
  describe("SingleModel mode", () => {
53
30
  it("should always return SINGLE mode", () => {
@@ -94,7 +71,7 @@ describe("SingleModel getStatus", () => {
94
71
  const status = await model.getStatus();
95
72
 
96
73
  expect(status).toBe(Status.LOADED);
97
- expect(mockRpc).toHaveBeenCalledWith("/props");
74
+ expect(mockRpc).toHaveBeenCalledWith(`/props?model=${model.id}`);
98
75
  });
99
76
 
100
77
  it("should return SLEEPING when is_sleeping is true", async () => {