pi-llama-cpp 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-llama-cpp",
3
- "version": "0.3.2",
3
+ "version": "0.3.3",
4
4
  "description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
5
5
  "keywords": [
6
6
  "pi",
@@ -69,6 +69,8 @@ export abstract class BaseModel {
69
69
 
70
70
  /**
71
71
  * Gets the load status of the model
72
+ *
73
+ * @returns The current status
72
74
  */
73
75
  public async getStatus(): Promise<Status> {
74
76
  try {
@@ -79,8 +81,10 @@ export abstract class BaseModel {
79
81
  if (is_sleeping) return Status.SLEEPING;
80
82
  if (!error) return Status.LOADED;
81
83
  if (error.code === 503) return Status.LOADING;
84
+ if (error.code === 400 && error.message === "model is not loaded")
85
+ return Status.UNLOADED;
82
86
 
83
- return Status.UNLOADED;
87
+ return Status.FAILED;
84
88
  } catch (err) {
85
89
  return Status.FAILED;
86
90
  }
@@ -88,6 +92,8 @@ export abstract class BaseModel {
88
92
 
89
93
  /**
90
94
  * Gets the context size of a particular model
95
+ *
96
+ * @returns The detected context size
91
97
  */
92
98
  async getContextSize(): Promise<number> {
93
99
  try {
@@ -130,6 +136,7 @@ export abstract class BaseModel {
130
136
 
131
137
  /**
132
138
  * Converts the llama-server model into a configuration object used by Pi
139
+ *
133
140
  * @returns A Pi configuration object
134
141
  */
135
142
  async toProviderConfig(): Promise<ProviderModelConfig> {
@@ -167,15 +174,21 @@ export abstract class BaseModel {
167
174
  * Polls llama-server to check when the model is loaded
168
175
  *
169
176
  * @param startTime The initial polling timestamp
177
+ * @param timeout The maximum time to wait, in ms, before timing out. Defaults to POLLING_TIMEOUT
178
+ * @param interval The delay between polls, in ms. Defaults to POLLING_INTERVAL
170
179
  */
171
- async pollStatus(startTime = Date.now()): Promise<void> {
180
+ async pollStatus(
181
+ startTime: number = Date.now(),
182
+ timeout: number = POLLING_TIMEOUT,
183
+ interval: number = POLLING_INTERVAL,
184
+ ): Promise<void> {
172
185
  while ((await this.getStatus()) === Status.LOADING) {
173
186
  // Force a timeout if we wasted too much time polling
174
- if (Date.now() - startTime > POLLING_TIMEOUT) {
175
- const message = `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`;
187
+ if (Date.now() - startTime > timeout) {
188
+ const message = `Model loading timed out after ${timeout} ms: ${this.id}`;
176
189
  throw new Error(message);
177
190
  }
178
- await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
191
+ await new Promise((r) => setTimeout(r, interval));
179
192
  }
180
193
  }
181
194
  }
@@ -1,4 +1,4 @@
1
- import { DEFAULT_CTX } from "../constants";
1
+ import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
2
2
  import { Mode } from "../enums/mode";
3
3
  import { Status } from "../enums/status";
4
4
  import { ModelsEndpoint } from "../interfaces/endpoints/models";
@@ -22,23 +22,8 @@ export class RouterModel extends BaseModel {
22
22
  if (!model) return Status.FAILED;
23
23
 
24
24
  const status = this.statusMapper[model.status!.value];
25
- if (status === Status.UNLOADED) {
26
- if (this.model.status!.failed) {
27
- /**
28
- * Workaround for the currently-bugged /models status detection
29
- * (I suspect it was introduced in PR #22683 of llama.cpp)
30
- *
31
- * This workaround will show an eternal "loading" status when the model's real status
32
- * is "failed", which is acceptable, because models in "failed" or "loading" status
33
- * shouldn't be used.
34
- *
35
- * In exchange, it will allow unloaded models to be correctly shown as "unloaded".
36
- */
37
- // return Status.FAILED; // <-- Original implementation
38
- return await super.getStatus();
39
- }
40
-
41
- return Status.UNLOADED;
25
+ if (status === Status.UNLOADED || status === Status.LOADING) {
26
+ return super.getStatus();
42
27
  }
43
28
 
44
29
  return status;
@@ -48,22 +33,33 @@ export class RouterModel extends BaseModel {
48
33
  * Workaround for the currently-bugged /models status detection
49
34
  * (I suspect it was introduced in PR #22683 of llama.cpp)
50
35
  *
51
- * @returns The detected status
36
+ * When a model is loaded for the very first time,
37
+ * this workaround polls /props instead of /models
38
+ * for up to 5 seconds to determine whether the model is really loading
39
+ * or has definitely failed.
40
+ *
41
+ * The tradeoff is that we may have to wait up to 5 seconds
42
+ * while the model is reported as "loading" without really loading.
43
+ *
44
+ * In exchange, it will allow unloaded models to be correctly shown as "unloaded".
52
45
  */
53
- private async getStatusWorkaround(): Promise<Status> {
54
- try {
55
- const { is_sleeping, error } = await rpc<PropsEndpoint>(
56
- `/props?model=${this.id}`,
57
- );
58
-
59
- if (is_sleeping) return Status.SLEEPING;
60
- if (!error) return Status.LOADED;
61
- if (error.code === 503) return Status.LOADING;
62
-
63
- return Status.UNLOADED;
64
- } catch (err) {
65
- return Status.FAILED;
46
+ async pollStatus(startTime = Date.now()): Promise<void> {
47
+ let elapsed = 0;
48
+ const limit = 5000;
49
+
50
+ // Work around the status glitch: probe /props until a request succeeds or the 5-second window elapses
51
+ while (Date.now() - startTime <= limit) {
52
+ try {
53
+ await rpc<PropsEndpoint>(`/props?model=${this.id}`);
54
+ break;
55
+ } catch {
56
+ elapsed += POLLING_INTERVAL;
57
+ await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
58
+ }
66
59
  }
60
+
61
+ const timeout = POLLING_TIMEOUT - elapsed;
62
+ return await super.pollStatus(startTime, timeout);
67
63
  }
68
64
 
69
65
  async getCapabilities(): Promise<["text"] | ["image"]> {
@@ -1,8 +1,23 @@
1
+ import { DEFAULT_CTX } from "../constants";
1
2
  import { Mode } from "../enums/mode";
3
+ import { PropsEndpoint } from "../interfaces/endpoints/props";
4
+ import { rpc } from "../tools/retriever";
2
5
  import { BaseModel } from "./baseModel";
3
6
 
4
7
  export class SingleModel extends BaseModel {
5
8
  get mode(): Mode {
6
9
  return Mode.SINGLE;
7
10
  }
11
+
12
+ async getContextSize(): Promise<number> {
13
+ try {
14
+ const { default_generation_settings } = await rpc<PropsEndpoint>(
15
+ `/props?model=${this.id}`,
16
+ );
17
+ const { n_ctx } = default_generation_settings;
18
+ return n_ctx;
19
+ } catch {
20
+ return DEFAULT_CTX;
21
+ }
22
+ }
8
23
  }