pi-llama-cpp 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/models/baseModel.ts +24 -11
- package/src/models/routerModel.ts +28 -32
- package/tests/routerModel.test.ts +2 -7
- package/tests/singleModel.test.ts +4 -13
package/package.json
CHANGED
package/src/models/baseModel.ts
CHANGED
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
} from "../constants";
|
|
8
8
|
import { Mode } from "../enums/mode";
|
|
9
9
|
import { Status } from "../enums/status";
|
|
10
|
-
import { DataProperty
|
|
10
|
+
import { DataProperty } from "../interfaces/endpoints/models";
|
|
11
11
|
import { PropsEndpoint } from "../interfaces/endpoints/props";
|
|
12
12
|
import { rpc } from "../tools/retriever";
|
|
13
13
|
|
|
@@ -69,6 +69,8 @@ export abstract class BaseModel {
|
|
|
69
69
|
|
|
70
70
|
/**
|
|
71
71
|
* Gets the load status of the model
|
|
72
|
+
*
|
|
73
|
+
* @returns The current status
|
|
72
74
|
*/
|
|
73
75
|
public async getStatus(): Promise<Status> {
|
|
74
76
|
try {
|
|
@@ -79,8 +81,10 @@ export abstract class BaseModel {
|
|
|
79
81
|
if (is_sleeping) return Status.SLEEPING;
|
|
80
82
|
if (!error) return Status.LOADED;
|
|
81
83
|
if (error.code === 503) return Status.LOADING;
|
|
84
|
+
if (error.code === 400 && error.message === "model is not loaded")
|
|
85
|
+
return Status.UNLOADED;
|
|
82
86
|
|
|
83
|
-
return Status.
|
|
87
|
+
return Status.FAILED;
|
|
84
88
|
} catch (err) {
|
|
85
89
|
return Status.FAILED;
|
|
86
90
|
}
|
|
@@ -88,14 +92,16 @@ export abstract class BaseModel {
|
|
|
88
92
|
|
|
89
93
|
/**
|
|
90
94
|
* Gets the context size of a particular model
|
|
95
|
+
*
|
|
96
|
+
* @returns The detected context size
|
|
91
97
|
*/
|
|
92
98
|
async getContextSize(): Promise<number> {
|
|
93
99
|
try {
|
|
94
|
-
const {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
const
|
|
98
|
-
return
|
|
100
|
+
const { default_generation_settings } = await rpc<PropsEndpoint>(
|
|
101
|
+
`/props?model=${this.id}`,
|
|
102
|
+
);
|
|
103
|
+
const { n_ctx } = default_generation_settings;
|
|
104
|
+
return n_ctx;
|
|
99
105
|
} catch {
|
|
100
106
|
return DEFAULT_CTX;
|
|
101
107
|
}
|
|
@@ -130,6 +136,7 @@ export abstract class BaseModel {
|
|
|
130
136
|
|
|
131
137
|
/**
|
|
132
138
|
* Converts the llama-server model into a configuration object used by Pi
|
|
139
|
+
*
|
|
133
140
|
* @returns A Pi configuration object
|
|
134
141
|
*/
|
|
135
142
|
async toProviderConfig(): Promise<ProviderModelConfig> {
|
|
@@ -167,15 +174,21 @@ export abstract class BaseModel {
|
|
|
167
174
|
* Polls llama-server to check when the model is loaded
|
|
168
175
|
*
|
|
169
176
|
* @param startTime The initial polling timestamp
|
|
177
|
+
* @param timeout The maximum amount of ms before timeout. Defaults to POLLING_TIMEOUT
|
|
178
|
+
* @param interval The polling interval. Defaults to POLLING_INTERVAL
|
|
170
179
|
*/
|
|
171
|
-
async pollStatus(
|
|
180
|
+
async pollStatus(
|
|
181
|
+
startTime: number = Date.now(),
|
|
182
|
+
timeout: number = POLLING_TIMEOUT,
|
|
183
|
+
interval: number = POLLING_INTERVAL,
|
|
184
|
+
): Promise<void> {
|
|
172
185
|
while ((await this.getStatus()) === Status.LOADING) {
|
|
173
186
|
// Force a timeout if we wasted too much time polling
|
|
174
|
-
if (Date.now() - startTime >
|
|
175
|
-
const message = `Model loading timed out after ${
|
|
187
|
+
if (Date.now() - startTime > timeout) {
|
|
188
|
+
const message = `Model loading timed out after ${timeout} ms: ${this.id}`;
|
|
176
189
|
throw new Error(message);
|
|
177
190
|
}
|
|
178
|
-
await new Promise((r) => setTimeout(r,
|
|
191
|
+
await new Promise((r) => setTimeout(r, interval));
|
|
179
192
|
}
|
|
180
193
|
}
|
|
181
194
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { DEFAULT_CTX } from "../constants";
|
|
1
|
+
import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
|
|
2
2
|
import { Mode } from "../enums/mode";
|
|
3
3
|
import { Status } from "../enums/status";
|
|
4
4
|
import { ModelsEndpoint } from "../interfaces/endpoints/models";
|
|
@@ -22,23 +22,8 @@ export class RouterModel extends BaseModel {
|
|
|
22
22
|
if (!model) return Status.FAILED;
|
|
23
23
|
|
|
24
24
|
const status = this.statusMapper[model.status!.value];
|
|
25
|
-
if (status === Status.UNLOADED) {
|
|
26
|
-
|
|
27
|
-
/**
|
|
28
|
-
* Workaround for the currently-bugged /models status detection
|
|
29
|
-
* (I suspect it was introduced in PR #22683 of llama.cpp)
|
|
30
|
-
*
|
|
31
|
-
* This workaround will show an eternal "loading" status when the model's real status
|
|
32
|
-
* is "failed", which is acceptable, because models in "failed" or "loading" status
|
|
33
|
-
* shouldn't be used.
|
|
34
|
-
*
|
|
35
|
-
* In exchange, it will allow unloaded models to be correctly shown as "unloaded".
|
|
36
|
-
*/
|
|
37
|
-
// return Status.FAILED; // <-- Original implementation
|
|
38
|
-
return await super.getStatus();
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
return Status.UNLOADED;
|
|
25
|
+
if (status === Status.UNLOADED || status === Status.LOADING) {
|
|
26
|
+
return super.getStatus();
|
|
42
27
|
}
|
|
43
28
|
|
|
44
29
|
return status;
|
|
@@ -48,22 +33,33 @@ export class RouterModel extends BaseModel {
|
|
|
48
33
|
* Workaround for the currently-bugged /models status detection
|
|
49
34
|
* (I suspect it was introduced in PR #22683 of llama.cpp)
|
|
50
35
|
*
|
|
51
|
-
*
|
|
36
|
+
* When a model is loaded for the very first time,
|
|
37
|
+
* this workaround will try to poll to /props instead of /models
|
|
38
|
+
* for up to 5 seconds to try to detect if the model is really loading,
|
|
39
|
+
* or if it definitely failed.
|
|
40
|
+
*
|
|
41
|
+
* The tradeoff is that we'll have to wait for 5 seconds
|
|
42
|
+
* while the model is "loading", while not really loading.
|
|
43
|
+
*
|
|
44
|
+
* In exchange, it will allow unloaded models to be correctly shown as "unloaded".
|
|
52
45
|
*/
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
46
|
+
async pollStatus(startTime = Date.now()): Promise<void> {
|
|
47
|
+
let elapsed = 0;
|
|
48
|
+
const limit = 5000;
|
|
49
|
+
|
|
50
|
+
// Grab the glitch
|
|
51
|
+
while (Date.now() - startTime <= limit) {
|
|
52
|
+
try {
|
|
53
|
+
await rpc<PropsEndpoint>(`/props?model=${this.id}`);
|
|
54
|
+
break;
|
|
55
|
+
} catch {
|
|
56
|
+
elapsed += POLLING_INTERVAL;
|
|
57
|
+
await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
|
|
58
|
+
}
|
|
66
59
|
}
|
|
60
|
+
|
|
61
|
+
const timeout = POLLING_TIMEOUT - elapsed;
|
|
62
|
+
return await super.pollStatus(startTime, timeout);
|
|
67
63
|
}
|
|
68
64
|
|
|
69
65
|
async getCapabilities(): Promise<["text"] | ["image"]> {
|
|
@@ -130,14 +130,9 @@ describe("RouterModel context size extraction", () => {
|
|
|
130
130
|
},
|
|
131
131
|
],
|
|
132
132
|
});
|
|
133
|
-
// Second call: super.getContextSize() -> /
|
|
133
|
+
// Second call: super.getContextSize() -> /props?model=test-model with default_generation_settings.n_ctx
|
|
134
134
|
mockRpc.mockResolvedValueOnce({
|
|
135
|
-
|
|
136
|
-
{
|
|
137
|
-
id: "test-model",
|
|
138
|
-
meta: { n_ctx: 4096 },
|
|
139
|
-
},
|
|
140
|
-
],
|
|
135
|
+
default_generation_settings: { n_ctx: 4096 },
|
|
141
136
|
});
|
|
142
137
|
|
|
143
138
|
const model = new RouterModel(
|
|
@@ -85,28 +85,19 @@ describe("SingleModel getStatus", () => {
|
|
|
85
85
|
});
|
|
86
86
|
|
|
87
87
|
describe("SingleModel getContextSize", () => {
|
|
88
|
-
it("should return n_ctx from /
|
|
88
|
+
it("should return n_ctx from /props endpoint default_generation_settings", async () => {
|
|
89
89
|
mockRpc.mockResolvedValueOnce({
|
|
90
|
-
|
|
90
|
+
default_generation_settings: { n_ctx: 8192 },
|
|
91
91
|
});
|
|
92
92
|
|
|
93
93
|
const model = createModel();
|
|
94
94
|
const ctxSize = await model.getContextSize();
|
|
95
95
|
|
|
96
96
|
expect(ctxSize).toBe(8192);
|
|
97
|
-
expect(mockRpc).toHaveBeenCalledWith("/
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
it("should return DEFAULT_CTX when model not found in /models", async () => {
|
|
101
|
-
mockRpc.mockResolvedValueOnce({ data: [] });
|
|
102
|
-
|
|
103
|
-
const model = createModel();
|
|
104
|
-
const ctxSize = await model.getContextSize();
|
|
105
|
-
|
|
106
|
-
expect(ctxSize).toBe(DEFAULT_CTX);
|
|
97
|
+
expect(mockRpc).toHaveBeenCalledWith("/props?model=test");
|
|
107
98
|
});
|
|
108
99
|
|
|
109
|
-
it("should return DEFAULT_CTX when /
|
|
100
|
+
it("should return DEFAULT_CTX when /props fails", async () => {
|
|
110
101
|
mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
|
|
111
102
|
|
|
112
103
|
const model = createModel();
|