pi-llama-cpp 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/models.ts +7 -2
- package/src/interfaces/endpoints/props.ts +7 -4
- package/src/models/baseModel.ts +20 -1
- package/src/models/routerModel.ts +42 -1
- package/src/models/singleModel.ts +0 -11
- package/src/tools/resolver.ts +5 -5
- package/src/tools/retriever.ts +8 -11
- package/tests/routerModel.test.ts +59 -9
- package/tests/singleModel.test.ts +8 -31
package/package.json
CHANGED
package/src/commands/models.ts
CHANGED
|
@@ -31,14 +31,19 @@ const selectModel = async (
|
|
|
31
31
|
* Get available actions for a model based on its mode and status.
|
|
32
32
|
*
|
|
33
33
|
* @param model The selected model
|
|
34
|
-
* @returns
|
|
34
|
+
* @returns The array of available actions for the given model status
|
|
35
35
|
*/
|
|
36
36
|
const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
|
|
37
37
|
const routerModeActions: Record<Status, Array<Action>> = {
|
|
38
38
|
[Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
|
|
39
39
|
[Status.LOADING]: [Action.INFO, Action.CANCEL],
|
|
40
40
|
[Status.FAILED]: [Action.RETRY, Action.CANCEL],
|
|
41
|
-
[Status.SLEEPING]: [
|
|
41
|
+
[Status.SLEEPING]: [
|
|
42
|
+
Action.SWITCH,
|
|
43
|
+
Action.UNLOAD,
|
|
44
|
+
Action.INFO,
|
|
45
|
+
Action.CANCEL,
|
|
46
|
+
],
|
|
42
47
|
[Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
|
|
43
48
|
};
|
|
44
49
|
|
package/src/interfaces/endpoints/props.ts
CHANGED
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
|
|
2
1
|
/**
|
|
3
2
|
* The structure of llama-server's /props endpoint
|
|
4
|
-
*
|
|
5
|
-
* In single mode, applies to /props
|
|
6
|
-
* In router mode, applies to /props?model=<id>
|
|
7
3
|
*/
|
|
8
4
|
export interface PropsEndpoint {
|
|
5
|
+
error?: PropsError;
|
|
9
6
|
default_generation_settings: Record<string, any>;
|
|
10
7
|
total_slots: number;
|
|
11
8
|
model_alias: string;
|
|
@@ -27,3 +24,9 @@ export interface PropsEndpoint {
|
|
|
27
24
|
build_info: string;
|
|
28
25
|
is_sleeping: boolean;
|
|
29
26
|
}
|
|
27
|
+
|
|
28
|
+
export interface PropsError {
|
|
29
|
+
code: number;
|
|
30
|
+
message: string;
|
|
31
|
+
type: string;
|
|
32
|
+
}
|
package/src/models/baseModel.ts
CHANGED
|
@@ -11,6 +11,11 @@ import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
|
|
|
11
11
|
import { PropsEndpoint } from "../interfaces/endpoints/props";
|
|
12
12
|
import { rpc } from "../tools/retriever";
|
|
13
13
|
|
|
14
|
+
/**
|
|
15
|
+
* Abstract base class for llama-server models.
|
|
16
|
+
* Provides common functionality for model identification, status checking,
|
|
17
|
+
* loading/unloading, and configuration conversion.
|
|
18
|
+
*/
|
|
14
19
|
export abstract class BaseModel {
|
|
15
20
|
constructor(protected readonly model: DataProperty) {}
|
|
16
21
|
|
|
@@ -65,7 +70,21 @@ export abstract class BaseModel {
|
|
|
65
70
|
/**
|
|
66
71
|
* Gets the load status of the model
|
|
67
72
|
*/
|
|
68
|
-
|
|
73
|
+
public async getStatus(): Promise<Status> {
|
|
74
|
+
try {
|
|
75
|
+
const { is_sleeping, error } = await rpc<PropsEndpoint>(
|
|
76
|
+
`/props?model=${this.id}`,
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
if (is_sleeping) return Status.SLEEPING;
|
|
80
|
+
if (!error) return Status.LOADED;
|
|
81
|
+
if (error.code === 503) return Status.LOADING;
|
|
82
|
+
|
|
83
|
+
return Status.UNLOADED;
|
|
84
|
+
} catch (err) {
|
|
85
|
+
return Status.FAILED;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
69
88
|
|
|
70
89
|
/**
|
|
71
90
|
* Gets the context size of a particular model
|
package/src/models/routerModel.ts
CHANGED
|
@@ -2,9 +2,15 @@ import { DEFAULT_CTX } from "../constants";
|
|
|
2
2
|
import { Mode } from "../enums/mode";
|
|
3
3
|
import { Status } from "../enums/status";
|
|
4
4
|
import { ModelsEndpoint } from "../interfaces/endpoints/models";
|
|
5
|
+
import { PropsEndpoint } from "../interfaces/endpoints/props";
|
|
5
6
|
import { rpc } from "../tools/retriever";
|
|
6
7
|
import { BaseModel } from "./baseModel";
|
|
7
8
|
|
|
9
|
+
/**
|
|
10
|
+
* Represents a model in llama-server router mode.
|
|
11
|
+
* Tracks per-model status from the /models endpoint and extracts
|
|
12
|
+
* context size from startup arguments when the model is not loaded.
|
|
13
|
+
*/
|
|
8
14
|
export class RouterModel extends BaseModel {
|
|
9
15
|
get mode(): Mode {
|
|
10
16
|
return Mode.ROUTER;
|
|
@@ -17,7 +23,20 @@ export class RouterModel extends BaseModel {
|
|
|
17
23
|
|
|
18
24
|
const status = this.statusMapper[model.status!.value];
|
|
19
25
|
if (status === Status.UNLOADED) {
|
|
20
|
-
if (this.model.status!.failed)
|
|
26
|
+
if (this.model.status!.failed) {
|
|
27
|
+
/**
|
|
28
|
+
* Workaround for the currently-bugged /models status detection
|
|
29
|
+
* (I suspect it was introduced in PR #22683 of llama.cpp)
|
|
30
|
+
*
|
|
31
|
+
* This workaround will show an eternal "loading" status when the model's real status
|
|
32
|
+
* is "failed", which is acceptable, because models in "failed" or "loading" status
|
|
33
|
+
* shouldn't be used.
|
|
34
|
+
*
|
|
35
|
+
* In exchange, it will allow unloaded models to be correctly shown as "unloaded".
|
|
36
|
+
*/
|
|
37
|
+
// return Status.FAILED; // <-- Original implementation
|
|
38
|
+
return await super.getStatus();
|
|
39
|
+
}
|
|
21
40
|
|
|
22
41
|
return Status.UNLOADED;
|
|
23
42
|
}
|
|
@@ -25,6 +44,28 @@ export class RouterModel extends BaseModel {
|
|
|
25
44
|
return status;
|
|
26
45
|
}
|
|
27
46
|
|
|
47
|
+
/**
|
|
48
|
+
* Workaround for the currently-bugged /models status detection
|
|
49
|
+
* (I suspect it was introduced in PR #22683 of llama.cpp)
|
|
50
|
+
*
|
|
51
|
+
* @returns The detected status
|
|
52
|
+
*/
|
|
53
|
+
private async getStatusWorkaround(): Promise<Status> {
|
|
54
|
+
try {
|
|
55
|
+
const { is_sleeping, error } = await rpc<PropsEndpoint>(
|
|
56
|
+
`/props?model=${this.id}`,
|
|
57
|
+
);
|
|
58
|
+
|
|
59
|
+
if (is_sleeping) return Status.SLEEPING;
|
|
60
|
+
if (!error) return Status.LOADED;
|
|
61
|
+
if (error.code === 503) return Status.LOADING;
|
|
62
|
+
|
|
63
|
+
return Status.UNLOADED;
|
|
64
|
+
} catch (err) {
|
|
65
|
+
return Status.FAILED;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
28
69
|
async getCapabilities(): Promise<["text"] | ["image"]> {
|
|
29
70
|
// We can get the real capabilities if the model is already loaded
|
|
30
71
|
if ((await this.getStatus()) === Status.LOADED) {
|
package/src/models/singleModel.ts
CHANGED
|
@@ -1,19 +1,8 @@
|
|
|
1
1
|
import { Mode } from "../enums/mode";
|
|
2
|
-
import { Status } from "../enums/status";
|
|
3
|
-
import { PropsEndpoint } from "../interfaces/endpoints/props";
|
|
4
|
-
import { rpc } from "../tools/retriever";
|
|
5
2
|
import { BaseModel } from "./baseModel";
|
|
6
3
|
|
|
7
4
|
export class SingleModel extends BaseModel {
|
|
8
5
|
get mode(): Mode {
|
|
9
6
|
return Mode.SINGLE;
|
|
10
7
|
}
|
|
11
|
-
|
|
12
|
-
async getStatus(): Promise<Status> {
|
|
13
|
-
// In single-mode, the extension will only work when the model is fully loaded
|
|
14
|
-
const { is_sleeping } = await rpc<PropsEndpoint>("/props");
|
|
15
|
-
if (is_sleeping) return Status.SLEEPING;
|
|
16
|
-
|
|
17
|
-
return Status.LOADED;
|
|
18
|
-
}
|
|
19
8
|
}
|
package/src/tools/resolver.ts
CHANGED
|
@@ -25,9 +25,9 @@ const fileExists = async (filePath: string): Promise<boolean> => {
|
|
|
25
25
|
};
|
|
26
26
|
|
|
27
27
|
/**
|
|
28
|
-
* Reads the contents of a file as JSON
|
|
29
|
-
* @param filePath The path
|
|
30
|
-
* @returns The content
|
|
28
|
+
* Reads and parses the contents of a file as JSON
|
|
29
|
+
* @param filePath The path to the file
|
|
30
|
+
* @returns The parsed content, or null if parsing fails
|
|
31
31
|
*/
|
|
32
32
|
const readContents = async <T>(filePath: string): Promise<T | null> => {
|
|
33
33
|
const raw = await readFile(filePath, "utf-8");
|
|
@@ -41,10 +41,10 @@ const readContents = async <T>(filePath: string): Promise<T | null> => {
|
|
|
41
41
|
};
|
|
42
42
|
|
|
43
43
|
/**
|
|
44
|
-
* Reads a
|
|
44
|
+
* Reads a value from a JSON config file by key
|
|
45
45
|
* @param filePath Path to the JSON config file
|
|
46
46
|
* @param key Key to extract from the parsed JSON
|
|
47
|
-
* @returns The
|
|
47
|
+
* @returns The value at the given key, or null if file/key missing or invalid
|
|
48
48
|
*/
|
|
49
49
|
const readConfigValue = async <T>(
|
|
50
50
|
filePath: string,
|
package/src/tools/retriever.ts
CHANGED
|
@@ -19,10 +19,11 @@ export const isServerReady = async (): Promise<boolean> => {
|
|
|
19
19
|
};
|
|
20
20
|
|
|
21
21
|
/**
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
* @param
|
|
25
|
-
* @
|
|
22
|
+
* Makes an HTTP request to the llama-server and returns the parsed JSON response
|
|
23
|
+
*
|
|
24
|
+
* @param endpoint The endpoint path to fetch (e.g. "/health")
|
|
25
|
+
* @param body The optional request body for POST requests
|
|
26
|
+
* @returns The parsed JSON response from the server
|
|
26
27
|
*/
|
|
27
28
|
export const rpc = async <T>(
|
|
28
29
|
endpoint: string,
|
|
@@ -46,11 +47,8 @@ export const rpc = async <T>(
|
|
|
46
47
|
},
|
|
47
48
|
});
|
|
48
49
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
throw new Error(`${res.status}: ${text}`);
|
|
52
|
-
}
|
|
53
|
-
return res.json() as T;
|
|
50
|
+
const response: T = await res.json();
|
|
51
|
+
return response;
|
|
54
52
|
};
|
|
55
53
|
|
|
56
54
|
/**
|
|
@@ -62,8 +60,7 @@ export const listModels = async (): Promise<BaseModel[]> => {
|
|
|
62
60
|
const { models, data } = await rpc<ModelsEndpoint>("/models");
|
|
63
61
|
|
|
64
62
|
if (models) {
|
|
65
|
-
|
|
66
|
-
return data.map((m) => new SingleModel(m, extra));
|
|
63
|
+
return data.map((m) => new SingleModel(m));
|
|
67
64
|
}
|
|
68
65
|
|
|
69
66
|
const response = data
|
package/tests/routerModel.test.ts
CHANGED
|
@@ -115,7 +115,18 @@ describe("RouterModel context size extraction", () => {
|
|
|
115
115
|
data: [
|
|
116
116
|
{
|
|
117
117
|
id: "test-model",
|
|
118
|
-
status: {
|
|
118
|
+
status: {
|
|
119
|
+
value: "loaded",
|
|
120
|
+
args: [
|
|
121
|
+
"--model",
|
|
122
|
+
"gguf",
|
|
123
|
+
"--ctx-size",
|
|
124
|
+
"4096",
|
|
125
|
+
"--fit-ctx",
|
|
126
|
+
"8192",
|
|
127
|
+
],
|
|
128
|
+
preset: "default",
|
|
129
|
+
},
|
|
119
130
|
},
|
|
120
131
|
],
|
|
121
132
|
});
|
|
@@ -149,7 +160,11 @@ describe("RouterModel context size extraction", () => {
|
|
|
149
160
|
data: [
|
|
150
161
|
{
|
|
151
162
|
id: "test-model",
|
|
152
|
-
status: {
|
|
163
|
+
status: {
|
|
164
|
+
value: "loaded",
|
|
165
|
+
args: ["--model", "gguf"],
|
|
166
|
+
preset: "default",
|
|
167
|
+
},
|
|
153
168
|
},
|
|
154
169
|
],
|
|
155
170
|
});
|
|
@@ -186,7 +201,12 @@ describe("RouterModel capabilities detection", () => {
|
|
|
186
201
|
data: [
|
|
187
202
|
{
|
|
188
203
|
id: "test-model",
|
|
189
|
-
status: {
|
|
204
|
+
status: {
|
|
205
|
+
value: "loaded",
|
|
206
|
+
args: [],
|
|
207
|
+
preset: "default",
|
|
208
|
+
failed: false,
|
|
209
|
+
},
|
|
190
210
|
},
|
|
191
211
|
],
|
|
192
212
|
});
|
|
@@ -206,7 +226,12 @@ describe("RouterModel capabilities detection", () => {
|
|
|
206
226
|
data: [
|
|
207
227
|
{
|
|
208
228
|
id: "test-model",
|
|
209
|
-
status: {
|
|
229
|
+
status: {
|
|
230
|
+
value: "loaded",
|
|
231
|
+
args: [],
|
|
232
|
+
preset: "default",
|
|
233
|
+
failed: false,
|
|
234
|
+
},
|
|
210
235
|
},
|
|
211
236
|
],
|
|
212
237
|
});
|
|
@@ -225,7 +250,12 @@ describe("RouterModel capabilities detection", () => {
|
|
|
225
250
|
data: [
|
|
226
251
|
{
|
|
227
252
|
id: "test-model",
|
|
228
|
-
status: {
|
|
253
|
+
status: {
|
|
254
|
+
value: "loaded",
|
|
255
|
+
args: [],
|
|
256
|
+
preset: "default",
|
|
257
|
+
failed: false,
|
|
258
|
+
},
|
|
229
259
|
},
|
|
230
260
|
],
|
|
231
261
|
});
|
|
@@ -244,14 +274,24 @@ describe("RouterModel capabilities detection", () => {
|
|
|
244
274
|
data: [
|
|
245
275
|
{
|
|
246
276
|
id: "test-model",
|
|
247
|
-
status: {
|
|
277
|
+
status: {
|
|
278
|
+
value: "unloaded",
|
|
279
|
+
args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
|
|
280
|
+
preset: "default",
|
|
281
|
+
failed: false,
|
|
282
|
+
},
|
|
248
283
|
},
|
|
249
284
|
],
|
|
250
285
|
});
|
|
251
286
|
|
|
252
287
|
const model = new RouterModel(
|
|
253
288
|
createModel({
|
|
254
|
-
status: {
|
|
289
|
+
status: {
|
|
290
|
+
value: "unloaded",
|
|
291
|
+
args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
|
|
292
|
+
preset: "default",
|
|
293
|
+
failed: false,
|
|
294
|
+
},
|
|
255
295
|
}),
|
|
256
296
|
);
|
|
257
297
|
const capabilities = await model.getCapabilities();
|
|
@@ -265,14 +305,24 @@ describe("RouterModel capabilities detection", () => {
|
|
|
265
305
|
data: [
|
|
266
306
|
{
|
|
267
307
|
id: "test-model",
|
|
268
|
-
status: {
|
|
308
|
+
status: {
|
|
309
|
+
value: "unloaded",
|
|
310
|
+
args: ["--model", "gguf"],
|
|
311
|
+
preset: "default",
|
|
312
|
+
failed: false,
|
|
313
|
+
},
|
|
269
314
|
},
|
|
270
315
|
],
|
|
271
316
|
});
|
|
272
317
|
|
|
273
318
|
const model = new RouterModel(
|
|
274
319
|
createModel({
|
|
275
|
-
status: {
|
|
320
|
+
status: {
|
|
321
|
+
value: "unloaded",
|
|
322
|
+
args: ["--model", "gguf"],
|
|
323
|
+
preset: "default",
|
|
324
|
+
failed: false,
|
|
325
|
+
},
|
|
276
326
|
}),
|
|
277
327
|
);
|
|
278
328
|
const capabilities = await model.getCapabilities();
|
package/tests/singleModel.test.ts
CHANGED
|
@@ -18,36 +18,13 @@ beforeEach(() => {
|
|
|
18
18
|
});
|
|
19
19
|
|
|
20
20
|
const createModel = (extra: Partial<ModelProperty> = {}): SingleModel =>
|
|
21
|
-
new SingleModel(
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
},
|
|
29
|
-
{
|
|
30
|
-
name: "test",
|
|
31
|
-
model: "test.gguf",
|
|
32
|
-
modified_at: new Date().toISOString(),
|
|
33
|
-
size: "1B",
|
|
34
|
-
digest: "abc123",
|
|
35
|
-
type: "model",
|
|
36
|
-
description: "test",
|
|
37
|
-
tags: [],
|
|
38
|
-
capabilities: [],
|
|
39
|
-
parameters: "",
|
|
40
|
-
details: {
|
|
41
|
-
parent_model: "",
|
|
42
|
-
format: "",
|
|
43
|
-
family: "",
|
|
44
|
-
families: [],
|
|
45
|
-
parameter_size: "",
|
|
46
|
-
quantization_level: "",
|
|
47
|
-
},
|
|
48
|
-
...extra,
|
|
49
|
-
},
|
|
50
|
-
);
|
|
21
|
+
new SingleModel({
|
|
22
|
+
id: "test",
|
|
23
|
+
tags: [],
|
|
24
|
+
object: "model",
|
|
25
|
+
owned_by: "test",
|
|
26
|
+
created: Date.now(),
|
|
27
|
+
});
|
|
51
28
|
|
|
52
29
|
describe("SingleModel mode", () => {
|
|
53
30
|
it("should always return SINGLE mode", () => {
|
|
@@ -94,7 +71,7 @@ describe("SingleModel getStatus", () => {
|
|
|
94
71
|
const status = await model.getStatus();
|
|
95
72
|
|
|
96
73
|
expect(status).toBe(Status.LOADED);
|
|
97
|
-
expect(mockRpc).toHaveBeenCalledWith(
|
|
74
|
+
expect(mockRpc).toHaveBeenCalledWith(`/props?model=${model.id}`);
|
|
98
75
|
});
|
|
99
76
|
|
|
100
77
|
it("should return SLEEPING when is_sleeping is true", async () => {
|