pi-llama-cpp 0.2.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/package.json +2 -2
- package/src/{handlers.ts → commands/models.ts} +21 -8
- package/src/events.ts +1 -1
- package/src/index.ts +12 -25
- package/src/interfaces/endpoints/models.ts +1 -0
- package/src/models/baseModel.ts +48 -18
- package/src/models/routerModel.ts +25 -15
- package/src/models/singleModel.ts +0 -31
- package/src/tools/provider.ts +28 -0
- package/src/tools/resolver.ts +5 -5
- package/src/tools/retriever.ts +8 -11
- package/tests/routerModel.test.ts +136 -23
- package/tests/singleModel.test.ts +34 -17
- package/tsconfig.json +5 -6
- package/src/interfaces/endpoints/slots.ts +0 -15
package/README.md
CHANGED
|
@@ -99,8 +99,8 @@ llama-server --model path/to/model.gguf ...
|
|
|
99
99
|
```
|
|
100
100
|
|
|
101
101
|
The extension determines the context size as follows:
|
|
102
|
-
- **Router mode** — reads from the
|
|
103
|
-
- **Single mode** — reads from the `/
|
|
102
|
+
- **Router mode** — when loaded, reads `meta.n_ctx` from the `/models` endpoint; when not loaded, reads `--ctx-size` and/or `--fit-ctx` from the model's status `args` array
|
|
103
|
+
- **Single mode** — reads `meta.n_ctx` from the `/models` endpoint
|
|
104
104
|
- Falls back to `128000` if not available
|
|
105
105
|
|
|
106
106
|
### Commands
|
|
@@ -145,6 +145,6 @@ Each model exposed to Pi includes the following defaults:
|
|
|
145
145
|
|
|
146
146
|
## Dependencies
|
|
147
147
|
|
|
148
|
-
| Dependency
|
|
149
|
-
|
|
|
150
|
-
| `@
|
|
148
|
+
| Dependency | Purpose |
|
|
149
|
+
| --------------------------------- | ------------------------------------- |
|
|
150
|
+
| `@earendil-works/pi-coding-agent` | Pi Coding Agent SDK (peer dependency) |
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-llama-cpp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi",
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
]
|
|
34
34
|
},
|
|
35
35
|
"peerDependencies": {
|
|
36
|
-
"@
|
|
36
|
+
"@earendil-works/pi-coding-agent": "*"
|
|
37
37
|
},
|
|
38
38
|
"devDependencies": {
|
|
39
39
|
"@types/node": "^25.6.0",
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import type {
|
|
2
2
|
ExtensionAPI,
|
|
3
3
|
ExtensionCommandContext,
|
|
4
|
-
} from "@
|
|
5
|
-
import { PROVIDER_ID, PROVIDER_NAME } from "
|
|
6
|
-
import { Action } from "
|
|
7
|
-
import { Mode } from "
|
|
8
|
-
import { Status } from "
|
|
9
|
-
import { BaseModel } from "
|
|
4
|
+
} from "@earendil-works/pi-coding-agent";
|
|
5
|
+
import { PROVIDER_ID, PROVIDER_NAME } from "../constants";
|
|
6
|
+
import { Action } from "../enums/action";
|
|
7
|
+
import { Mode } from "../enums/mode";
|
|
8
|
+
import { Status } from "../enums/status";
|
|
9
|
+
import { BaseModel } from "../models/baseModel";
|
|
10
|
+
import { resolveUrl } from "../tools/resolver";
|
|
10
11
|
|
|
11
12
|
/**
|
|
12
13
|
* Select a model from the list. Returns null if user cancels.
|
|
@@ -30,7 +31,7 @@ const selectModel = async (
|
|
|
30
31
|
* Get available actions for a model based on its mode and status.
|
|
31
32
|
*
|
|
32
33
|
* @param model The selected model
|
|
33
|
-
* @returns
|
|
34
|
+
* @returns The array of available actions for the given model status
|
|
34
35
|
*/
|
|
35
36
|
const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
|
|
36
37
|
const routerModeActions: Record<Status, Array<Action>> = {
|
|
@@ -109,13 +110,25 @@ const modelSelectionHandler = async (
|
|
|
109
110
|
}
|
|
110
111
|
};
|
|
111
112
|
|
|
113
|
+
/**
|
|
114
|
+
* Handles the /models command when the server is unreachable.
|
|
115
|
+
*
|
|
116
|
+
* @param ctx The context used by Pi
|
|
117
|
+
*/
|
|
118
|
+
export const notFoundCommand = async (
|
|
119
|
+
ctx: ExtensionCommandContext,
|
|
120
|
+
): Promise<void> => {
|
|
121
|
+
const url = await resolveUrl(ctx.cwd);
|
|
122
|
+
ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
|
|
123
|
+
};
|
|
124
|
+
|
|
112
125
|
/**
|
|
113
126
|
* Handles the /models command
|
|
114
127
|
*
|
|
115
128
|
* @param ctx The context used by Pi
|
|
116
129
|
* @param pi The Pi extension
|
|
117
130
|
*/
|
|
118
|
-
export const
|
|
131
|
+
export const modelsCommand = async (
|
|
119
132
|
ctx: ExtensionCommandContext,
|
|
120
133
|
pi: ExtensionAPI,
|
|
121
134
|
models: BaseModel[],
|
package/src/events.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ExtensionContext } from "@
|
|
1
|
+
import { ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
2
2
|
import { PROVIDER_ID } from "./constants";
|
|
3
3
|
import { ModelSelectEvent } from "./interfaces/events";
|
|
4
4
|
import { listModels } from "./tools/retriever";
|
package/src/index.ts
CHANGED
|
@@ -1,47 +1,34 @@
|
|
|
1
1
|
import type {
|
|
2
2
|
ExtensionAPI,
|
|
3
3
|
ExtensionCommandContext,
|
|
4
|
-
} from "@
|
|
5
|
-
import {
|
|
4
|
+
} from "@earendil-works/pi-coding-agent";
|
|
5
|
+
import { modelsCommand, notFoundCommand } from "./commands/models";
|
|
6
|
+
import { PROVIDER_NAME } from "./constants";
|
|
6
7
|
import { onModelSelect } from "./events";
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import { isServerReady, listModels } from "./tools/retriever";
|
|
8
|
+
import { registerLlamaCppProvider } from "./tools/provider";
|
|
9
|
+
import { isServerReady } from "./tools/retriever";
|
|
10
10
|
|
|
11
11
|
export default async function (pi: ExtensionAPI) {
|
|
12
|
-
//
|
|
12
|
+
// Server verification
|
|
13
13
|
if (!(await isServerReady())) {
|
|
14
14
|
pi.registerCommand("models", {
|
|
15
15
|
description: `${PROVIDER_NAME} models (offline)`,
|
|
16
|
-
handler: async (
|
|
17
|
-
|
|
18
|
-
ctx: ExtensionCommandContext,
|
|
19
|
-
): Promise<void> => {
|
|
20
|
-
const url = await resolveUrl(ctx.cwd);
|
|
21
|
-
ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
|
|
16
|
+
handler: async (_: string, ctx: ExtensionCommandContext) => {
|
|
17
|
+
await notFoundCommand(ctx);
|
|
22
18
|
},
|
|
23
19
|
});
|
|
24
20
|
|
|
25
21
|
return;
|
|
26
22
|
}
|
|
27
23
|
|
|
28
|
-
|
|
29
|
-
const
|
|
30
|
-
const serverModels = await listModels();
|
|
24
|
+
// Provider registration
|
|
25
|
+
const serverModels = await registerLlamaCppProvider(pi);
|
|
31
26
|
|
|
27
|
+
// Command: /models
|
|
32
28
|
pi.registerCommand("models", {
|
|
33
29
|
description: `Browse ${PROVIDER_NAME} models (live status)`,
|
|
34
30
|
handler: async (_: string, ctx: ExtensionCommandContext) =>
|
|
35
|
-
await
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
// Provider registration
|
|
39
|
-
pi.registerProvider(PROVIDER_ID, {
|
|
40
|
-
name: PROVIDER_NAME,
|
|
41
|
-
baseUrl: `${url}/v1`,
|
|
42
|
-
api: "openai-completions",
|
|
43
|
-
apiKey: await resolveApiKey(),
|
|
44
|
-
models: await Promise.all(serverModels.map((m) => m.toProviderConfig())),
|
|
31
|
+
await modelsCommand(ctx, pi, serverModels),
|
|
45
32
|
});
|
|
46
33
|
|
|
47
34
|
// Events registration
|
package/src/models/baseModel.ts
CHANGED
|
@@ -1,10 +1,21 @@
|
|
|
1
|
-
import type { ProviderModelConfig } from "@
|
|
2
|
-
import {
|
|
1
|
+
import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import {
|
|
3
|
+
DEFAULT_CTX,
|
|
4
|
+
MAX_TOKENS,
|
|
5
|
+
POLLING_INTERVAL,
|
|
6
|
+
POLLING_TIMEOUT,
|
|
7
|
+
} from "../constants";
|
|
3
8
|
import { Mode } from "../enums/mode";
|
|
4
9
|
import { Status } from "../enums/status";
|
|
5
|
-
import { DataProperty } from "../interfaces/endpoints/models";
|
|
10
|
+
import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
|
|
11
|
+
import { PropsEndpoint } from "../interfaces/endpoints/props";
|
|
6
12
|
import { rpc } from "../tools/retriever";
|
|
7
13
|
|
|
14
|
+
/**
|
|
15
|
+
* Abstract base class for llama-server models.
|
|
16
|
+
* Provides common functionality for model identification, status checking,
|
|
17
|
+
* loading/unloading, and configuration conversion.
|
|
18
|
+
*/
|
|
8
19
|
export abstract class BaseModel {
|
|
9
20
|
constructor(protected readonly model: DataProperty) {}
|
|
10
21
|
|
|
@@ -40,9 +51,21 @@ export abstract class BaseModel {
|
|
|
40
51
|
}
|
|
41
52
|
|
|
42
53
|
/**
|
|
43
|
-
* Detects
|
|
54
|
+
* Detects the capabilities of the model
|
|
55
|
+
*
|
|
56
|
+
* @returns An array of capabilities, as expected by Pi
|
|
44
57
|
*/
|
|
45
|
-
|
|
58
|
+
async getCapabilities(): Promise<["text"] | ["image"]> {
|
|
59
|
+
try {
|
|
60
|
+
const { modalities } = await rpc<PropsEndpoint>(
|
|
61
|
+
`/props?model=${this.id}`,
|
|
62
|
+
);
|
|
63
|
+
|
|
64
|
+
return modalities.vision ? ["image"] : ["text"];
|
|
65
|
+
} catch {
|
|
66
|
+
return ["text"];
|
|
67
|
+
}
|
|
68
|
+
}
|
|
46
69
|
|
|
47
70
|
/**
|
|
48
71
|
* Gets the load status of the model
|
|
@@ -52,7 +75,17 @@ export abstract class BaseModel {
|
|
|
52
75
|
/**
|
|
53
76
|
* Gets the context size of a particular model
|
|
54
77
|
*/
|
|
55
|
-
|
|
78
|
+
async getContextSize(): Promise<number> {
|
|
79
|
+
try {
|
|
80
|
+
const { data } = await rpc<ModelsEndpoint>(`/models`);
|
|
81
|
+
const model = data.find((d) => d.id === this.id);
|
|
82
|
+
|
|
83
|
+
const response = model?.meta?.n_ctx;
|
|
84
|
+
return response ?? DEFAULT_CTX;
|
|
85
|
+
} catch {
|
|
86
|
+
return DEFAULT_CTX;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
56
89
|
|
|
57
90
|
/**
|
|
58
91
|
* Sets up a label for the model selection screen
|
|
@@ -72,7 +105,7 @@ export abstract class BaseModel {
|
|
|
72
105
|
`ID : ${this.id}`,
|
|
73
106
|
`Model : ${this.name}`,
|
|
74
107
|
`Reasoning : ${this.reasoning}`,
|
|
75
|
-
`Capabilities : ${this.
|
|
108
|
+
`Capabilities : ${(await this.getCapabilities()).join(", ")}`,
|
|
76
109
|
`Context size : ${await this.getContextSize()}`,
|
|
77
110
|
`Status : ${await this.getStatus()}`,
|
|
78
111
|
];
|
|
@@ -90,7 +123,7 @@ export abstract class BaseModel {
|
|
|
90
123
|
id: this.id,
|
|
91
124
|
name: this.name,
|
|
92
125
|
reasoning: this.reasoning,
|
|
93
|
-
input: this.
|
|
126
|
+
input: await this.getCapabilities(),
|
|
94
127
|
contextWindow: await this.getContextSize(),
|
|
95
128
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
96
129
|
maxTokens: MAX_TOKENS,
|
|
@@ -122,16 +155,13 @@ export abstract class BaseModel {
|
|
|
122
155
|
* @param startTime The initial polling timestamp
|
|
123
156
|
*/
|
|
124
157
|
async pollStatus(startTime = Date.now()): Promise<void> {
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
158
|
+
while ((await this.getStatus()) === Status.LOADING) {
|
|
159
|
+
// Force a timeout if we wasted too much time polling
|
|
160
|
+
if (Date.now() - startTime > POLLING_TIMEOUT) {
|
|
161
|
+
const message = `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`;
|
|
162
|
+
throw new Error(message);
|
|
163
|
+
}
|
|
164
|
+
await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
|
|
132
165
|
}
|
|
133
|
-
|
|
134
|
-
await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
|
|
135
|
-
await this.pollStatus(startTime);
|
|
136
166
|
}
|
|
137
167
|
}
|
package/src/models/routerModel.ts
CHANGED
|
@@ -1,24 +1,20 @@
|
|
|
1
1
|
import { DEFAULT_CTX } from "../constants";
|
|
2
2
|
import { Mode } from "../enums/mode";
|
|
3
3
|
import { Status } from "../enums/status";
|
|
4
|
-
import {
|
|
4
|
+
import { ModelsEndpoint } from "../interfaces/endpoints/models";
|
|
5
5
|
import { rpc } from "../tools/retriever";
|
|
6
6
|
import { BaseModel } from "./baseModel";
|
|
7
7
|
|
|
8
|
+
/**
|
|
9
|
+
* Represents a model in llama-server router mode.
|
|
10
|
+
* Tracks per-model status from the /models endpoint and extracts
|
|
11
|
+
* context size from startup arguments when the model is not loaded.
|
|
12
|
+
*/
|
|
8
13
|
export class RouterModel extends BaseModel {
|
|
9
|
-
constructor(protected readonly model: DataProperty) {
|
|
10
|
-
super(model);
|
|
11
|
-
}
|
|
12
|
-
|
|
13
14
|
get mode(): Mode {
|
|
14
15
|
return Mode.ROUTER;
|
|
15
16
|
}
|
|
16
17
|
|
|
17
|
-
get capabilities(): ["text"] | ["image"] {
|
|
18
|
-
const hasImage = this.model.status?.args?.includes("--mmproj") ?? false;
|
|
19
|
-
return hasImage ? ["image"] : ["text"];
|
|
20
|
-
}
|
|
21
|
-
|
|
22
18
|
async getStatus(): Promise<Status> {
|
|
23
19
|
const { data } = await rpc<ModelsEndpoint>("/models");
|
|
24
20
|
const model = data.find((m) => m.id === this.id);
|
|
@@ -34,14 +30,28 @@ export class RouterModel extends BaseModel {
|
|
|
34
30
|
return status;
|
|
35
31
|
}
|
|
36
32
|
|
|
33
|
+
async getCapabilities(): Promise<["text"] | ["image"]> {
|
|
34
|
+
// We can get the real capabilities if the model is already loaded
|
|
35
|
+
if ((await this.getStatus()) === Status.LOADED) {
|
|
36
|
+
return super.getCapabilities();
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
const hasImage = this.model.status?.args?.includes("--mmproj") ?? false;
|
|
40
|
+
return hasImage ? ["image"] : ["text"];
|
|
41
|
+
}
|
|
42
|
+
|
|
37
43
|
async getContextSize(): Promise<number> {
|
|
38
|
-
|
|
39
|
-
if (
|
|
44
|
+
// We can get a more accurate context size if the model is already loaded
|
|
45
|
+
if ((await this.getStatus()) === Status.LOADED) {
|
|
46
|
+
return super.getContextSize();
|
|
47
|
+
}
|
|
40
48
|
|
|
41
|
-
response =
|
|
42
|
-
|
|
49
|
+
const response =
|
|
50
|
+
this.extractFrom("--ctx-size") ??
|
|
51
|
+
this.extractFrom("--fit-ctx") ??
|
|
52
|
+
DEFAULT_CTX;
|
|
43
53
|
|
|
44
|
-
return
|
|
54
|
+
return response;
|
|
45
55
|
}
|
|
46
56
|
|
|
47
57
|
/**
|
package/src/models/singleModel.ts
CHANGED
|
@@ -1,31 +1,14 @@
|
|
|
1
|
-
import { DEFAULT_CTX } from "../constants";
|
|
2
1
|
import { Mode } from "../enums/mode";
|
|
3
2
|
import { Status } from "../enums/status";
|
|
4
|
-
import { DataProperty, ModelProperty } from "../interfaces/endpoints/models";
|
|
5
3
|
import { PropsEndpoint } from "../interfaces/endpoints/props";
|
|
6
|
-
import { SlotsEndpoint } from "../interfaces/endpoints/slots";
|
|
7
4
|
import { rpc } from "../tools/retriever";
|
|
8
5
|
import { BaseModel } from "./baseModel";
|
|
9
6
|
|
|
10
7
|
export class SingleModel extends BaseModel {
|
|
11
|
-
private contextSize?: number;
|
|
12
|
-
|
|
13
|
-
constructor(
|
|
14
|
-
protected readonly model: DataProperty,
|
|
15
|
-
private readonly extra: ModelProperty,
|
|
16
|
-
) {
|
|
17
|
-
super(model);
|
|
18
|
-
}
|
|
19
|
-
|
|
20
8
|
get mode(): Mode {
|
|
21
9
|
return Mode.SINGLE;
|
|
22
10
|
}
|
|
23
11
|
|
|
24
|
-
get capabilities(): ["text"] | ["image"] {
|
|
25
|
-
const hasImage = this.extra.capabilities.includes("multimodal");
|
|
26
|
-
return hasImage ? ["image"] : ["text"];
|
|
27
|
-
}
|
|
28
|
-
|
|
29
12
|
async getStatus(): Promise<Status> {
|
|
30
13
|
// In single-mode, the extension will only work when the model is fully loaded
|
|
31
14
|
const { is_sleeping } = await rpc<PropsEndpoint>("/props");
|
|
@@ -33,18 +16,4 @@ export class SingleModel extends BaseModel {
|
|
|
33
16
|
|
|
34
17
|
return Status.LOADED;
|
|
35
18
|
}
|
|
36
|
-
|
|
37
|
-
async getContextSize(): Promise<number> {
|
|
38
|
-
// Avoid calling the endpoint if we already have the value
|
|
39
|
-
if (this.contextSize) return this.contextSize;
|
|
40
|
-
|
|
41
|
-
try {
|
|
42
|
-
const [{ n_ctx }] = await rpc<SlotsEndpoint[]>("/slots");
|
|
43
|
-
this.contextSize = n_ctx;
|
|
44
|
-
|
|
45
|
-
return this.contextSize;
|
|
46
|
-
} catch {
|
|
47
|
-
return DEFAULT_CTX;
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
19
|
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { PROVIDER_ID, PROVIDER_NAME } from "../constants";
|
|
3
|
+
import type { BaseModel } from "../models/baseModel";
|
|
4
|
+
import { resolveApiKey, resolveUrl } from "./resolver";
|
|
5
|
+
import { listModels } from "./retriever";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Registers the Llama.cpp provider and returns the fetched models.
|
|
9
|
+
*
|
|
10
|
+
* @param pi The Pi extension API
|
|
11
|
+
* @returns The list of models fetched from the server
|
|
12
|
+
*/
|
|
13
|
+
export const registerLlamaCppProvider = async (
|
|
14
|
+
pi: ExtensionAPI,
|
|
15
|
+
): Promise<BaseModel[]> => {
|
|
16
|
+
const baseUrl = `${await resolveUrl(process.cwd())}/v1`;
|
|
17
|
+
const models = await listModels();
|
|
18
|
+
|
|
19
|
+
pi.registerProvider(PROVIDER_ID, {
|
|
20
|
+
name: PROVIDER_NAME,
|
|
21
|
+
baseUrl,
|
|
22
|
+
api: "openai-completions",
|
|
23
|
+
apiKey: await resolveApiKey(),
|
|
24
|
+
models: await Promise.all(models.map((m) => m.toProviderConfig())),
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
return models;
|
|
28
|
+
};
|
package/src/tools/resolver.ts
CHANGED
|
@@ -25,9 +25,9 @@ const fileExists = async (filePath: string): Promise<boolean> => {
|
|
|
25
25
|
};
|
|
26
26
|
|
|
27
27
|
/**
|
|
28
|
-
* Reads the contents of a file as JSON
|
|
29
|
-
* @param filePath The path
|
|
30
|
-
* @returns The content
|
|
28
|
+
* Reads and parses the contents of a file as JSON
|
|
29
|
+
* @param filePath The path to the file
|
|
30
|
+
* @returns The parsed content, or null if parsing fails
|
|
31
31
|
*/
|
|
32
32
|
const readContents = async <T>(filePath: string): Promise<T | null> => {
|
|
33
33
|
const raw = await readFile(filePath, "utf-8");
|
|
@@ -41,10 +41,10 @@ const readContents = async <T>(filePath: string): Promise<T | null> => {
|
|
|
41
41
|
};
|
|
42
42
|
|
|
43
43
|
/**
|
|
44
|
-
* Reads a
|
|
44
|
+
* Reads a value from a JSON config file by key
|
|
45
45
|
* @param filePath Path to the JSON config file
|
|
46
46
|
* @param key Key to extract from the parsed JSON
|
|
47
|
-
* @returns The
|
|
47
|
+
* @returns The value at the given key, or null if file/key missing or invalid
|
|
48
48
|
*/
|
|
49
49
|
const readConfigValue = async <T>(
|
|
50
50
|
filePath: string,
|
package/src/tools/retriever.ts
CHANGED
|
@@ -19,10 +19,11 @@ export const isServerReady = async (): Promise<boolean> => {
|
|
|
19
19
|
};
|
|
20
20
|
|
|
21
21
|
/**
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
* @param
|
|
25
|
-
* @
|
|
22
|
+
* Makes an HTTP request to the llama-server and returns the parsed JSON response
|
|
23
|
+
*
|
|
24
|
+
* @param endpoint The endpoint path to fetch (e.g. "/health")
|
|
25
|
+
* @param body The optional request body for POST requests
|
|
26
|
+
* @returns The parsed JSON response from the server
|
|
26
27
|
*/
|
|
27
28
|
export const rpc = async <T>(
|
|
28
29
|
endpoint: string,
|
|
@@ -46,11 +47,8 @@ export const rpc = async <T>(
|
|
|
46
47
|
},
|
|
47
48
|
});
|
|
48
49
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
throw new Error(`${res.status}: ${text}`);
|
|
52
|
-
}
|
|
53
|
-
return res.json() as T;
|
|
50
|
+
const response: T = await res.json();
|
|
51
|
+
return response;
|
|
54
52
|
};
|
|
55
53
|
|
|
56
54
|
/**
|
|
@@ -62,8 +60,7 @@ export const listModels = async (): Promise<BaseModel[]> => {
|
|
|
62
60
|
const { models, data } = await rpc<ModelsEndpoint>("/models");
|
|
63
61
|
|
|
64
62
|
if (models) {
|
|
65
|
-
|
|
66
|
-
return data.map((m) => new SingleModel(m, extra));
|
|
63
|
+
return data.map((m) => new SingleModel(m));
|
|
67
64
|
}
|
|
68
65
|
|
|
69
66
|
const response = data
|
package/tests/routerModel.test.ts
CHANGED
|
@@ -1,8 +1,17 @@
|
|
|
1
|
-
import { describe, expect, it } from "vitest";
|
|
1
|
+
import { describe, expect, it, vi } from "vitest";
|
|
2
2
|
import { Mode } from "../src/enums/mode";
|
|
3
3
|
import { DataProperty } from "../src/interfaces/endpoints/models";
|
|
4
4
|
import { RouterModel } from "../src/models/routerModel";
|
|
5
5
|
|
|
6
|
+
// Mock the retriever module before importing anything that depends on it
|
|
7
|
+
const mockRpc = vi.fn();
|
|
8
|
+
|
|
9
|
+
vi.mock("../src/tools/retriever", () => ({
|
|
10
|
+
rpc: (...args: unknown[]) => mockRpc(...args),
|
|
11
|
+
isServerReady: vi.fn(),
|
|
12
|
+
listModels: vi.fn(),
|
|
13
|
+
}));
|
|
14
|
+
|
|
6
15
|
// Helper to create a mock DataProperty
|
|
7
16
|
const createModel = (overrides: Partial<DataProperty> = {}): DataProperty => ({
|
|
8
17
|
id: "test-model",
|
|
@@ -11,6 +20,7 @@ const createModel = (overrides: Partial<DataProperty> = {}): DataProperty => ({
|
|
|
11
20
|
object: "model",
|
|
12
21
|
owned_by: "test",
|
|
13
22
|
created: Date.now(),
|
|
23
|
+
status: { value: "loaded", args: [], preset: "default", failed: false },
|
|
14
24
|
...overrides,
|
|
15
25
|
});
|
|
16
26
|
|
|
@@ -99,7 +109,26 @@ describe("RouterModel context size extraction", () => {
|
|
|
99
109
|
expect(extractFrom("--ctx-size")).toBeNull();
|
|
100
110
|
});
|
|
101
111
|
|
|
102
|
-
it("should prefer --ctx-size over --fit-ctx", async () => {
|
|
112
|
+
it("should prefer --ctx-size over --fit-ctx when loaded", async () => {
|
|
113
|
+
// First call: getStatus() -> /models
|
|
114
|
+
mockRpc.mockResolvedValueOnce({
|
|
115
|
+
data: [
|
|
116
|
+
{
|
|
117
|
+
id: "test-model",
|
|
118
|
+
status: { value: "loaded", args: ["--model", "gguf", "--ctx-size", "4096", "--fit-ctx", "8192"], preset: "default" },
|
|
119
|
+
},
|
|
120
|
+
],
|
|
121
|
+
});
|
|
122
|
+
// Second call: super.getContextSize() -> /models with meta.n_ctx
|
|
123
|
+
mockRpc.mockResolvedValueOnce({
|
|
124
|
+
data: [
|
|
125
|
+
{
|
|
126
|
+
id: "test-model",
|
|
127
|
+
meta: { n_ctx: 4096 },
|
|
128
|
+
},
|
|
129
|
+
],
|
|
130
|
+
});
|
|
131
|
+
|
|
103
132
|
const model = new RouterModel(
|
|
104
133
|
createModel({
|
|
105
134
|
status: {
|
|
@@ -114,7 +143,25 @@ describe("RouterModel context size extraction", () => {
|
|
|
114
143
|
expect(ctxSize).toBe(4096);
|
|
115
144
|
});
|
|
116
145
|
|
|
117
|
-
it("should return DEFAULT_CTX when no context size args are present", async () => {
|
|
146
|
+
it("should return DEFAULT_CTX when no context size args are present and loaded", async () => {
|
|
147
|
+
// First call: getStatus() -> /models
|
|
148
|
+
mockRpc.mockResolvedValueOnce({
|
|
149
|
+
data: [
|
|
150
|
+
{
|
|
151
|
+
id: "test-model",
|
|
152
|
+
status: { value: "loaded", args: ["--model", "gguf"], preset: "default" },
|
|
153
|
+
},
|
|
154
|
+
],
|
|
155
|
+
});
|
|
156
|
+
// Second call: super.getContextSize() -> /models without meta.n_ctx
|
|
157
|
+
mockRpc.mockResolvedValueOnce({
|
|
158
|
+
data: [
|
|
159
|
+
{
|
|
160
|
+
id: "test-model",
|
|
161
|
+
},
|
|
162
|
+
],
|
|
163
|
+
});
|
|
164
|
+
|
|
118
165
|
const { DEFAULT_CTX } = await import("../src/constants");
|
|
119
166
|
|
|
120
167
|
const model = new RouterModel(
|
|
@@ -133,38 +180,104 @@ describe("RouterModel context size extraction", () => {
|
|
|
133
180
|
});
|
|
134
181
|
|
|
135
182
|
describe("RouterModel capabilities detection", () => {
|
|
136
|
-
it("should detect image capability when
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
preset: "default",
|
|
183
|
+
it("should detect image capability when modalities.vision is true", async () => {
|
|
184
|
+
// getStatus() calls /models first
|
|
185
|
+
mockRpc.mockResolvedValueOnce({
|
|
186
|
+
data: [
|
|
187
|
+
{
|
|
188
|
+
id: "test-model",
|
|
189
|
+
status: { value: "loaded", args: [], preset: "default", failed: false },
|
|
143
190
|
},
|
|
144
|
-
|
|
145
|
-
);
|
|
191
|
+
],
|
|
192
|
+
});
|
|
193
|
+
// super.getCapabilities() calls /props?model=<id>
|
|
194
|
+
mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
|
|
195
|
+
|
|
196
|
+
const model = new RouterModel(createModel());
|
|
197
|
+
const capabilities = await model.getCapabilities();
|
|
198
|
+
|
|
199
|
+
expect(capabilities).toEqual(["image"]);
|
|
200
|
+
expect(mockRpc).toHaveBeenCalledWith("/props?model=test-model");
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
it("should detect text-only capability when modalities.vision is false", async () => {
|
|
204
|
+
// getStatus() calls /models first
|
|
205
|
+
mockRpc.mockResolvedValueOnce({
|
|
206
|
+
data: [
|
|
207
|
+
{
|
|
208
|
+
id: "test-model",
|
|
209
|
+
status: { value: "loaded", args: [], preset: "default", failed: false },
|
|
210
|
+
},
|
|
211
|
+
],
|
|
212
|
+
});
|
|
213
|
+
// super.getCapabilities() calls /props?model=<id>
|
|
214
|
+
mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
|
|
146
215
|
|
|
147
|
-
|
|
216
|
+
const model = new RouterModel(createModel());
|
|
217
|
+
const capabilities = await model.getCapabilities();
|
|
218
|
+
|
|
219
|
+
expect(capabilities).toEqual(["text"]);
|
|
148
220
|
});
|
|
149
221
|
|
|
150
|
-
it("should
|
|
222
|
+
it("should default to text when /props call fails", async () => {
|
|
223
|
+
// getStatus() calls /models first
|
|
224
|
+
mockRpc.mockResolvedValueOnce({
|
|
225
|
+
data: [
|
|
226
|
+
{
|
|
227
|
+
id: "test-model",
|
|
228
|
+
status: { value: "loaded", args: [], preset: "default", failed: false },
|
|
229
|
+
},
|
|
230
|
+
],
|
|
231
|
+
});
|
|
232
|
+
// super.getCapabilities() calls /props?model=<id> which fails
|
|
233
|
+
mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
|
|
234
|
+
|
|
235
|
+
const model = new RouterModel(createModel());
|
|
236
|
+
const capabilities = await model.getCapabilities();
|
|
237
|
+
|
|
238
|
+
expect(capabilities).toEqual(["text"]);
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
it("should use status.args to detect image capability when not loaded", async () => {
|
|
242
|
+
// getStatus() calls /models first, returns unloaded
|
|
243
|
+
mockRpc.mockResolvedValueOnce({
|
|
244
|
+
data: [
|
|
245
|
+
{
|
|
246
|
+
id: "test-model",
|
|
247
|
+
status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
|
|
248
|
+
},
|
|
249
|
+
],
|
|
250
|
+
});
|
|
251
|
+
|
|
151
252
|
const model = new RouterModel(
|
|
152
253
|
createModel({
|
|
153
|
-
status: {
|
|
154
|
-
value: "loaded",
|
|
155
|
-
args: ["--model", "gguf"],
|
|
156
|
-
preset: "default",
|
|
157
|
-
},
|
|
254
|
+
status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
|
|
158
255
|
}),
|
|
159
256
|
);
|
|
257
|
+
const capabilities = await model.getCapabilities();
|
|
160
258
|
|
|
161
|
-
expect(
|
|
259
|
+
expect(capabilities).toEqual(["image"]);
|
|
162
260
|
});
|
|
163
261
|
|
|
164
|
-
it("should
|
|
165
|
-
|
|
262
|
+
it("should return text when not loaded and no --mmproj in args", async () => {
|
|
263
|
+
// getStatus() calls /models first, returns unloaded
|
|
264
|
+
mockRpc.mockResolvedValueOnce({
|
|
265
|
+
data: [
|
|
266
|
+
{
|
|
267
|
+
id: "test-model",
|
|
268
|
+
status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
|
|
269
|
+
},
|
|
270
|
+
],
|
|
271
|
+
});
|
|
272
|
+
|
|
273
|
+
const model = new RouterModel(
|
|
274
|
+
createModel({
|
|
275
|
+
status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
|
|
276
|
+
}),
|
|
277
|
+
);
|
|
278
|
+
const capabilities = await model.getCapabilities();
|
|
166
279
|
|
|
167
|
-
expect(
|
|
280
|
+
expect(capabilities).toEqual(["text"]);
|
|
168
281
|
});
|
|
169
282
|
});
|
|
170
283
|
|
package/tests/singleModel.test.ts
CHANGED
|
@@ -57,14 +57,32 @@ describe("SingleModel mode", () => {
|
|
|
57
57
|
});
|
|
58
58
|
|
|
59
59
|
describe("SingleModel capabilities", () => {
|
|
60
|
-
it("should detect image capability when
|
|
61
|
-
|
|
62
|
-
|
|
60
|
+
it("should detect image capability when modalities.vision is true", async () => {
|
|
61
|
+
mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
|
|
62
|
+
|
|
63
|
+
const model = createModel();
|
|
64
|
+
const capabilities = await model.getCapabilities();
|
|
65
|
+
|
|
66
|
+
expect(capabilities).toEqual(["image"]);
|
|
67
|
+
expect(mockRpc).toHaveBeenCalledWith("/props?model=test");
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it("should detect text-only capability when modalities.vision is false", async () => {
|
|
71
|
+
mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
|
|
72
|
+
|
|
73
|
+
const model = createModel();
|
|
74
|
+
const capabilities = await model.getCapabilities();
|
|
75
|
+
|
|
76
|
+
expect(capabilities).toEqual(["text"]);
|
|
63
77
|
});
|
|
64
78
|
|
|
65
|
-
it("should
|
|
66
|
-
|
|
67
|
-
|
|
79
|
+
it("should return text when /props call fails", async () => {
|
|
80
|
+
mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
|
|
81
|
+
|
|
82
|
+
const model = createModel();
|
|
83
|
+
const capabilities = await model.getCapabilities();
|
|
84
|
+
|
|
85
|
+
expect(capabilities).toEqual(["text"]);
|
|
68
86
|
});
|
|
69
87
|
});
|
|
70
88
|
|
|
@@ -90,29 +108,28 @@ describe("SingleModel getStatus", () => {
|
|
|
90
108
|
});
|
|
91
109
|
|
|
92
110
|
describe("SingleModel getContextSize", () => {
|
|
93
|
-
it("should return n_ctx from /
|
|
94
|
-
mockRpc.mockResolvedValueOnce(
|
|
111
|
+
it("should return n_ctx from /models endpoint meta", async () => {
|
|
112
|
+
mockRpc.mockResolvedValueOnce({
|
|
113
|
+
data: [{ id: "test", meta: { n_ctx: 8192 } }],
|
|
114
|
+
});
|
|
95
115
|
|
|
96
116
|
const model = createModel();
|
|
97
117
|
const ctxSize = await model.getContextSize();
|
|
98
118
|
|
|
99
119
|
expect(ctxSize).toBe(8192);
|
|
100
|
-
expect(mockRpc).toHaveBeenCalledWith("/
|
|
120
|
+
expect(mockRpc).toHaveBeenCalledWith("/models");
|
|
101
121
|
});
|
|
102
122
|
|
|
103
|
-
it("should
|
|
104
|
-
mockRpc.mockResolvedValueOnce(
|
|
123
|
+
it("should return DEFAULT_CTX when model not found in /models", async () => {
|
|
124
|
+
mockRpc.mockResolvedValueOnce({ data: [] });
|
|
105
125
|
|
|
106
126
|
const model = createModel();
|
|
107
|
-
const
|
|
108
|
-
const second = await model.getContextSize();
|
|
127
|
+
const ctxSize = await model.getContextSize();
|
|
109
128
|
|
|
110
|
-
expect(
|
|
111
|
-
expect(second).toBe(4096);
|
|
112
|
-
expect(mockRpc).toHaveBeenCalledTimes(1);
|
|
129
|
+
expect(ctxSize).toBe(DEFAULT_CTX);
|
|
113
130
|
});
|
|
114
131
|
|
|
115
|
-
it("should return DEFAULT_CTX when /
|
|
132
|
+
it("should return DEFAULT_CTX when /models fails", async () => {
|
|
116
133
|
mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
|
|
117
134
|
|
|
118
135
|
const model = createModel();
|
package/tsconfig.json
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"compilerOptions": {
|
|
3
|
-
"target": "
|
|
4
|
-
"module": "
|
|
5
|
-
"moduleResolution": "
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "commonjs",
|
|
5
|
+
"moduleResolution": "bundler",
|
|
6
6
|
"strict": true,
|
|
7
7
|
"esModuleInterop": true,
|
|
8
8
|
"skipLibCheck": true,
|
|
9
|
-
"
|
|
10
|
-
"types": ["node"]
|
|
9
|
+
"noEmit": true
|
|
11
10
|
},
|
|
12
|
-
"include": ["src/**/*.ts"]
|
|
11
|
+
"include": ["src/**/*.ts", "tests/**/*.ts"]
|
|
13
12
|
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* The structure of llama-server's /slots endpoint
|
|
3
|
-
*
|
|
4
|
-
* In single mode, applies to /slots
|
|
5
|
-
* In router mode, applies to /slots?model=<id>
|
|
6
|
-
*/
|
|
7
|
-
export interface SlotsEndpoint {
|
|
8
|
-
id: number;
|
|
9
|
-
n_ctx: number;
|
|
10
|
-
speculative: boolean;
|
|
11
|
-
is_processing: boolean;
|
|
12
|
-
id_task?: number;
|
|
13
|
-
params?: Array<Record<string, any>>;
|
|
14
|
-
next_token?: Array<Record<string, any>>;
|
|
15
|
-
}
|