pi-llama-cpp 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -99,8 +99,8 @@ llama-server --model path/to/model.gguf ...
99
99
  ```
100
100
 
101
101
  The extension determines the context size as follows:
102
- - **Router mode** — reads from the preset file's `ctx-size` and/or `fit-ctx` keys
103
- - **Single mode** — reads from the `/slots` endpoint (stores it in cache afterwards)
102
+ - **Router mode** — when loaded, reads `meta.n_ctx` from the `/models` endpoint; when not loaded, reads `--ctx-size` and/or `--fit-ctx` from the model's status `args` array
103
+ - **Single mode** — reads `meta.n_ctx` from the `/models` endpoint
104
104
  - Falls back to `128000` if not available
105
105
 
106
106
  ### Commands
@@ -145,6 +145,6 @@ Each model exposed to Pi includes the following defaults:
145
145
 
146
146
  ## Dependencies
147
147
 
148
- | Dependency | Purpose |
149
- | ------------------------------- | ------------------------------------- |
150
- | `@mariozechner/pi-coding-agent` | Pi Coding Agent SDK (peer dependency) |
148
+ | Dependency | Purpose |
149
+ | --------------------------------- | ------------------------------------- |
150
+ | `@earendil-works/pi-coding-agent` | Pi Coding Agent SDK (peer dependency) |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-llama-cpp",
3
- "version": "0.2.3",
3
+ "version": "0.3.1",
4
4
  "description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
5
5
  "keywords": [
6
6
  "pi",
@@ -33,7 +33,7 @@
33
33
  ]
34
34
  },
35
35
  "peerDependencies": {
36
- "@mariozechner/pi-coding-agent": "*"
36
+ "@earendil-works/pi-coding-agent": "*"
37
37
  },
38
38
  "devDependencies": {
39
39
  "@types/node": "^25.6.0",
@@ -1,12 +1,13 @@
1
1
  import type {
2
2
  ExtensionAPI,
3
3
  ExtensionCommandContext,
4
- } from "@mariozechner/pi-coding-agent";
5
- import { PROVIDER_ID, PROVIDER_NAME } from "./constants";
6
- import { Action } from "./enums/action";
7
- import { Mode } from "./enums/mode";
8
- import { Status } from "./enums/status";
9
- import { BaseModel } from "./models/baseModel";
4
+ } from "@earendil-works/pi-coding-agent";
5
+ import { PROVIDER_ID, PROVIDER_NAME } from "../constants";
6
+ import { Action } from "../enums/action";
7
+ import { Mode } from "../enums/mode";
8
+ import { Status } from "../enums/status";
9
+ import { BaseModel } from "../models/baseModel";
10
+ import { resolveUrl } from "../tools/resolver";
10
11
 
11
12
  /**
12
13
  * Select a model from the list. Returns null if user cancels.
@@ -30,7 +31,7 @@ const selectModel = async (
30
31
  * Get available actions for a model based on its mode and status.
31
32
  *
32
33
  * @param model The selected model
33
- * @returns
34
+ * @returns The array of available actions for the given model status
34
35
  */
35
36
  const getActionsForModel = async (model: BaseModel): Promise<Array<Action>> => {
36
37
  const routerModeActions: Record<Status, Array<Action>> = {
@@ -109,13 +110,25 @@ const modelSelectionHandler = async (
109
110
  }
110
111
  };
111
112
 
113
+ /**
114
+ * Handles the /models command when the server is unreachable.
115
+ *
116
+ * @param ctx The context used by Pi
117
+ */
118
+ export const notFoundCommand = async (
119
+ ctx: ExtensionCommandContext,
120
+ ): Promise<void> => {
121
+ const url = await resolveUrl(ctx.cwd);
122
+ ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
123
+ };
124
+
112
125
  /**
113
126
  * Handles the /models command
114
127
  *
115
128
  * @param ctx The context used by Pi
116
129
  * @param pi The Pi extension
117
130
  */
118
- export const modelsCommandHandler = async (
131
+ export const modelsCommand = async (
119
132
  ctx: ExtensionCommandContext,
120
133
  pi: ExtensionAPI,
121
134
  models: BaseModel[],
package/src/events.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { ExtensionContext } from "@mariozechner/pi-coding-agent";
1
+ import { ExtensionContext } from "@earendil-works/pi-coding-agent";
2
2
  import { PROVIDER_ID } from "./constants";
3
3
  import { ModelSelectEvent } from "./interfaces/events";
4
4
  import { listModels } from "./tools/retriever";
package/src/index.ts CHANGED
@@ -1,47 +1,34 @@
1
1
  import type {
2
2
  ExtensionAPI,
3
3
  ExtensionCommandContext,
4
- } from "@mariozechner/pi-coding-agent";
5
- import { PROVIDER_ID, PROVIDER_NAME } from "./constants";
4
+ } from "@earendil-works/pi-coding-agent";
5
+ import { modelsCommand, notFoundCommand } from "./commands/models";
6
+ import { PROVIDER_NAME } from "./constants";
6
7
  import { onModelSelect } from "./events";
7
- import { modelsCommandHandler } from "./handlers";
8
- import { resolveApiKey, resolveUrl } from "./tools/resolver";
9
- import { isServerReady, listModels } from "./tools/retriever";
8
+ import { registerLlamaCppProvider } from "./tools/provider";
9
+ import { isServerReady } from "./tools/retriever";
10
10
 
11
11
  export default async function (pi: ExtensionAPI) {
12
- // Command registration
12
+ // Server verification
13
13
  if (!(await isServerReady())) {
14
14
  pi.registerCommand("models", {
15
15
  description: `${PROVIDER_NAME} models (offline)`,
16
- handler: async (
17
- _: string,
18
- ctx: ExtensionCommandContext,
19
- ): Promise<void> => {
20
- const url = await resolveUrl(ctx.cwd);
21
- ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
16
+ handler: async (_: string, ctx: ExtensionCommandContext) => {
17
+ await notFoundCommand(ctx);
22
18
  },
23
19
  });
24
20
 
25
21
  return;
26
22
  }
27
23
 
28
- const cwd = process.cwd();
29
- const url = await resolveUrl(cwd);
30
- const serverModels = await listModels();
24
+ // Provider registration
25
+ const serverModels = await registerLlamaCppProvider(pi);
31
26
 
27
+ // Command: /models
32
28
  pi.registerCommand("models", {
33
29
  description: `Browse ${PROVIDER_NAME} models (live status)`,
34
30
  handler: async (_: string, ctx: ExtensionCommandContext) =>
35
- await modelsCommandHandler(ctx, pi, serverModels),
36
- });
37
-
38
- // Provider registration
39
- pi.registerProvider(PROVIDER_ID, {
40
- name: PROVIDER_NAME,
41
- baseUrl: `${url}/v1`,
42
- api: "openai-completions",
43
- apiKey: await resolveApiKey(),
44
- models: await Promise.all(serverModels.map((m) => m.toProviderConfig())),
31
+ await modelsCommand(ctx, pi, serverModels),
45
32
  });
46
33
 
47
34
  // Events registration
@@ -53,6 +53,7 @@ interface StatusProperty {
53
53
  interface MetaProperty {
54
54
  vocab_type: number;
55
55
  n_vocab: number;
56
+ n_ctx: number;
56
57
  n_ctx_train: number;
57
58
  n_embd: number;
58
59
  n_params: number;
@@ -1,10 +1,21 @@
1
- import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
2
- import { MAX_TOKENS, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
1
+ import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
2
+ import {
3
+ DEFAULT_CTX,
4
+ MAX_TOKENS,
5
+ POLLING_INTERVAL,
6
+ POLLING_TIMEOUT,
7
+ } from "../constants";
3
8
  import { Mode } from "../enums/mode";
4
9
  import { Status } from "../enums/status";
5
- import { DataProperty } from "../interfaces/endpoints/models";
10
+ import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
11
+ import { PropsEndpoint } from "../interfaces/endpoints/props";
6
12
  import { rpc } from "../tools/retriever";
7
13
 
14
+ /**
15
+ * Abstract base class for llama-server models.
16
+ * Provides common functionality for model identification, status checking,
17
+ * loading/unloading, and configuration conversion.
18
+ */
8
19
  export abstract class BaseModel {
9
20
  constructor(protected readonly model: DataProperty) {}
10
21
 
@@ -40,9 +51,21 @@ export abstract class BaseModel {
40
51
  }
41
52
 
42
53
  /**
43
- * Detects if the model can load images
54
+ * Detects the capabilities of the model
55
+ *
56
+ * @returns An array of capabilities, as expected by Pi
44
57
  */
45
- abstract get capabilities(): ["text"] | ["image"];
58
+ async getCapabilities(): Promise<["text"] | ["image"]> {
59
+ try {
60
+ const { modalities } = await rpc<PropsEndpoint>(
61
+ `/props?model=${this.id}`,
62
+ );
63
+
64
+ return modalities.vision ? ["image"] : ["text"];
65
+ } catch {
66
+ return ["text"];
67
+ }
68
+ }
46
69
 
47
70
  /**
48
71
  * Gets the load status of the model
@@ -52,7 +75,17 @@ export abstract class BaseModel {
52
75
  /**
53
76
  * Gets the context size of a particular model
54
77
  */
55
- abstract getContextSize(): Promise<number>;
78
+ async getContextSize(): Promise<number> {
79
+ try {
80
+ const { data } = await rpc<ModelsEndpoint>(`/models`);
81
+ const model = data.find((d) => d.id === this.id);
82
+
83
+ const response = model?.meta?.n_ctx;
84
+ return response ?? DEFAULT_CTX;
85
+ } catch {
86
+ return DEFAULT_CTX;
87
+ }
88
+ }
56
89
 
57
90
  /**
58
91
  * Sets up a label for the model selection screen
@@ -72,7 +105,7 @@ export abstract class BaseModel {
72
105
  `ID : ${this.id}`,
73
106
  `Model : ${this.name}`,
74
107
  `Reasoning : ${this.reasoning}`,
75
- `Capabilities : ${this.capabilities.join(", ")}`,
108
+ `Capabilities : ${(await this.getCapabilities()).join(", ")}`,
76
109
  `Context size : ${await this.getContextSize()}`,
77
110
  `Status : ${await this.getStatus()}`,
78
111
  ];
@@ -90,7 +123,7 @@ export abstract class BaseModel {
90
123
  id: this.id,
91
124
  name: this.name,
92
125
  reasoning: this.reasoning,
93
- input: this.capabilities,
126
+ input: await this.getCapabilities(),
94
127
  contextWindow: await this.getContextSize(),
95
128
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
96
129
  maxTokens: MAX_TOKENS,
@@ -122,16 +155,13 @@ export abstract class BaseModel {
122
155
  * @param startTime The initial polling timestamp
123
156
  */
124
157
  async pollStatus(startTime = Date.now()): Promise<void> {
125
- const status = await this.getStatus();
126
- if (status !== Status.LOADING) return;
127
-
128
- // Force a timeout if we wasted too much time polling
129
- if (Date.now() - startTime > POLLING_TIMEOUT) {
130
- const message = `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`;
131
- throw new Error(message);
158
+ while ((await this.getStatus()) === Status.LOADING) {
159
+ // Force a timeout if we wasted too much time polling
160
+ if (Date.now() - startTime > POLLING_TIMEOUT) {
161
+ const message = `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`;
162
+ throw new Error(message);
163
+ }
164
+ await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
132
165
  }
133
-
134
- await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
135
- await this.pollStatus(startTime);
136
166
  }
137
167
  }
@@ -1,24 +1,20 @@
1
1
  import { DEFAULT_CTX } from "../constants";
2
2
  import { Mode } from "../enums/mode";
3
3
  import { Status } from "../enums/status";
4
- import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
4
+ import { ModelsEndpoint } from "../interfaces/endpoints/models";
5
5
  import { rpc } from "../tools/retriever";
6
6
  import { BaseModel } from "./baseModel";
7
7
 
8
+ /**
9
+ * Represents a model in llama-server router mode.
10
+ * Tracks per-model status from the /models endpoint and extracts
11
+ * context size from startup arguments when the model is not loaded.
12
+ */
8
13
  export class RouterModel extends BaseModel {
9
- constructor(protected readonly model: DataProperty) {
10
- super(model);
11
- }
12
-
13
14
  get mode(): Mode {
14
15
  return Mode.ROUTER;
15
16
  }
16
17
 
17
- get capabilities(): ["text"] | ["image"] {
18
- const hasImage = this.model.status?.args?.includes("--mmproj") ?? false;
19
- return hasImage ? ["image"] : ["text"];
20
- }
21
-
22
18
  async getStatus(): Promise<Status> {
23
19
  const { data } = await rpc<ModelsEndpoint>("/models");
24
20
  const model = data.find((m) => m.id === this.id);
@@ -34,14 +30,28 @@ export class RouterModel extends BaseModel {
34
30
  return status;
35
31
  }
36
32
 
33
+ async getCapabilities(): Promise<["text"] | ["image"]> {
34
+ // We can get the real capabilities if the model is already loaded
35
+ if ((await this.getStatus()) === Status.LOADED) {
36
+ return super.getCapabilities();
37
+ }
38
+
39
+ const hasImage = this.model.status?.args?.includes("--mmproj") ?? false;
40
+ return hasImage ? ["image"] : ["text"];
41
+ }
42
+
37
43
  async getContextSize(): Promise<number> {
38
- let response = this.extractFrom("--ctx-size");
39
- if (response) return response;
44
+ // We can get a more accurate context size if the model is already loaded
45
+ if ((await this.getStatus()) === Status.LOADED) {
46
+ return super.getContextSize();
47
+ }
40
48
 
41
- response = this.extractFrom("--fit-ctx");
42
- if (response) return response;
49
+ const response =
50
+ this.extractFrom("--ctx-size") ??
51
+ this.extractFrom("--fit-ctx") ??
52
+ DEFAULT_CTX;
43
53
 
44
- return DEFAULT_CTX;
54
+ return response;
45
55
  }
46
56
 
47
57
  /**
@@ -1,31 +1,14 @@
1
- import { DEFAULT_CTX } from "../constants";
2
1
  import { Mode } from "../enums/mode";
3
2
  import { Status } from "../enums/status";
4
- import { DataProperty, ModelProperty } from "../interfaces/endpoints/models";
5
3
  import { PropsEndpoint } from "../interfaces/endpoints/props";
6
- import { SlotsEndpoint } from "../interfaces/endpoints/slots";
7
4
  import { rpc } from "../tools/retriever";
8
5
  import { BaseModel } from "./baseModel";
9
6
 
10
7
  export class SingleModel extends BaseModel {
11
- private contextSize?: number;
12
-
13
- constructor(
14
- protected readonly model: DataProperty,
15
- private readonly extra: ModelProperty,
16
- ) {
17
- super(model);
18
- }
19
-
20
8
  get mode(): Mode {
21
9
  return Mode.SINGLE;
22
10
  }
23
11
 
24
- get capabilities(): ["text"] | ["image"] {
25
- const hasImage = this.extra.capabilities.includes("multimodal");
26
- return hasImage ? ["image"] : ["text"];
27
- }
28
-
29
12
  async getStatus(): Promise<Status> {
30
13
  // In single-mode, the extension will only work when the model is fully loaded
31
14
  const { is_sleeping } = await rpc<PropsEndpoint>("/props");
@@ -33,18 +16,4 @@ export class SingleModel extends BaseModel {
33
16
 
34
17
  return Status.LOADED;
35
18
  }
36
-
37
- async getContextSize(): Promise<number> {
38
- // Avoid calling the endpoint if we already have the value
39
- if (this.contextSize) return this.contextSize;
40
-
41
- try {
42
- const [{ n_ctx }] = await rpc<SlotsEndpoint[]>("/slots");
43
- this.contextSize = n_ctx;
44
-
45
- return this.contextSize;
46
- } catch {
47
- return DEFAULT_CTX;
48
- }
49
- }
50
19
  }
@@ -0,0 +1,28 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { PROVIDER_ID, PROVIDER_NAME } from "../constants";
3
+ import type { BaseModel } from "../models/baseModel";
4
+ import { resolveApiKey, resolveUrl } from "./resolver";
5
+ import { listModels } from "./retriever";
6
+
7
+ /**
8
+ * Registers the Llama.cpp provider and returns the fetched models.
9
+ *
10
+ * @param pi The Pi extension API
11
+ * @returns The list of models fetched from the server
12
+ */
13
+ export const registerLlamaCppProvider = async (
14
+ pi: ExtensionAPI,
15
+ ): Promise<BaseModel[]> => {
16
+ const baseUrl = `${await resolveUrl(process.cwd())}/v1`;
17
+ const models = await listModels();
18
+
19
+ pi.registerProvider(PROVIDER_ID, {
20
+ name: PROVIDER_NAME,
21
+ baseUrl,
22
+ api: "openai-completions",
23
+ apiKey: await resolveApiKey(),
24
+ models: await Promise.all(models.map((m) => m.toProviderConfig())),
25
+ });
26
+
27
+ return models;
28
+ };
@@ -25,9 +25,9 @@ const fileExists = async (filePath: string): Promise<boolean> => {
25
25
  };
26
26
 
27
27
  /**
28
- * Reads the contents of a file as JSON
29
- * @param filePath The path
30
- * @returns The content as JSON
28
+ * Reads and parses the contents of a file as JSON
29
+ * @param filePath The path to the file
30
+ * @returns The parsed content, or null if parsing fails
31
31
  */
32
32
  const readContents = async <T>(filePath: string): Promise<T | null> => {
33
33
  const raw = await readFile(filePath, "utf-8");
@@ -41,10 +41,10 @@ const readContents = async <T>(filePath: string): Promise<T | null> => {
41
41
  };
42
42
 
43
43
  /**
44
- * Reads a string value from a JSON config file
44
+ * Reads a value from a JSON config file by key
45
45
  * @param filePath Path to the JSON config file
46
46
  * @param key Key to extract from the parsed JSON
47
- * @returns The string value, or null if file/key missing or invalid
47
+ * @returns The value at the given key, or null if file/key missing or invalid
48
48
  */
49
49
  const readConfigValue = async <T>(
50
50
  filePath: string,
@@ -19,10 +19,11 @@ export const isServerReady = async (): Promise<boolean> => {
19
19
  };
20
20
 
21
21
  /**
22
- * Extracts the data of a fetch command
23
- * @param endpoint The endpoint to fetch from
24
- * @param body The body (optional)
25
- * @returns Data from the fetch command
22
+ * Makes an HTTP request to the llama-server and returns the parsed JSON response
23
+ *
24
+ * @param endpoint The endpoint path to fetch (e.g. "/health")
25
+ * @param body The optional request body for POST requests
26
+ * @returns The parsed JSON response from the server
26
27
  */
27
28
  export const rpc = async <T>(
28
29
  endpoint: string,
@@ -46,11 +47,8 @@ export const rpc = async <T>(
46
47
  },
47
48
  });
48
49
 
49
- if (!res.ok) {
50
- const text = await res.text();
51
- throw new Error(`${res.status}: ${text}`);
52
- }
53
- return res.json() as T;
50
+ const response: T = await res.json();
51
+ return response;
54
52
  };
55
53
 
56
54
  /**
@@ -62,8 +60,7 @@ export const listModels = async (): Promise<BaseModel[]> => {
62
60
  const { models, data } = await rpc<ModelsEndpoint>("/models");
63
61
 
64
62
  if (models) {
65
- const [extra] = models;
66
- return data.map((m) => new SingleModel(m, extra));
63
+ return data.map((m) => new SingleModel(m));
67
64
  }
68
65
 
69
66
  const response = data
@@ -1,8 +1,17 @@
1
- import { describe, expect, it } from "vitest";
1
+ import { describe, expect, it, vi } from "vitest";
2
2
  import { Mode } from "../src/enums/mode";
3
3
  import { DataProperty } from "../src/interfaces/endpoints/models";
4
4
  import { RouterModel } from "../src/models/routerModel";
5
5
 
6
+ // Mock the retriever module before importing anything that depends on it
7
+ const mockRpc = vi.fn();
8
+
9
+ vi.mock("../src/tools/retriever", () => ({
10
+ rpc: (...args: unknown[]) => mockRpc(...args),
11
+ isServerReady: vi.fn(),
12
+ listModels: vi.fn(),
13
+ }));
14
+
6
15
  // Helper to create a mock DataProperty
7
16
  const createModel = (overrides: Partial<DataProperty> = {}): DataProperty => ({
8
17
  id: "test-model",
@@ -11,6 +20,7 @@ const createModel = (overrides: Partial<DataProperty> = {}): DataProperty => ({
11
20
  object: "model",
12
21
  owned_by: "test",
13
22
  created: Date.now(),
23
+ status: { value: "loaded", args: [], preset: "default", failed: false },
14
24
  ...overrides,
15
25
  });
16
26
 
@@ -99,7 +109,26 @@ describe("RouterModel context size extraction", () => {
99
109
  expect(extractFrom("--ctx-size")).toBeNull();
100
110
  });
101
111
 
102
- it("should prefer --ctx-size over --fit-ctx", async () => {
112
+ it("should prefer --ctx-size over --fit-ctx when loaded", async () => {
113
+ // First call: getStatus() -> /models
114
+ mockRpc.mockResolvedValueOnce({
115
+ data: [
116
+ {
117
+ id: "test-model",
118
+ status: { value: "loaded", args: ["--model", "gguf", "--ctx-size", "4096", "--fit-ctx", "8192"], preset: "default" },
119
+ },
120
+ ],
121
+ });
122
+ // Second call: super.getContextSize() -> /models with meta.n_ctx
123
+ mockRpc.mockResolvedValueOnce({
124
+ data: [
125
+ {
126
+ id: "test-model",
127
+ meta: { n_ctx: 4096 },
128
+ },
129
+ ],
130
+ });
131
+
103
132
  const model = new RouterModel(
104
133
  createModel({
105
134
  status: {
@@ -114,7 +143,25 @@ describe("RouterModel context size extraction", () => {
114
143
  expect(ctxSize).toBe(4096);
115
144
  });
116
145
 
117
- it("should return DEFAULT_CTX when no context size args are present", async () => {
146
+ it("should return DEFAULT_CTX when no context size args are present and loaded", async () => {
147
+ // First call: getStatus() -> /models
148
+ mockRpc.mockResolvedValueOnce({
149
+ data: [
150
+ {
151
+ id: "test-model",
152
+ status: { value: "loaded", args: ["--model", "gguf"], preset: "default" },
153
+ },
154
+ ],
155
+ });
156
+ // Second call: super.getContextSize() -> /models without meta.n_ctx
157
+ mockRpc.mockResolvedValueOnce({
158
+ data: [
159
+ {
160
+ id: "test-model",
161
+ },
162
+ ],
163
+ });
164
+
118
165
  const { DEFAULT_CTX } = await import("../src/constants");
119
166
 
120
167
  const model = new RouterModel(
@@ -133,38 +180,104 @@ describe("RouterModel context size extraction", () => {
133
180
  });
134
181
 
135
182
  describe("RouterModel capabilities detection", () => {
136
- it("should detect image capability when --mmproj is present", () => {
137
- const model = new RouterModel(
138
- createModel({
139
- status: {
140
- value: "loaded",
141
- args: ["--model", "gguf", "--mmproj", "mmproj.gguf"],
142
- preset: "default",
183
+ it("should detect image capability when modalities.vision is true", async () => {
184
+ // getStatus() calls /models first
185
+ mockRpc.mockResolvedValueOnce({
186
+ data: [
187
+ {
188
+ id: "test-model",
189
+ status: { value: "loaded", args: [], preset: "default", failed: false },
143
190
  },
144
- }),
145
- );
191
+ ],
192
+ });
193
+ // super.getCapabilities() calls /props?model=<id>
194
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
195
+
196
+ const model = new RouterModel(createModel());
197
+ const capabilities = await model.getCapabilities();
198
+
199
+ expect(capabilities).toEqual(["image"]);
200
+ expect(mockRpc).toHaveBeenCalledWith("/props?model=test-model");
201
+ });
202
+
203
+ it("should detect text-only capability when modalities.vision is false", async () => {
204
+ // getStatus() calls /models first
205
+ mockRpc.mockResolvedValueOnce({
206
+ data: [
207
+ {
208
+ id: "test-model",
209
+ status: { value: "loaded", args: [], preset: "default", failed: false },
210
+ },
211
+ ],
212
+ });
213
+ // super.getCapabilities() calls /props?model=<id>
214
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
146
215
 
147
- expect(model.capabilities).toEqual(["image"]);
216
+ const model = new RouterModel(createModel());
217
+ const capabilities = await model.getCapabilities();
218
+
219
+ expect(capabilities).toEqual(["text"]);
148
220
  });
149
221
 
150
- it("should detect text-only capability when --mmproj is absent", () => {
222
+ it("should default to text when /props call fails", async () => {
223
+ // getStatus() calls /models first
224
+ mockRpc.mockResolvedValueOnce({
225
+ data: [
226
+ {
227
+ id: "test-model",
228
+ status: { value: "loaded", args: [], preset: "default", failed: false },
229
+ },
230
+ ],
231
+ });
232
+ // super.getCapabilities() calls /props?model=<id> which fails
233
+ mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
234
+
235
+ const model = new RouterModel(createModel());
236
+ const capabilities = await model.getCapabilities();
237
+
238
+ expect(capabilities).toEqual(["text"]);
239
+ });
240
+
241
+ it("should use status.args to detect image capability when not loaded", async () => {
242
+ // getStatus() calls /models first, returns unloaded
243
+ mockRpc.mockResolvedValueOnce({
244
+ data: [
245
+ {
246
+ id: "test-model",
247
+ status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
248
+ },
249
+ ],
250
+ });
251
+
151
252
  const model = new RouterModel(
152
253
  createModel({
153
- status: {
154
- value: "loaded",
155
- args: ["--model", "gguf"],
156
- preset: "default",
157
- },
254
+ status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
158
255
  }),
159
256
  );
257
+ const capabilities = await model.getCapabilities();
160
258
 
161
- expect(model.capabilities).toEqual(["text"]);
259
+ expect(capabilities).toEqual(["image"]);
162
260
  });
163
261
 
164
- it("should default to text when status is undefined", () => {
165
- const model = new RouterModel(createModel({ status: undefined }));
262
+ it("should return text when not loaded and no --mmproj in args", async () => {
263
+ // getStatus() calls /models first, returns unloaded
264
+ mockRpc.mockResolvedValueOnce({
265
+ data: [
266
+ {
267
+ id: "test-model",
268
+ status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
269
+ },
270
+ ],
271
+ });
272
+
273
+ const model = new RouterModel(
274
+ createModel({
275
+ status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
276
+ }),
277
+ );
278
+ const capabilities = await model.getCapabilities();
166
279
 
167
- expect(model.capabilities).toEqual(["text"]);
280
+ expect(capabilities).toEqual(["text"]);
168
281
  });
169
282
  });
170
283
 
@@ -57,14 +57,32 @@ describe("SingleModel mode", () => {
57
57
  });
58
58
 
59
59
  describe("SingleModel capabilities", () => {
60
- it("should detect image capability when multimodal", () => {
61
- const model = createModel({ capabilities: ["multimodal"] });
62
- expect(model.capabilities).toEqual(["image"]);
60
+ it("should detect image capability when modalities.vision is true", async () => {
61
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
62
+
63
+ const model = createModel();
64
+ const capabilities = await model.getCapabilities();
65
+
66
+ expect(capabilities).toEqual(["image"]);
67
+ expect(mockRpc).toHaveBeenCalledWith("/props?model=test");
68
+ });
69
+
70
+ it("should detect text-only capability when modalities.vision is false", async () => {
71
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
72
+
73
+ const model = createModel();
74
+ const capabilities = await model.getCapabilities();
75
+
76
+ expect(capabilities).toEqual(["text"]);
63
77
  });
64
78
 
65
- it("should detect text-only capability when not multimodal", () => {
66
- const model = createModel({ capabilities: [] });
67
- expect(model.capabilities).toEqual(["text"]);
79
+ it("should return text when /props call fails", async () => {
80
+ mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
81
+
82
+ const model = createModel();
83
+ const capabilities = await model.getCapabilities();
84
+
85
+ expect(capabilities).toEqual(["text"]);
68
86
  });
69
87
  });
70
88
 
@@ -90,29 +108,28 @@ describe("SingleModel getStatus", () => {
90
108
  });
91
109
 
92
110
  describe("SingleModel getContextSize", () => {
93
- it("should return n_ctx from /slots endpoint", async () => {
94
- mockRpc.mockResolvedValueOnce([{ n_ctx: 8192 }]);
111
+ it("should return n_ctx from /models endpoint meta", async () => {
112
+ mockRpc.mockResolvedValueOnce({
113
+ data: [{ id: "test", meta: { n_ctx: 8192 } }],
114
+ });
95
115
 
96
116
  const model = createModel();
97
117
  const ctxSize = await model.getContextSize();
98
118
 
99
119
  expect(ctxSize).toBe(8192);
100
- expect(mockRpc).toHaveBeenCalledWith("/slots");
120
+ expect(mockRpc).toHaveBeenCalledWith("/models");
101
121
  });
102
122
 
103
- it("should cache the context size on first call", async () => {
104
- mockRpc.mockResolvedValueOnce([{ n_ctx: 4096 }]);
123
+ it("should return DEFAULT_CTX when model not found in /models", async () => {
124
+ mockRpc.mockResolvedValueOnce({ data: [] });
105
125
 
106
126
  const model = createModel();
107
- const first = await model.getContextSize();
108
- const second = await model.getContextSize();
127
+ const ctxSize = await model.getContextSize();
109
128
 
110
- expect(first).toBe(4096);
111
- expect(second).toBe(4096);
112
- expect(mockRpc).toHaveBeenCalledTimes(1);
129
+ expect(ctxSize).toBe(DEFAULT_CTX);
113
130
  });
114
131
 
115
- it("should return DEFAULT_CTX when /slots fails", async () => {
132
+ it("should return DEFAULT_CTX when /models fails", async () => {
116
133
  mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
117
134
 
118
135
  const model = createModel();
package/tsconfig.json CHANGED
@@ -1,13 +1,12 @@
1
1
  {
2
2
  "compilerOptions": {
3
- "target": "ES2020",
4
- "module": "NodeNext",
5
- "moduleResolution": "NodeNext",
3
+ "target": "ES2022",
4
+ "module": "commonjs",
5
+ "moduleResolution": "bundler",
6
6
  "strict": true,
7
7
  "esModuleInterop": true,
8
8
  "skipLibCheck": true,
9
- "forceConsistentCasingInFileNames": true,
10
- "types": ["node"]
9
+ "noEmit": true
11
10
  },
12
- "include": ["src/**/*.ts"]
11
+ "include": ["src/**/*.ts", "tests/**/*.ts"]
13
12
  }
@@ -1,15 +0,0 @@
1
- /**
2
- * The structure of llama-server's /slots endpoint
3
- *
4
- * In single mode, applies to /slots
5
- * In router mode, applies to /slots?model=<id>
6
- */
7
- export interface SlotsEndpoint {
8
- id: number;
9
- n_ctx: number;
10
- speculative: boolean;
11
- is_processing: boolean;
12
- id_task?: number;
13
- params?: Array<Record<string, any>>;
14
- next_token?: Array<Record<string, any>>;
15
- }