pi-llama-cpp 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -99,8 +99,8 @@ llama-server --model path/to/model.gguf ...
99
99
  ```
100
100
 
101
101
  The extension determines the context size as follows:
102
- - **Router mode** — reads from the preset file's `ctx-size` and/or `fit-ctx` keys
103
- - **Single mode** — reads from the `/slots` endpoint (stores it in cache afterwards)
102
+ - **Router mode** — when loaded, reads `meta.n_ctx` from the `/models` endpoint; when not loaded, reads `--ctx-size` and/or `--fit-ctx` from the model's status `args` array
103
+ - **Single mode** — reads `meta.n_ctx` from the `/models` endpoint
104
104
  - Falls back to `128000` if not available
105
105
 
106
106
  ### Commands
@@ -145,6 +145,6 @@ Each model exposed to Pi includes the following defaults:
145
145
 
146
146
  ## Dependencies
147
147
 
148
- | Dependency | Purpose |
149
- | ------------------------------- | ------------------------------------- |
150
- | `@mariozechner/pi-coding-agent` | Pi Coding Agent SDK (peer dependency) |
148
+ | Dependency | Purpose |
149
+ | --------------------------------- | ------------------------------------- |
150
+ | `@earendil-works/pi-coding-agent` | Pi Coding Agent SDK (peer dependency) |
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pi-llama-cpp",
3
- "version": "0.2.2",
4
- "description": "Pi extension for llama.cpp integration. Supports both router and single modes",
3
+ "version": "0.3.0",
4
+ "description": "Pi extension for llama.cpp integration. Supports both router and single modes.",
5
5
  "keywords": [
6
6
  "pi",
7
7
  "pi-package",
@@ -23,16 +23,21 @@
23
23
  "./src/index.ts"
24
24
  ]
25
25
  },
26
+ "scripts": {
27
+ "test": "vitest",
28
+ "test:run": "vitest run"
29
+ },
26
30
  "prettier": {
27
31
  "plugins": [
28
32
  "prettier-plugin-organize-imports"
29
33
  ]
30
34
  },
31
35
  "peerDependencies": {
32
- "@mariozechner/pi-coding-agent": "*"
36
+ "@earendil-works/pi-coding-agent": "*"
33
37
  },
34
38
  "devDependencies": {
35
39
  "@types/node": "^25.6.0",
36
- "prettier-plugin-organize-imports": "^4.3.0"
40
+ "prettier-plugin-organize-imports": "^4.3.0",
41
+ "vitest": "^4.1.5"
37
42
  }
38
43
  }
@@ -1,12 +1,13 @@
1
1
  import type {
2
2
  ExtensionAPI,
3
3
  ExtensionCommandContext,
4
- } from "@mariozechner/pi-coding-agent";
5
- import { PROVIDER_ID, PROVIDER_NAME } from "./constants";
6
- import { Action } from "./enums/action";
7
- import { Mode } from "./enums/mode";
8
- import { Status } from "./enums/status";
9
- import { BaseModel } from "./models/baseModel";
4
+ } from "@earendil-works/pi-coding-agent";
5
+ import { PROVIDER_ID, PROVIDER_NAME } from "../constants";
6
+ import { Action } from "../enums/action";
7
+ import { Mode } from "../enums/mode";
8
+ import { Status } from "../enums/status";
9
+ import { BaseModel } from "../models/baseModel";
10
+ import { resolveUrl } from "../tools/resolver";
10
11
 
11
12
  /**
12
13
  * Select a model from the list. Returns null if user cancels.
@@ -109,13 +110,25 @@ const modelSelectionHandler = async (
109
110
  }
110
111
  };
111
112
 
113
+ /**
114
+ * Handles the /models command when the server is unreachable.
115
+ *
116
+ * @param ctx The context used by Pi
117
+ */
118
+ export const notFoundCommand = async (
119
+ ctx: ExtensionCommandContext,
120
+ ): Promise<void> => {
121
+ const url = await resolveUrl(ctx.cwd);
122
+ ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
123
+ };
124
+
112
125
  /**
113
126
  * Handles the /models command
114
127
  *
115
128
  * @param ctx The context used by Pi
116
129
  * @param pi The Pi extension
117
130
  */
118
- export const modelsCommandHandler = async (
131
+ export const modelsCommand = async (
119
132
  ctx: ExtensionCommandContext,
120
133
  pi: ExtensionAPI,
121
134
  models: BaseModel[],
package/src/constants.ts CHANGED
@@ -13,6 +13,11 @@ export const PROVIDER_NAME = "Llama.cpp";
13
13
  */
14
14
  export const DEFAULT_LLAMA_SERVER_URL = "http://127.0.0.1:8080";
15
15
 
16
+ /**
17
+ * The placeholder api-key if it couldn't be resolved
18
+ */
19
+ export const API_KEY_PLACEHOLDER = "sk-placeholder";
20
+
16
21
  /**
17
22
  * The default context if the server didn't expose it
18
23
  */
package/src/events.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { ExtensionContext } from "@mariozechner/pi-coding-agent";
1
+ import { ExtensionContext } from "@earendil-works/pi-coding-agent";
2
2
  import { PROVIDER_ID } from "./constants";
3
3
  import { ModelSelectEvent } from "./interfaces/events";
4
4
  import { listModels } from "./tools/retriever";
package/src/index.ts CHANGED
@@ -1,47 +1,34 @@
1
1
  import type {
2
2
  ExtensionAPI,
3
3
  ExtensionCommandContext,
4
- } from "@mariozechner/pi-coding-agent";
5
- import { PROVIDER_ID, PROVIDER_NAME } from "./constants";
4
+ } from "@earendil-works/pi-coding-agent";
5
+ import { modelsCommand, notFoundCommand } from "./commands/models";
6
+ import { PROVIDER_NAME } from "./constants";
6
7
  import { onModelSelect } from "./events";
7
- import { modelsCommandHandler } from "./handlers";
8
- import { resolveApiKey, resolveUrl } from "./tools/resolver";
9
- import { isServerReady, listModels } from "./tools/retriever";
8
+ import { registerLlamaCppProvider } from "./tools/provider";
9
+ import { isServerReady } from "./tools/retriever";
10
10
 
11
11
  export default async function (pi: ExtensionAPI) {
12
- // Command registration
12
+ // Server verification
13
13
  if (!(await isServerReady())) {
14
14
  pi.registerCommand("models", {
15
15
  description: `${PROVIDER_NAME} models (offline)`,
16
- handler: async (
17
- _: string,
18
- ctx: ExtensionCommandContext,
19
- ): Promise<void> => {
20
- const url = await resolveUrl(ctx.cwd);
21
- ctx.ui.notify(`${PROVIDER_NAME} unreachable at ${url}`, "error");
16
+ handler: async (_: string, ctx: ExtensionCommandContext) => {
17
+ await notFoundCommand(ctx);
22
18
  },
23
19
  });
24
20
 
25
21
  return;
26
22
  }
27
23
 
28
- const cwd = process.cwd();
29
- const url = await resolveUrl(cwd);
30
- const serverModels = await listModels();
24
+ // Provider registration
25
+ const serverModels = await registerLlamaCppProvider(pi);
31
26
 
27
+ // Command: /models
32
28
  pi.registerCommand("models", {
33
29
  description: `Browse ${PROVIDER_NAME} models (live status)`,
34
30
  handler: async (_: string, ctx: ExtensionCommandContext) =>
35
- await modelsCommandHandler(ctx, pi, serverModels),
36
- });
37
-
38
- // Provider registration
39
- pi.registerProvider(PROVIDER_ID, {
40
- name: PROVIDER_NAME,
41
- baseUrl: `${url}/v1`,
42
- api: "openai-completions",
43
- apiKey: await resolveApiKey(),
44
- models: await Promise.all(serverModels.map((m) => m.toProviderConfig())),
31
+ await modelsCommand(ctx, pi, serverModels),
45
32
  });
46
33
 
47
34
  // Events registration
@@ -1,6 +1,6 @@
1
1
  import { PROVIDER_ID } from "../constants";
2
2
 
3
- export interface Auth {
3
+ interface Auth {
4
4
  type: string;
5
5
  key: string;
6
6
  }
@@ -53,6 +53,7 @@ interface StatusProperty {
53
53
  interface MetaProperty {
54
54
  vocab_type: number;
55
55
  n_vocab: number;
56
+ n_ctx: number;
56
57
  n_ctx_train: number;
57
58
  n_embd: number;
58
59
  n_params: number;
@@ -1,8 +1,14 @@
1
- import type { ProviderModelConfig } from "@mariozechner/pi-coding-agent";
2
- import { MAX_TOKENS, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
1
+ import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
2
+ import {
3
+ DEFAULT_CTX,
4
+ MAX_TOKENS,
5
+ POLLING_INTERVAL,
6
+ POLLING_TIMEOUT,
7
+ } from "../constants";
3
8
  import { Mode } from "../enums/mode";
4
9
  import { Status } from "../enums/status";
5
- import { DataProperty } from "../interfaces/endpoints/models";
10
+ import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
11
+ import { PropsEndpoint } from "../interfaces/endpoints/props";
6
12
  import { rpc } from "../tools/retriever";
7
13
 
8
14
  export abstract class BaseModel {
@@ -40,9 +46,21 @@ export abstract class BaseModel {
40
46
  }
41
47
 
42
48
  /**
43
- * Detects if the model can load images
49
+ * Detects the capabilities of the model
50
+ *
51
+ * @returns An array of capabilities, as expected by Pi
44
52
  */
45
- abstract get capabilities(): ["text"] | ["image"];
53
+ async getCapabilities(): Promise<["text"] | ["image"]> {
54
+ try {
55
+ const { modalities } = await rpc<PropsEndpoint>(
56
+ `/props?model=${this.id}`,
57
+ );
58
+
59
+ return modalities.vision ? ["image"] : ["text"];
60
+ } catch {
61
+ return ["text"];
62
+ }
63
+ }
46
64
 
47
65
  /**
48
66
  * Gets the load status of the model
@@ -52,7 +70,17 @@ export abstract class BaseModel {
52
70
  /**
53
71
  * Gets the context size of a particular model
54
72
  */
55
- abstract getContextSize(): Promise<number>;
73
+ async getContextSize(): Promise<number> {
74
+ try {
75
+ const { data } = await rpc<ModelsEndpoint>(`/models`);
76
+ const model = data.find((d) => d.id === this.id);
77
+
78
+ const response = model?.meta?.n_ctx;
79
+ return response ?? DEFAULT_CTX;
80
+ } catch {
81
+ return DEFAULT_CTX;
82
+ }
83
+ }
56
84
 
57
85
  /**
58
86
  * Sets up a label for the model selection screen
@@ -72,7 +100,7 @@ export abstract class BaseModel {
72
100
  `ID : ${this.id}`,
73
101
  `Model : ${this.name}`,
74
102
  `Reasoning : ${this.reasoning}`,
75
- `Capabilities : ${this.capabilities.join(", ")}`,
103
+ `Capabilities : ${(await this.getCapabilities()).join(", ")}`,
76
104
  `Context size : ${await this.getContextSize()}`,
77
105
  `Status : ${await this.getStatus()}`,
78
106
  ];
@@ -90,7 +118,7 @@ export abstract class BaseModel {
90
118
  id: this.id,
91
119
  name: this.name,
92
120
  reasoning: this.reasoning,
93
- input: this.capabilities,
121
+ input: await this.getCapabilities(),
94
122
  contextWindow: await this.getContextSize(),
95
123
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
96
124
  maxTokens: MAX_TOKENS,
@@ -112,31 +140,23 @@ export abstract class BaseModel {
112
140
  /**
113
141
  * Unloads the model from llama-server
114
142
  */
115
-
116
143
  async unload(): Promise<void> {
117
144
  await rpc("/models/unload", { model: this.id });
118
145
  }
119
146
 
120
147
  /**
121
148
  * Polls llama-server to check when the model is loaded
149
+ *
150
+ * @param startTime The initial polling timestamp
122
151
  */
123
- async pollStatus(): Promise<void> {
124
- const startTime = Date.now();
125
-
126
- // Check loading status
127
- try {
128
- while ((await this.getStatus()) === Status.LOADING) {
129
- // Force a timeout if we wasted too much time polling
130
- if (Date.now() - startTime > POLLING_TIMEOUT) {
131
- const message = `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`;
132
- throw new Error(message);
133
- }
134
-
135
- await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
152
+ async pollStatus(startTime = Date.now()): Promise<void> {
153
+ while ((await this.getStatus()) === Status.LOADING) {
154
+ // Force a timeout if we wasted too much time polling
155
+ if (Date.now() - startTime > POLLING_TIMEOUT) {
156
+ const message = `Model loading timed out after ${POLLING_TIMEOUT} ms: ${this.id}`;
157
+ throw new Error(message);
136
158
  }
137
- } catch (err) {
138
- const message = err instanceof Error ? err.message : String(err);
139
- throw new Error(message);
159
+ await new Promise((r) => setTimeout(r, POLLING_INTERVAL));
140
160
  }
141
161
  }
142
162
  }
@@ -1,24 +1,15 @@
1
1
  import { DEFAULT_CTX } from "../constants";
2
2
  import { Mode } from "../enums/mode";
3
3
  import { Status } from "../enums/status";
4
- import { DataProperty, ModelsEndpoint } from "../interfaces/endpoints/models";
4
+ import { ModelsEndpoint } from "../interfaces/endpoints/models";
5
5
  import { rpc } from "../tools/retriever";
6
6
  import { BaseModel } from "./baseModel";
7
7
 
8
8
  export class RouterModel extends BaseModel {
9
- constructor(protected readonly model: DataProperty) {
10
- super(model);
11
- }
12
-
13
9
  get mode(): Mode {
14
10
  return Mode.ROUTER;
15
11
  }
16
12
 
17
- get capabilities(): ["text"] | ["image"] {
18
- const hasImage = this.model.status!.args?.includes("--mmproj") ?? false;
19
- return hasImage ? ["image"] : ["text"];
20
- }
21
-
22
13
  async getStatus(): Promise<Status> {
23
14
  const { data } = await rpc<ModelsEndpoint>("/models");
24
15
  const model = data.find((m) => m.id === this.id);
@@ -34,14 +25,28 @@ export class RouterModel extends BaseModel {
34
25
  return status;
35
26
  }
36
27
 
28
+ async getCapabilities(): Promise<["text"] | ["image"]> {
29
+ // We can get the real capabilities if the model is already loaded
30
+ if ((await this.getStatus()) === Status.LOADED) {
31
+ return super.getCapabilities();
32
+ }
33
+
34
+ const hasImage = this.model.status?.args?.includes("--mmproj") ?? false;
35
+ return hasImage ? ["image"] : ["text"];
36
+ }
37
+
37
38
  async getContextSize(): Promise<number> {
38
- let response = this.extractFrom("--ctx-size");
39
- if (response) return response;
39
+ // We can get a more accurate context size if the model is already loaded
40
+ if ((await this.getStatus()) === Status.LOADED) {
41
+ return super.getContextSize();
42
+ }
40
43
 
41
- response = this.extractFrom("--fit-ctx");
42
- if (response) return response;
44
+ const response =
45
+ this.extractFrom("--ctx-size") ??
46
+ this.extractFrom("--fit-ctx") ??
47
+ DEFAULT_CTX;
43
48
 
44
- return DEFAULT_CTX;
49
+ return response;
45
50
  }
46
51
 
47
52
  /**
@@ -1,31 +1,14 @@
1
- import { DEFAULT_CTX } from "../constants";
2
1
  import { Mode } from "../enums/mode";
3
2
  import { Status } from "../enums/status";
4
- import { DataProperty, ModelProperty } from "../interfaces/endpoints/models";
5
3
  import { PropsEndpoint } from "../interfaces/endpoints/props";
6
- import { SlotsEndpoint } from "../interfaces/endpoints/slots";
7
4
  import { rpc } from "../tools/retriever";
8
5
  import { BaseModel } from "./baseModel";
9
6
 
10
7
  export class SingleModel extends BaseModel {
11
- private contextSize?: number;
12
-
13
- constructor(
14
- protected readonly model: DataProperty,
15
- private readonly extra: ModelProperty,
16
- ) {
17
- super(model);
18
- }
19
-
20
8
  get mode(): Mode {
21
9
  return Mode.SINGLE;
22
10
  }
23
11
 
24
- get capabilities(): ["text"] | ["image"] {
25
- const hasImage = this.extra.capabilities.includes("multimodal");
26
- return hasImage ? ["image"] : ["text"];
27
- }
28
-
29
12
  async getStatus(): Promise<Status> {
30
13
  // In single-mode, the extension will only work when the model is fully loaded
31
14
  const { is_sleeping } = await rpc<PropsEndpoint>("/props");
@@ -33,18 +16,4 @@ export class SingleModel extends BaseModel {
33
16
 
34
17
  return Status.LOADED;
35
18
  }
36
-
37
- async getContextSize(): Promise<number> {
38
- // Avoid calling the endpoint if we already have the value
39
- if (this.contextSize) return this.contextSize;
40
-
41
- try {
42
- const [{ n_ctx }] = await rpc<SlotsEndpoint[]>("/slots");
43
- this.contextSize = n_ctx;
44
-
45
- return this.contextSize;
46
- } catch {
47
- return DEFAULT_CTX;
48
- }
49
- }
50
19
  }
@@ -0,0 +1,28 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { PROVIDER_ID, PROVIDER_NAME } from "../constants";
3
+ import type { BaseModel } from "../models/baseModel";
4
+ import { resolveApiKey, resolveUrl } from "./resolver";
5
+ import { listModels } from "./retriever";
6
+
7
+ /**
8
+ * Registers the Llama.cpp provider and returns the fetched models.
9
+ *
10
+ * @param pi The Pi extension API
11
+ * @returns The list of models fetched from the server
12
+ */
13
+ export const registerLlamaCppProvider = async (
14
+ pi: ExtensionAPI,
15
+ ): Promise<BaseModel[]> => {
16
+ const baseUrl = `${await resolveUrl(process.cwd())}/v1`;
17
+ const models = await listModels();
18
+
19
+ pi.registerProvider(PROVIDER_ID, {
20
+ name: PROVIDER_NAME,
21
+ baseUrl,
22
+ api: "openai-completions",
23
+ apiKey: await resolveApiKey(),
24
+ models: await Promise.all(models.map((m) => m.toProviderConfig())),
25
+ });
26
+
27
+ return models;
28
+ };
@@ -1,7 +1,11 @@
1
1
  import { access, constants, readFile } from "node:fs/promises";
2
2
  import { join } from "node:path";
3
- import { DEFAULT_LLAMA_SERVER_URL, PROVIDER_ID } from "../constants";
4
- import { Auth, AuthFile } from "../interfaces/auth";
3
+ import {
4
+ API_KEY_PLACEHOLDER,
5
+ DEFAULT_LLAMA_SERVER_URL,
6
+ PROVIDER_ID,
7
+ } from "../constants";
8
+ import { AuthFile } from "../interfaces/auth";
5
9
 
6
10
  // The URL is detected once, to reuse forever
7
11
  let resolvedUrl: string | undefined;
@@ -42,12 +46,12 @@ const readContents = async <T>(filePath: string): Promise<T | null> => {
42
46
  * @param key Key to extract from the parsed JSON
43
47
  * @returns The string value, or null if file/key missing or invalid
44
48
  */
45
- const readConfigValue = async <T, U>(
49
+ const readConfigValue = async <T>(
46
50
  filePath: string,
47
- key: string,
48
- ): Promise<U> => {
51
+ key: keyof T,
52
+ ): Promise<T[keyof T] | null> => {
49
53
  const cfg = await readContents<T>(filePath);
50
- return (cfg as Record<string, any>)?.[key] || null;
54
+ return cfg?.[key] ?? null;
51
55
  };
52
56
 
53
57
  /**
@@ -55,16 +59,11 @@ const readConfigValue = async <T, U>(
55
59
  * @returns The API key, as defined by the auth.json file
56
60
  */
57
61
  export const resolveApiKey = async (): Promise<string> => {
58
- const placeholder = "sk-placeholder";
59
-
60
62
  const authPath = join(process.env.HOME || ".", ".pi", "agent", "auth.json");
61
- if (!(await fileExists(authPath))) return placeholder;
63
+ if (!(await fileExists(authPath))) return API_KEY_PLACEHOLDER;
62
64
 
63
- const cfg = await readConfigValue<AuthFile, Auth | null>(
64
- authPath,
65
- PROVIDER_ID,
66
- );
67
- return cfg?.key ?? placeholder;
65
+ const cfg = await readConfigValue<AuthFile>(authPath, PROVIDER_ID);
66
+ return cfg?.key ?? API_KEY_PLACEHOLDER;
68
67
  };
69
68
 
70
69
  /**
@@ -81,10 +80,7 @@ const resolveGlobalUrl = async (): Promise<string | null> => {
81
80
 
82
81
  if (!(await fileExists(globalPath))) return null;
83
82
 
84
- return readConfigValue<Record<string, string>, string>(
85
- globalPath,
86
- "llamaServerUrl",
87
- );
83
+ return readConfigValue<Record<string, string>>(globalPath, "llamaServerUrl");
88
84
  };
89
85
 
90
86
  /**
@@ -96,7 +92,7 @@ const resolveProjectUrl = async (cwd: string): Promise<string | null> => {
96
92
  const projectPath = join(cwd, ".pi", "llama-server.json");
97
93
 
98
94
  if (!(await fileExists(projectPath))) return null;
99
- return readConfigValue<Record<string, string>, string>(projectPath, "url");
95
+ return readConfigValue<Record<string, string>>(projectPath, "url");
100
96
  };
101
97
 
102
98
  /**
@@ -0,0 +1,159 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { Action } from "../src/enums/action";
3
+ import { Mode } from "../src/enums/mode";
4
+ import { Status } from "../src/enums/status";
5
+ import { DataProperty } from "../src/interfaces/endpoints/models";
6
+
7
+ // Mock the retriever module before importing anything that depends on it
8
+ vi.mock("../src/tools/retriever", () => ({
9
+ rpc: vi.fn(),
10
+ isServerReady: vi.fn(),
11
+ listModels: vi.fn(),
12
+ }));
13
+
14
+ class TestModel {
15
+ constructor(
16
+ private readonly model: DataProperty,
17
+ private readonly _mode: Mode,
18
+ private readonly _status: Status,
19
+ ) {}
20
+
21
+ get mode(): Mode {
22
+ return this._mode;
23
+ }
24
+
25
+ get capabilities(): ["text"] | ["image"] {
26
+ return ["text"];
27
+ }
28
+
29
+ async getStatus(): Promise<Status> {
30
+ return this._status;
31
+ }
32
+
33
+ async getContextSize(): Promise<number> {
34
+ return 4096;
35
+ }
36
+ }
37
+
38
+ const createModel = (
39
+ mode: Mode,
40
+ status: Status,
41
+ overrides: Partial<DataProperty> = {},
42
+ ) =>
43
+ new TestModel(
44
+ {
45
+ id: "test",
46
+ tags: [],
47
+ object: "model",
48
+ owned_by: "test",
49
+ created: Date.now(),
50
+ ...overrides,
51
+ },
52
+ mode,
53
+ status,
54
+ );
55
+
56
+ /**
57
+ * Replicates the getActionsForModel logic from handlers.ts for testing
58
+ * without needing the full Pi extension context.
59
+ */
60
+ const getActionsForModel = async (model: TestModel): Promise<Array<Action>> => {
61
+ const routerModeActions: Record<Status, Array<Action>> = {
62
+ [Status.LOADED]: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
63
+ [Status.LOADING]: [Action.INFO, Action.CANCEL],
64
+ [Status.FAILED]: [Action.RETRY, Action.CANCEL],
65
+ [Status.SLEEPING]: [Action.UNLOAD, Action.INFO, Action.CANCEL],
66
+ [Status.UNLOADED]: [Action.LOAD, Action.CANCEL],
67
+ };
68
+
69
+ const singleModeActions: Record<Status, Array<Action>> = {
70
+ [Status.LOADED]: [Action.INFO, Action.CANCEL],
71
+ [Status.LOADING]: [Action.CANCEL],
72
+ [Status.FAILED]: [Action.CANCEL],
73
+ [Status.SLEEPING]: [Action.INFO, Action.CANCEL],
74
+ [Status.UNLOADED]: [Action.CANCEL],
75
+ };
76
+
77
+ const allActions =
78
+ model.mode === Mode.ROUTER ? routerModeActions : singleModeActions;
79
+
80
+ const status = await model.getStatus();
81
+ return allActions[status];
82
+ };
83
+
84
+ describe("Action availability", () => {
85
+ const actionMatrix: Array<{
86
+ mode: Mode;
87
+ status: Status;
88
+ expected: Action[];
89
+ }> = [
90
+ // Router mode
91
+ {
92
+ mode: Mode.ROUTER,
93
+ status: Status.LOADED,
94
+ expected: [Action.SWITCH, Action.UNLOAD, Action.INFO, Action.CANCEL],
95
+ },
96
+ {
97
+ mode: Mode.ROUTER,
98
+ status: Status.LOADING,
99
+ expected: [Action.INFO, Action.CANCEL],
100
+ },
101
+ {
102
+ mode: Mode.ROUTER,
103
+ status: Status.FAILED,
104
+ expected: [Action.RETRY, Action.CANCEL],
105
+ },
106
+ {
107
+ mode: Mode.ROUTER,
108
+ status: Status.SLEEPING,
109
+ expected: [Action.UNLOAD, Action.INFO, Action.CANCEL],
110
+ },
111
+ {
112
+ mode: Mode.ROUTER,
113
+ status: Status.UNLOADED,
114
+ expected: [Action.LOAD, Action.CANCEL],
115
+ },
116
+ // Single mode
117
+ {
118
+ mode: Mode.SINGLE,
119
+ status: Status.LOADED,
120
+ expected: [Action.INFO, Action.CANCEL],
121
+ },
122
+ { mode: Mode.SINGLE, status: Status.LOADING, expected: [Action.CANCEL] },
123
+ { mode: Mode.SINGLE, status: Status.FAILED, expected: [Action.CANCEL] },
124
+ {
125
+ mode: Mode.SINGLE,
126
+ status: Status.SLEEPING,
127
+ expected: [Action.INFO, Action.CANCEL],
128
+ },
129
+ { mode: Mode.SINGLE, status: Status.UNLOADED, expected: [Action.CANCEL] },
130
+ ];
131
+
132
+ it.each(actionMatrix)(
133
+ "should return correct actions for $mode/$status",
134
+ async ({ mode, status, expected }) => {
135
+ const model = createModel(mode, status);
136
+ const actions = await getActionsForModel(model);
137
+ expect(actions).toEqual(expected);
138
+ },
139
+ );
140
+
141
+ it("should always include CANCEL regardless of mode or status", async () => {
142
+ for (const mode of [Mode.ROUTER, Mode.SINGLE]) {
143
+ for (const status of Object.values(Status)) {
144
+ const model = createModel(mode, status);
145
+ const actions = await getActionsForModel(model);
146
+ expect(actions).toContain(Action.CANCEL);
147
+ }
148
+ }
149
+ });
150
+
151
+ it("should not include mode-exclusive actions", async () => {
152
+ const singleLoaded = createModel(Mode.SINGLE, Status.LOADED);
153
+ expect(await getActionsForModel(singleLoaded)).not.toContain(Action.SWITCH);
154
+ expect(await getActionsForModel(singleLoaded)).not.toContain(Action.LOAD);
155
+
156
+ const singleFailed = createModel(Mode.SINGLE, Status.FAILED);
157
+ expect(await getActionsForModel(singleFailed)).not.toContain(Action.RETRY);
158
+ });
159
+ });
@@ -0,0 +1,157 @@
1
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
+ import {
3
+ API_KEY_PLACEHOLDER,
4
+ DEFAULT_LLAMA_SERVER_URL,
5
+ PROVIDER_ID,
6
+ } from "../src/constants";
7
+
8
+ describe("URL resolution fallback chain", () => {
9
+ beforeEach(() => {
10
+ vi.clearAllMocks();
11
+ vi.resetModules();
12
+ });
13
+
14
+ afterEach(() => {
15
+ delete process.env.LLAMA_SERVER_URL;
16
+ });
17
+
18
+ it("should return default URL when no config is found", async () => {
19
+ vi.doMock("node:fs/promises", () => ({
20
+ access: vi.fn().mockRejectedValue(new Error("ENOENT")),
21
+ constants: { F_OK: 0 },
22
+ readFile: vi.fn().mockResolvedValue(""),
23
+ }));
24
+
25
+ const { resolveUrl } = await import("../src/tools/resolver");
26
+ const result = await resolveUrl("/tmp/test-project");
27
+
28
+ expect(result).toBe(DEFAULT_LLAMA_SERVER_URL);
29
+ });
30
+
31
+ it("should prioritize project config over env variable", async () => {
32
+ vi.doMock("node:fs/promises", () => ({
33
+ access: vi.fn().mockImplementation(async (path: string) => {
34
+ if (path.includes("llama-server.json")) return undefined;
35
+ throw new Error("ENOENT");
36
+ }),
37
+ constants: { F_OK: 0 },
38
+ readFile: vi
39
+ .fn()
40
+ .mockResolvedValue(JSON.stringify({ url: "http://localhost:9999" })),
41
+ }));
42
+
43
+ process.env.LLAMA_SERVER_URL = "http://env-url:8080";
44
+
45
+ const { resolveUrl } = await import("../src/tools/resolver");
46
+ const result = await resolveUrl("/tmp/test-project");
47
+
48
+ expect(result).toBe("http://localhost:9999");
49
+ });
50
+
51
+ it("should use env variable when no project config exists", async () => {
52
+ vi.doMock("node:fs/promises", () => ({
53
+ access: vi.fn().mockRejectedValue(new Error("ENOENT")),
54
+ constants: { F_OK: 0 },
55
+ readFile: vi.fn().mockResolvedValue(""),
56
+ }));
57
+
58
+ process.env.LLAMA_SERVER_URL = "http://env-url:8080";
59
+
60
+ const { resolveUrl } = await import("../src/tools/resolver");
61
+ const result = await resolveUrl("/tmp/test-project");
62
+
63
+ expect(result).toBe("http://env-url:8080");
64
+ });
65
+
66
+ it("should use global settings when no project config or env exists", async () => {
67
+ vi.doMock("node:fs/promises", () => ({
68
+ access: vi.fn().mockImplementation(async (path: string) => {
69
+ if (path.includes("settings.json")) return undefined;
70
+ throw new Error("ENOENT");
71
+ }),
72
+ constants: { F_OK: 0 },
73
+ readFile: vi
74
+ .fn()
75
+ .mockResolvedValue(
76
+ JSON.stringify({ llamaServerUrl: "http://global:8080" }),
77
+ ),
78
+ }));
79
+
80
+ const { resolveUrl } = await import("../src/tools/resolver");
81
+ const result = await resolveUrl("/tmp/test-project");
82
+
83
+ expect(result).toBe("http://global:8080");
84
+ });
85
+
86
+ it("should strip trailing slashes from resolved URL", async () => {
87
+ vi.doMock("node:fs/promises", () => ({
88
+ access: vi.fn().mockImplementation(async (path: string) => {
89
+ if (path.includes("llama-server.json")) return undefined;
90
+ throw new Error("ENOENT");
91
+ }),
92
+ constants: { F_OK: 0 },
93
+ readFile: vi
94
+ .fn()
95
+ .mockResolvedValue(JSON.stringify({ url: "http://localhost:8080/" })),
96
+ }));
97
+
98
+ const { resolveUrl } = await import("../src/tools/resolver");
99
+ const result = await resolveUrl("/tmp/test-project");
100
+
101
+ expect(result).toBe("http://localhost:8080");
102
+ });
103
+ });
104
+
105
+ describe("API key resolution", () => {
106
+ beforeEach(() => {
107
+ vi.clearAllMocks();
108
+ vi.resetModules();
109
+ });
110
+
111
+ it("should return placeholder when auth file does not exist", async () => {
112
+ vi.doMock("node:fs/promises", () => ({
113
+ access: vi.fn().mockRejectedValue(new Error("ENOENT")),
114
+ constants: { F_OK: 0 },
115
+ readFile: vi.fn().mockResolvedValue(""),
116
+ }));
117
+
118
+ const { resolveApiKey } = await import("../src/tools/resolver");
119
+ const result = await resolveApiKey();
120
+
121
+ expect(result).toBe(API_KEY_PLACEHOLDER);
122
+ });
123
+
124
+ it("should return placeholder when provider key is missing", async () => {
125
+ vi.doMock("node:fs/promises", () => ({
126
+ access: vi.fn().mockResolvedValue(undefined),
127
+ constants: { F_OK: 0 },
128
+ readFile: vi
129
+ .fn()
130
+ .mockResolvedValue(
131
+ JSON.stringify({ "other-provider": { key: "other-key" } }),
132
+ ),
133
+ }));
134
+
135
+ const { resolveApiKey } = await import("../src/tools/resolver");
136
+ const result = await resolveApiKey();
137
+
138
+ expect(result).toBe(API_KEY_PLACEHOLDER);
139
+ });
140
+
141
+ it("should return the provider key when present", async () => {
142
+ vi.doMock("node:fs/promises", () => ({
143
+ access: vi.fn().mockResolvedValue(undefined),
144
+ constants: { F_OK: 0 },
145
+ readFile: vi
146
+ .fn()
147
+ .mockResolvedValue(
148
+ JSON.stringify({ [PROVIDER_ID]: { key: "test-api-key" } }),
149
+ ),
150
+ }));
151
+
152
+ const { resolveApiKey } = await import("../src/tools/resolver");
153
+ const result = await resolveApiKey();
154
+
155
+ expect(result).toBe("test-api-key");
156
+ });
157
+ });
@@ -0,0 +1,289 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { Mode } from "../src/enums/mode";
3
+ import { DataProperty } from "../src/interfaces/endpoints/models";
4
+ import { RouterModel } from "../src/models/routerModel";
5
+
6
+ // Mock the retriever module before importing anything that depends on it
7
+ const mockRpc = vi.fn();
8
+
9
+ vi.mock("../src/tools/retriever", () => ({
10
+ rpc: (...args: unknown[]) => mockRpc(...args),
11
+ isServerReady: vi.fn(),
12
+ listModels: vi.fn(),
13
+ }));
14
+
15
+ // Helper to create a mock DataProperty
16
+ const createModel = (overrides: Partial<DataProperty> = {}): DataProperty => ({
17
+ id: "test-model",
18
+ aliases: ["test-alias"],
19
+ tags: [],
20
+ object: "model",
21
+ owned_by: "test",
22
+ created: Date.now(),
23
+ status: { value: "loaded", args: [], preset: "default", failed: false },
24
+ ...overrides,
25
+ });
26
+
27
+ describe("RouterModel context size extraction", () => {
28
+ it("should extract --ctx-size value", () => {
29
+ const model = new RouterModel(
30
+ createModel({
31
+ status: {
32
+ value: "loaded",
33
+ args: [
34
+ "--model",
35
+ "gguf",
36
+ "--ctx-size",
37
+ "4096",
38
+ "--batch-size",
39
+ "512",
40
+ ],
41
+ preset: "default",
42
+ },
43
+ }),
44
+ );
45
+
46
+ // Access the private method via any
47
+ const extractFrom = (model as any).extractFrom.bind(model);
48
+ expect(extractFrom("--ctx-size")).toBe(4096);
49
+ });
50
+
51
+ it("should extract --fit-ctx value when --ctx-size is not present", () => {
52
+ const model = new RouterModel(
53
+ createModel({
54
+ status: {
55
+ value: "loaded",
56
+ args: ["--model", "gguf", "--fit-ctx", "8192"],
57
+ preset: "default",
58
+ },
59
+ }),
60
+ );
61
+
62
+ const extractFrom = (model as any).extractFrom.bind(model);
63
+ expect(extractFrom("--fit-ctx")).toBe(8192);
64
+ });
65
+
66
+ it("should return null when argument is not found", () => {
67
+ const model = new RouterModel(
68
+ createModel({
69
+ status: {
70
+ value: "loaded",
71
+ args: ["--model", "gguf", "--batch-size", "512"],
72
+ preset: "default",
73
+ },
74
+ }),
75
+ );
76
+
77
+ const extractFrom = (model as any).extractFrom.bind(model);
78
+ expect(extractFrom("--ctx-size")).toBeNull();
79
+ expect(extractFrom("--fit-ctx")).toBeNull();
80
+ });
81
+
82
+ it("should return null when argument has no following value", () => {
83
+ const model = new RouterModel(
84
+ createModel({
85
+ status: {
86
+ value: "loaded",
87
+ args: ["--model", "gguf", "--ctx-size"],
88
+ preset: "default",
89
+ },
90
+ }),
91
+ );
92
+
93
+ const extractFrom = (model as any).extractFrom.bind(model);
94
+ expect(extractFrom("--ctx-size")).toBeNull();
95
+ });
96
+
97
+ it("should return null when argument value is not a valid number", () => {
98
+ const model = new RouterModel(
99
+ createModel({
100
+ status: {
101
+ value: "loaded",
102
+ args: ["--model", "gguf", "--ctx-size", "not-a-number"],
103
+ preset: "default",
104
+ },
105
+ }),
106
+ );
107
+
108
+ const extractFrom = (model as any).extractFrom.bind(model);
109
+ expect(extractFrom("--ctx-size")).toBeNull();
110
+ });
111
+
112
+ it("should prefer --ctx-size over --fit-ctx when loaded", async () => {
113
+ // First call: getStatus() -> /models
114
+ mockRpc.mockResolvedValueOnce({
115
+ data: [
116
+ {
117
+ id: "test-model",
118
+ status: { value: "loaded", args: ["--model", "gguf", "--ctx-size", "4096", "--fit-ctx", "8192"], preset: "default" },
119
+ },
120
+ ],
121
+ });
122
+ // Second call: super.getContextSize() -> /models with meta.n_ctx
123
+ mockRpc.mockResolvedValueOnce({
124
+ data: [
125
+ {
126
+ id: "test-model",
127
+ meta: { n_ctx: 4096 },
128
+ },
129
+ ],
130
+ });
131
+
132
+ const model = new RouterModel(
133
+ createModel({
134
+ status: {
135
+ value: "loaded",
136
+ args: ["--model", "gguf", "--ctx-size", "4096", "--fit-ctx", "8192"],
137
+ preset: "default",
138
+ },
139
+ }),
140
+ );
141
+
142
+ const ctxSize = await model.getContextSize();
143
+ expect(ctxSize).toBe(4096);
144
+ });
145
+
146
+ it("should return DEFAULT_CTX when no context size args are present and loaded", async () => {
147
+ // First call: getStatus() -> /models
148
+ mockRpc.mockResolvedValueOnce({
149
+ data: [
150
+ {
151
+ id: "test-model",
152
+ status: { value: "loaded", args: ["--model", "gguf"], preset: "default" },
153
+ },
154
+ ],
155
+ });
156
+ // Second call: super.getContextSize() -> /models without meta.n_ctx
157
+ mockRpc.mockResolvedValueOnce({
158
+ data: [
159
+ {
160
+ id: "test-model",
161
+ },
162
+ ],
163
+ });
164
+
165
+ const { DEFAULT_CTX } = await import("../src/constants");
166
+
167
+ const model = new RouterModel(
168
+ createModel({
169
+ status: {
170
+ value: "loaded",
171
+ args: ["--model", "gguf"],
172
+ preset: "default",
173
+ },
174
+ }),
175
+ );
176
+
177
+ const ctxSize = await model.getContextSize();
178
+ expect(ctxSize).toBe(DEFAULT_CTX);
179
+ });
180
+ });
181
+
182
+ describe("RouterModel capabilities detection", () => {
183
+ it("should detect image capability when modalities.vision is true", async () => {
184
+ // getStatus() calls /models first
185
+ mockRpc.mockResolvedValueOnce({
186
+ data: [
187
+ {
188
+ id: "test-model",
189
+ status: { value: "loaded", args: [], preset: "default", failed: false },
190
+ },
191
+ ],
192
+ });
193
+ // super.getCapabilities() calls /props?model=<id>
194
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
195
+
196
+ const model = new RouterModel(createModel());
197
+ const capabilities = await model.getCapabilities();
198
+
199
+ expect(capabilities).toEqual(["image"]);
200
+ expect(mockRpc).toHaveBeenCalledWith("/props?model=test-model");
201
+ });
202
+
203
+ it("should detect text-only capability when modalities.vision is false", async () => {
204
+ // getStatus() calls /models first
205
+ mockRpc.mockResolvedValueOnce({
206
+ data: [
207
+ {
208
+ id: "test-model",
209
+ status: { value: "loaded", args: [], preset: "default", failed: false },
210
+ },
211
+ ],
212
+ });
213
+ // super.getCapabilities() calls /props?model=<id>
214
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
215
+
216
+ const model = new RouterModel(createModel());
217
+ const capabilities = await model.getCapabilities();
218
+
219
+ expect(capabilities).toEqual(["text"]);
220
+ });
221
+
222
+ it("should default to text when /props call fails", async () => {
223
+ // getStatus() calls /models first
224
+ mockRpc.mockResolvedValueOnce({
225
+ data: [
226
+ {
227
+ id: "test-model",
228
+ status: { value: "loaded", args: [], preset: "default", failed: false },
229
+ },
230
+ ],
231
+ });
232
+ // super.getCapabilities() calls /props?model=<id> which fails
233
+ mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
234
+
235
+ const model = new RouterModel(createModel());
236
+ const capabilities = await model.getCapabilities();
237
+
238
+ expect(capabilities).toEqual(["text"]);
239
+ });
240
+
241
+ it("should use status.args to detect image capability when not loaded", async () => {
242
+ // getStatus() calls /models first, returns unloaded
243
+ mockRpc.mockResolvedValueOnce({
244
+ data: [
245
+ {
246
+ id: "test-model",
247
+ status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
248
+ },
249
+ ],
250
+ });
251
+
252
+ const model = new RouterModel(
253
+ createModel({
254
+ status: { value: "unloaded", args: ["--model", "gguf", "--mmproj", "mmproj.gguf"], preset: "default", failed: false },
255
+ }),
256
+ );
257
+ const capabilities = await model.getCapabilities();
258
+
259
+ expect(capabilities).toEqual(["image"]);
260
+ });
261
+
262
+ it("should return text when not loaded and no --mmproj in args", async () => {
263
+ // getStatus() calls /models first, returns unloaded
264
+ mockRpc.mockResolvedValueOnce({
265
+ data: [
266
+ {
267
+ id: "test-model",
268
+ status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
269
+ },
270
+ ],
271
+ });
272
+
273
+ const model = new RouterModel(
274
+ createModel({
275
+ status: { value: "unloaded", args: ["--model", "gguf"], preset: "default", failed: false },
276
+ }),
277
+ );
278
+ const capabilities = await model.getCapabilities();
279
+
280
+ expect(capabilities).toEqual(["text"]);
281
+ });
282
+ });
283
+
284
+ describe("RouterModel mode", () => {
285
+ it("should always return ROUTER mode", () => {
286
+ const model = new RouterModel(createModel());
287
+ expect(model.mode).toBe(Mode.ROUTER);
288
+ });
289
+ });
@@ -0,0 +1,140 @@
1
+ import { beforeEach, describe, expect, it, vi } from "vitest";
2
+ import { DEFAULT_CTX } from "../src/constants";
3
+ import { Mode } from "../src/enums/mode";
4
+ import { Status } from "../src/enums/status";
5
+ import { ModelProperty } from "../src/interfaces/endpoints/models";
6
+ import { SingleModel } from "../src/models/singleModel";
7
+
8
+ const mockRpc = vi.fn();
9
+
10
+ vi.mock("../src/tools/retriever", () => ({
11
+ rpc: (...args: unknown[]) => mockRpc(...args),
12
+ isServerReady: vi.fn(),
13
+ listModels: vi.fn(),
14
+ }));
15
+
16
+ beforeEach(() => {
17
+ mockRpc.mockClear();
18
+ });
19
+
20
+ const createModel = (extra: Partial<ModelProperty> = {}): SingleModel =>
21
+ new SingleModel(
22
+ {
23
+ id: "test",
24
+ tags: [],
25
+ object: "model",
26
+ owned_by: "test",
27
+ created: Date.now(),
28
+ },
29
+ {
30
+ name: "test",
31
+ model: "test.gguf",
32
+ modified_at: new Date().toISOString(),
33
+ size: "1B",
34
+ digest: "abc123",
35
+ type: "model",
36
+ description: "test",
37
+ tags: [],
38
+ capabilities: [],
39
+ parameters: "",
40
+ details: {
41
+ parent_model: "",
42
+ format: "",
43
+ family: "",
44
+ families: [],
45
+ parameter_size: "",
46
+ quantization_level: "",
47
+ },
48
+ ...extra,
49
+ },
50
+ );
51
+
52
+ describe("SingleModel mode", () => {
53
+ it("should always return SINGLE mode", () => {
54
+ const model = createModel();
55
+ expect(model.mode).toBe(Mode.SINGLE);
56
+ });
57
+ });
58
+
59
+ describe("SingleModel capabilities", () => {
60
+ it("should detect image capability when modalities.vision is true", async () => {
61
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
62
+
63
+ const model = createModel();
64
+ const capabilities = await model.getCapabilities();
65
+
66
+ expect(capabilities).toEqual(["image"]);
67
+ expect(mockRpc).toHaveBeenCalledWith("/props?model=test");
68
+ });
69
+
70
+ it("should detect text-only capability when modalities.vision is false", async () => {
71
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
72
+
73
+ const model = createModel();
74
+ const capabilities = await model.getCapabilities();
75
+
76
+ expect(capabilities).toEqual(["text"]);
77
+ });
78
+
79
+ it("should return text when /props call fails", async () => {
80
+ mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
81
+
82
+ const model = createModel();
83
+ const capabilities = await model.getCapabilities();
84
+
85
+ expect(capabilities).toEqual(["text"]);
86
+ });
87
+ });
88
+
89
+ describe("SingleModel getStatus", () => {
90
+ it("should return LOADED when not sleeping", async () => {
91
+ mockRpc.mockResolvedValueOnce({ is_sleeping: false });
92
+
93
+ const model = createModel();
94
+ const status = await model.getStatus();
95
+
96
+ expect(status).toBe(Status.LOADED);
97
+ expect(mockRpc).toHaveBeenCalledWith("/props");
98
+ });
99
+
100
+ it("should return SLEEPING when is_sleeping is true", async () => {
101
+ mockRpc.mockResolvedValueOnce({ is_sleeping: true });
102
+
103
+ const model = createModel();
104
+ const status = await model.getStatus();
105
+
106
+ expect(status).toBe(Status.SLEEPING);
107
+ });
108
+ });
109
+
110
+ describe("SingleModel getContextSize", () => {
111
+ it("should return n_ctx from /models endpoint meta", async () => {
112
+ mockRpc.mockResolvedValueOnce({
113
+ data: [{ id: "test", meta: { n_ctx: 8192 } }],
114
+ });
115
+
116
+ const model = createModel();
117
+ const ctxSize = await model.getContextSize();
118
+
119
+ expect(ctxSize).toBe(8192);
120
+ expect(mockRpc).toHaveBeenCalledWith("/models");
121
+ });
122
+
123
+ it("should return DEFAULT_CTX when model not found in /models", async () => {
124
+ mockRpc.mockResolvedValueOnce({ data: [] });
125
+
126
+ const model = createModel();
127
+ const ctxSize = await model.getContextSize();
128
+
129
+ expect(ctxSize).toBe(DEFAULT_CTX);
130
+ });
131
+
132
+ it("should return DEFAULT_CTX when /models fails", async () => {
133
+ mockRpc.mockRejectedValueOnce(new Error("Connection refused"));
134
+
135
+ const model = createModel();
136
+ const ctxSize = await model.getContextSize();
137
+
138
+ expect(ctxSize).toBe(DEFAULT_CTX);
139
+ });
140
+ });
package/tsconfig.json CHANGED
@@ -1,13 +1,12 @@
1
1
  {
2
2
  "compilerOptions": {
3
- "target": "ES2020",
4
- "module": "NodeNext",
5
- "moduleResolution": "NodeNext",
3
+ "target": "ES2022",
4
+ "module": "commonjs",
5
+ "moduleResolution": "bundler",
6
6
  "strict": true,
7
7
  "esModuleInterop": true,
8
8
  "skipLibCheck": true,
9
- "forceConsistentCasingInFileNames": true,
10
- "types": ["node"]
9
+ "noEmit": true
11
10
  },
12
- "include": ["src/**/*.ts"]
11
+ "include": ["src/**/*.ts", "tests/**/*.ts"]
13
12
  }
@@ -0,0 +1,8 @@
1
+ import { defineConfig } from "vitest/config";
2
+
3
+ export default defineConfig({
4
+ test: {
5
+ globals: true,
6
+ environment: "node",
7
+ },
8
+ });
@@ -1,15 +0,0 @@
1
- /**
2
- * The structure of llama-server's /slots endpoint
3
- *
4
- * In single mode, applies to /slots
5
- * In router mode, applies to /slots?model=<id>
6
- */
7
- export interface SlotsEndpoint {
8
- id: number;
9
- n_ctx: number;
10
- speculative: boolean;
11
- is_processing: boolean;
12
- id_task?: number;
13
- params?: Array<Record<string, any>>;
14
- next_token?: Array<Record<string, any>>;
15
- }