pi-llama-cpp 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,6 +12,7 @@ A [Pi Coding Agent](https://pi.dev/) extension that integrates with running [lla
12
12
  - **Flexible URL resolution** — configures the server URL via project config, environment variable, or global settings
13
13
  - **Auth support** — lets you log in to a llama.cpp server that is secured with an API key
14
14
  - **Multiple server support** — connect to multiple llama.cpp servers simultaneously by separating URLs with semicolons
15
+ - **Thinking budget support** — configurable token budgets for model reasoning/thinking, mapped to Pi's thinking levels
15
16
 
16
17
  ### Status Indicators
17
18
 
@@ -48,11 +49,11 @@ pi install https://github.com/gsanhueza/pi-llama-cpp
48
49
 
49
50
  The extension resolves the llama.cpp server URL(s) using the following priority order:
50
51
 
51
- 1. **Per-project config** — `.pi/llama-server.json` in your project root:
52
+ 1. **Per-project config** — `.pi/settings.json` in your project root:
52
53
 
53
54
  ```json
54
55
  {
55
- "url": "http://127.0.0.1:8080"
56
+ "llamaServerUrl": "http://127.0.0.1:8080"
56
57
  }
57
58
  ```
58
59
 
@@ -127,7 +128,7 @@ The extension determines the context size as follows:
127
128
 
128
129
  - **Router mode**
129
130
  - When loaded, reads `meta.n_ctx` from the `/models` endpoint
130
- - When not loaded, reads `--ctx-size` and/or `--fit-ctx` from the server arguments, or `ctx-size` and/or `fit-ctx` keys from the **presets.ini** file.
131
+ - When not loaded, reads `--ctx-size` and/or `--fit-ctx` from the server arguments (which can also originate from the **presets.ini** file the llama.cpp server uses to load its models).
131
132
  - **Single mode** — reads `meta.n_ctx` from the `/models` endpoint
132
133
  - **Legacy mode** — reads `max_model_len` from `/models`, falling back to `n_ctx` from `/props`
133
134
  - Falls back to `128000` if not available
@@ -140,6 +141,8 @@ The extension determines the context size as follows:
140
141
  | `/models info` | Show detailed information for all available models at once. |
141
142
  | `/models unload` | Unload all loaded models at once. |
142
143
 
144
+ > **Note:** When a llama.cpp server is slow to respond, it will be skipped at startup with a warning. Run `/models` to retry without a timeout and see all models.
145
+
143
146
  > **Note:** When a llama.cpp server is unreachable, `/models` displays an error notification with the configured server URL, but healthy servers continue to show their models.
144
147
 
145
148
  > **Note:** The `/models unload` command only makes sense in router mode.
@@ -157,12 +160,44 @@ When browsing models via the `/models` command, you can:
157
160
 
158
161
  > **Note:** In single-model and legacy-model mode, **Unload** is not available, since there is only one model on the server.
159
162
 
163
+ ### Thinking Budgets
164
+
165
+ The extension supports configurable **thinking budgets** that control how many tokens the model allocates to its reasoning/thinking process.
166
+ This is tied to Pi's thinking level selector (off, minimal, low, medium, high, xhigh).
167
+
168
+ | Level | Tokens | Description |
169
+ | --------- | ------ | ---------------------------- |
170
+ | `off` | 0 | Thinking disabled |
171
+ | `minimal` | 1,024 | Short reasoning steps |
172
+ | `low` | 2,048 | Light reasoning |
173
+ | `medium` | 8,192 | Balanced reasoning (default) |
174
+ | `high` | 16,384 | Extended reasoning |
175
+ | `xhigh` | -1 | Unlimited reasoning |
176
+
177
+ User-defined budgets can override the defaults by adding a `thinkingBudgets` object to `~/.pi/agent/settings.json` (global) or `.pi/settings.json` (per-project):
178
+
179
+ ```json
180
+ {
181
+ "thinkingBudgets": {
182
+ "minimal": 256,
183
+ "low": 1024,
184
+ "medium": 2048,
185
+ "high": 4096
186
+ }
187
+ }
188
+ ```
189
+
190
+ Only `minimal`, `low`, `medium`, and `high` are configurable — `off` (0) and `xhigh` (-1, unlimited) are fixed.
191
+ The extension automatically injects the appropriate `thinking_budget_tokens` into each request payload based on the selected level.
192
+
160
193
  ### Model Selection Event
161
194
 
162
195
  When you switch models via Pi's model picker (instead of using the `/models` command), the extension listens for the `model_select` event, which also loads the requested model before the conversation begins.
163
196
 
164
197
  This keeps the server in sync with the active model in Pi, regardless of how the switch was initiated — you don't need to manually load models before using them.
165
198
 
199
+ > **Note:** If you switch sessions while a model load is in-flight, you'll see a warning, but the load continues in the background. Use `/models` in the new session to verify the model status.
200
+
166
201
  ### Loading Models
167
202
 
168
203
  When you trigger a load, switch, or retry action, the extension polls the server to track progress. If a model takes longer than **60 seconds** to load, the polling times out with an error.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-llama-cpp",
3
- "version": "0.6.0",
3
+ "version": "0.7.1",
4
4
  "description": "Pi extension for llama.cpp integration. Supports router, single and legacy models. Supports multiple servers.",
5
5
  "keywords": [
6
6
  "pi",
@@ -36,7 +36,7 @@
36
36
  "@earendil-works/pi-tui": "*"
37
37
  },
38
38
  "devDependencies": {
39
- "@types/node": "^25.9.1",
39
+ "@types/node": "^25.9.3",
40
40
  "prettier-plugin-organize-imports": "^4.3.0",
41
41
  "vitest": "^4.1.8"
42
42
  }
package/src/constants.ts CHANGED
@@ -39,6 +39,23 @@ export const POLLING_INTERVAL = 500;
39
39
  export const POLLING_TIMEOUT = 60000;
40
40
 
41
41
  /**
42
- * Reasonable time to read notifications if context goes stale
42
+ * Reasonable time (ms) to read notifications if context goes stale
43
43
  */
44
44
  export const READABLE_TIMEOUT = 15000;
45
+
46
+ /**
47
+ * Timeout (ms) for server verification before assuming failure
48
+ */
49
+ export const SERVER_TIMEOUT = 1000;
50
+
51
+ /**
52
+ * Thinking budgets to send to the server, depending on user-selected level in Pi.
53
+ */
54
+ export const DEFAULT_THINKING_BUDGETS = {
55
+ off: 0,
56
+ minimal: 1024,
57
+ low: 2048,
58
+ medium: 8192,
59
+ high: 16384,
60
+ xhigh: -1,
61
+ };
@@ -0,0 +1,6 @@
1
+ /** The possible states of a llama.cpp server */
2
+ export enum ServerStatus {
3
+ READY = "ready",
4
+ TIMEOUT = "timeout",
5
+ UNREACHABLE = "unreachable",
6
+ }
package/src/index.ts CHANGED
@@ -1,8 +1,10 @@
1
- import type {
2
- ExtensionAPI,
3
- ExtensionCommandContext,
4
- ExtensionContext,
5
- SessionBeforeSwitchEvent,
1
+ import {
2
+ type BeforeProviderRequestEvent,
3
+ type ExtensionAPI,
4
+ type ExtensionCommandContext,
5
+ type ExtensionContext,
6
+ type SessionBeforeSwitchEvent,
7
+ type SessionStartEvent,
6
8
  } from "@earendil-works/pi-coding-agent";
7
9
  import { PROVIDER_NAME } from "./constants";
8
10
  import { ModelSelectEvent } from "./interfaces/events";
@@ -14,7 +16,7 @@ import { Server } from "./server";
14
16
 
15
17
  export default async function (pi: ExtensionAPI) {
16
18
  const resolver = new ConfigResolver();
17
- const urls = await resolver.resolveUrls(process.cwd());
19
+ const urls = await resolver.resolveUrls();
18
20
  const servers = urls.map((url) => new Server(url));
19
21
 
20
22
  const eventManager = new EventManager(servers);
@@ -22,7 +24,7 @@ export default async function (pi: ExtensionAPI) {
22
24
  const commandManager = new CommandManager(serverManager);
23
25
 
24
26
  // Register providers once at startup
25
- await serverManager.registerAllProviders(pi);
27
+ await serverManager.initialize(pi);
26
28
 
27
29
  // Single global /models command
28
30
  pi.registerCommand("models", {
@@ -34,6 +36,21 @@ export default async function (pi: ExtensionAPI) {
34
36
  });
35
37
 
36
38
  // Events
39
+ pi.on("session_start", (event: SessionStartEvent, ctx: ExtensionContext) => {
40
+ if (event.reason !== "startup") return;
41
+ for (const warning of serverManager.getWarnings())
42
+ ctx.ui.notify(warning, "warning");
43
+
44
+ for (const warning of resolver.getWarnings())
45
+ ctx.ui.notify(warning, "warning");
46
+ });
47
+
48
+ pi.on(
49
+ "before_provider_request",
50
+ async (event: BeforeProviderRequestEvent) =>
51
+ await eventManager.onBeforeProviderRequest(event),
52
+ );
53
+
37
54
  pi.on(
38
55
  "model_select",
39
56
  async (event: ModelSelectEvent, ctx: ExtensionContext) =>
@@ -0,0 +1,7 @@
1
+ export type ThinkingLevel =
2
+ | "off"
3
+ | "minimal"
4
+ | "low"
5
+ | "medium"
6
+ | "high"
7
+ | "xhigh";
@@ -50,7 +50,7 @@ export class CommandManager {
50
50
  pi: ExtensionAPI,
51
51
  ) {
52
52
  // Re-register providers so Pi sees updated model states
53
- await this.serverManager.registerAllProviders(pi);
53
+ await this.serverManager.update(pi);
54
54
 
55
55
  // Notify about unreachable servers
56
56
  for (const url of this.serverManager.failedUrls) {
@@ -1,7 +1,11 @@
1
- import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
1
+ import {
2
+ type BeforeProviderRequestEvent,
3
+ type ExtensionContext,
4
+ } from "@earendil-works/pi-coding-agent";
2
5
  import { READABLE_TIMEOUT } from "../constants";
3
6
  import { ModelSelectEvent } from "../interfaces/events";
4
7
  import { BaseModel } from "../models/baseModel";
8
+ import { ConfigResolver } from "../resolver";
5
9
  import { Server } from "../server";
6
10
 
7
11
  export class EventManager {
@@ -9,6 +13,13 @@ export class EventManager {
9
13
 
10
14
  constructor(private readonly servers: Server[]) {}
11
15
 
16
+ /**
17
+ * Resets the in-flight model reference.
18
+ */
19
+ static resetInflightModel() {
20
+ EventManager.inflightModel = null;
21
+ }
22
+
12
23
  /**
13
24
  * Reacts to a new model event triggered by Pi
14
25
  *
@@ -55,9 +66,36 @@ export class EventManager {
55
66
  }
56
67
 
57
68
  /**
58
- * Resets the in-flight model reference.
69
+ * Intercepts the request to add extra information, useful to llama.cpp.
70
+ * Adds a custom thinking budget to the request payload.
71
+ *
72
+ * @param event Request event
73
+ * @returns Updated payload
59
74
  */
60
- static resetInflightModel() {
61
- EventManager.inflightModel = null;
75
+ async onBeforeProviderRequest(event: BeforeProviderRequestEvent) {
76
+ const payload = event.payload as { model?: string };
77
+ const { model } = payload;
78
+ if (!model) return payload;
79
+
80
+ // Check if this model belongs to one of our servers
81
+ const isLlamaCpp = this.servers.some((s) =>
82
+ s.models.some((m) => m.id === model),
83
+ );
84
+
85
+ if (!isLlamaCpp) return payload;
86
+
87
+ // Retrieve pi's current thinking level, so we can setup a budget
88
+ const resolver = new ConfigResolver();
89
+ const level = resolver.resolveThinkingLevel() ?? "medium";
90
+ const budgets = resolver.resolveThinkingBudgets();
91
+ const thinking_budget_tokens = budgets[level];
92
+
93
+ // Setup payload
94
+ if (level === "off")
95
+ return { ...payload, chat_template_kwargs: { enable_thinking: false } };
96
+
97
+ if (level === "xhigh") return payload;
98
+
99
+ return { ...payload, thinking_budget_tokens };
62
100
  }
63
101
  }
@@ -1,42 +1,97 @@
1
1
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
- import { API_TYPE } from "../constants";
2
+ import { API_TYPE, PROVIDER_NAME, SERVER_TIMEOUT } from "../constants";
3
+ import { ServerStatus } from "../enums/serverStatus";
3
4
  import { BaseModel } from "../models/baseModel";
4
5
  import { Server } from "../server";
5
6
 
6
7
  export class ServerManager {
7
8
  readonly failedUrls: string[] = [];
9
+ private readonly warnings: string[] = [];
8
10
 
9
11
  constructor(private readonly servers: Server[]) {}
10
12
 
13
+ /**
14
+ * Verifies reachability of servers and registers the providers
15
+ *
16
+ * @param pi The Pi extension API
17
+ */
18
+ async initialize(pi: ExtensionAPI) {
19
+ // Register the providers with a timeout first
20
+ await this.update(pi, SERVER_TIMEOUT);
21
+ }
22
+
11
23
  /**
12
24
  * Registers one provider per server in Pi with their model configurations.
13
- * Call this after the servers have been initialized.
14
25
  * The manual awaiting per-server is deliberate (we want them in order)
15
26
  *
16
- * @param pi The Pi extension
27
+ * @param pi The Pi extension API
28
+ * @param timeout (Optional) Timeout before assuming server has failed
17
29
  */
18
- async registerAllProviders(pi: ExtensionAPI) {
30
+ async update(pi: ExtensionAPI, timeout?: number) {
19
31
  this.failedUrls.length = 0;
20
32
 
21
- for (const server of this.servers) {
22
- await this.registerProvider(server, pi);
33
+ const registrableServers = timeout
34
+ ? await this.findRegistrableServers(timeout)
35
+ : this.servers;
36
+
37
+ // Initialization and registration
38
+ for (const server of registrableServers) {
39
+ try {
40
+ await server.initialize();
41
+ await this.registerProvider(server, pi);
42
+ } catch {
43
+ this.failedUrls.push(server.baseUrl);
44
+ continue;
45
+ }
23
46
  }
24
47
  }
25
48
 
49
+ /**
50
+ * Runs concurrent health checks and returns only healthy servers.
51
+ *
52
+ * @param timeout Maximum time to wait for each server
53
+ * @returns Array of servers that passed the health check
54
+ */
55
+ private async findRegistrableServers(timeout: number): Promise<Server[]> {
56
+ const healthResults = await Promise.all(
57
+ this.servers.map(async (server) => {
58
+ const status = await server.isReady(timeout);
59
+ return { server, status };
60
+ }),
61
+ );
62
+
63
+ const response: Server[] = [];
64
+ for (const { server, status } of healthResults) {
65
+ if (status === ServerStatus.READY) {
66
+ response.push(server);
67
+ } else if (status === ServerStatus.TIMEOUT) {
68
+ const message = [
69
+ "[pi-llama-cpp]",
70
+ `${PROVIDER_NAME} server initialization for '${server.baseUrl}' took more than ${SERVER_TIMEOUT} ms, so it has been skipped.`,
71
+ "Run `/models` to retry without timeout and see all models.",
72
+ ].join("\n");
73
+ this.warnings.push(message);
74
+ this.failedUrls.push(server.baseUrl);
75
+ } else {
76
+ const message = [
77
+ "[pi-llama-cpp]",
78
+ `${PROVIDER_NAME} server at '${server.baseUrl}' is unreachable.`,
79
+ "Check the URL and try again. Run `/models` to retry.",
80
+ ].join("\n");
81
+ this.warnings.push(message);
82
+ this.failedUrls.push(server.baseUrl);
83
+ }
84
+ }
85
+
86
+ return response;
87
+ }
88
+
26
89
  /**
27
90
  * Creates a Pi provider for the given server
28
91
  *
29
92
  * @param server The server
30
93
  */
31
94
  private async registerProvider(server: Server, pi: ExtensionAPI) {
32
- try {
33
- await server.initialize();
34
- } catch {
35
- this.failedUrls.push(server.baseUrl);
36
- return;
37
- }
38
-
39
- // Setup the Pi registration
40
95
  const { baseUrl, models, providerId, providerName } = server;
41
96
  const apiKey = await server.getApiKey();
42
97
  const modelConfigs = await Promise.all(
@@ -52,6 +107,16 @@ export class ServerManager {
52
107
  });
53
108
  }
54
109
 
110
+ /**
111
+ * Returns warnings collected during initialization.
112
+ */
113
+ getWarnings(): string[] {
114
+ const warnings = [...this.warnings];
115
+ this.warnings.length = 0;
116
+
117
+ return warnings;
118
+ }
119
+
55
120
  /**
56
121
  * Returns all models from all servers.
57
122
  *
@@ -172,6 +172,13 @@ export abstract class BaseModel {
172
172
  id: this.id,
173
173
  name: this.name,
174
174
  reasoning: this.reasoning,
175
+ thinkingLevelMap: {
176
+ minimal: "minimal",
177
+ low: "low",
178
+ medium: "medium",
179
+ high: "high",
180
+ xhigh: "xhigh",
181
+ },
175
182
  input: await this.getCapabilities(),
176
183
  contextWindow: await this.getContextSize(),
177
184
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
package/src/resolver.ts CHANGED
@@ -1,66 +1,65 @@
1
- import { getAgentDir } from "@earendil-works/pi-coding-agent";
2
- import { access, constants, readFile } from "node:fs/promises";
1
+ import {
2
+ AuthStorage,
3
+ getAgentDir,
4
+ SettingsManager,
5
+ } from "@earendil-works/pi-coding-agent";
6
+ import { readFile } from "node:fs/promises";
3
7
  import { join } from "node:path";
4
- import { API_KEY_PLACEHOLDER, DEFAULT_LLAMA_SERVER_URL } from "./constants";
5
- import { AuthFile } from "./interfaces/auth";
8
+ import {
9
+ API_KEY_PLACEHOLDER,
10
+ DEFAULT_LLAMA_SERVER_URL,
11
+ DEFAULT_THINKING_BUDGETS,
12
+ } from "./constants";
13
+ import { ThinkingLevel } from "./interfaces/levels";
6
14
 
7
15
  export class ConfigResolver {
16
+ private warnings: string[] = [];
17
+
8
18
  private cachedUrls: string[] = [];
19
+ private authStorage = AuthStorage.create(join(getAgentDir(), "auth.json"));
20
+ private settingsManager = SettingsManager.create(
21
+ process.cwd(),
22
+ getAgentDir(),
23
+ );
9
24
 
10
25
  /**
11
- * Detects if a particular file is present
26
+ * Resolves the llama-server URL by searching in the global settings.json
12
27
  */
13
- private async fileExists(filePath: string): Promise<boolean> {
14
- try {
15
- await access(filePath, constants.F_OK);
16
- return true;
17
- } catch {
18
- return false;
19
- }
28
+ private async resolveGlobalUrl(): Promise<string | null> {
29
+ const settings = this.settingsManager.getGlobalSettings();
30
+ const { llamaServerUrl = null } = settings as Record<string, string>;
31
+
32
+ return llamaServerUrl;
20
33
  }
21
34
 
22
35
  /**
23
- * Reads and parses the contents of a file as JSON
36
+ * Resolves the llama-server URL by searching in the project's .pi/settings.json
24
37
  */
25
- private async readJson<T>(filePath: string): Promise<T | null> {
26
- const raw = await readFile(filePath, "utf-8");
38
+ private async resolveProjectUrl(): Promise<string | null> {
39
+ // Warn the user about the deprecated `.pi/llama-server.json` file, if it is still present
27
40
  try {
28
- return JSON.parse(raw) as T;
41
+ const filePath = join(process.cwd(), ".pi", "llama-server.json");
42
+ const { url = null } = JSON.parse(await readFile(filePath, "utf-8"));
43
+
44
+ const messages = [
45
+ "[pi-llama-cpp]",
46
+ "The project-level `.pi/llama-server.json` file has been deprecated.",
47
+ "It will work for now, but you must follow these instructions as soon as possible:",
48
+ '- Move your url to the project-level `.pi/settings.json` file as {"llamaServerUrl": "<url>"}.',
49
+ "- Remove the old `.pi/llama-server.json` file.",
50
+ ];
51
+
52
+ this.warnings.push(messages.join("\n"));
53
+
54
+ return url;
29
55
  } catch {
30
- return null;
56
+ // No old file available, continue as normal
31
57
  }
32
- }
33
58
 
34
- /**
35
- * Reads a value from a JSON config file by key
36
- */
37
- private async readConfigValue<T>(
38
- filePath: string,
39
- key: keyof T,
40
- ): Promise<T[keyof T] | null> {
41
- const cfg = await this.readJson<T>(filePath);
42
- return cfg?.[key] ?? null;
43
- }
59
+ const settings = this.settingsManager.getProjectSettings();
60
+ const { llamaServerUrl = null } = settings as Record<string, string>;
44
61
 
45
- /**
46
- * Resolves the llama-server URL by searching in the global settings.json
47
- */
48
- private async resolveGlobalUrl(): Promise<string | null> {
49
- const globalPath = join(getAgentDir(), "settings.json");
50
- if (!(await this.fileExists(globalPath))) return null;
51
- return this.readConfigValue<Record<string, string>>(
52
- globalPath,
53
- "llamaServerUrl",
54
- );
55
- }
56
-
57
- /**
58
- * Resolves the llama-server URL by searching in the project's .pi/llama-server.json
59
- */
60
- private async resolveProjectUrl(cwd: string): Promise<string | null> {
61
- const projectPath = join(cwd, ".pi", "llama-server.json");
62
- if (!(await this.fileExists(projectPath))) return null;
63
- return this.readConfigValue<Record<string, string>>(projectPath, "url");
62
+ return llamaServerUrl;
64
63
  }
65
64
 
66
65
  /**
@@ -73,9 +72,9 @@ export class ConfigResolver {
73
72
  /**
74
73
  * Tries all possible ways to retrieve the llama-server URL(s)
75
74
  */
76
- private async extractJoinedUrls(cwd: string): Promise<string> {
75
+ private async extractJoinedUrls(): Promise<string> {
77
76
  // 1. per-project config
78
- let response = await this.resolveProjectUrl(cwd);
77
+ let response = await this.resolveProjectUrl();
79
78
  if (response) return response;
80
79
 
81
80
  // 2. env
@@ -93,10 +92,10 @@ export class ConfigResolver {
93
92
  /**
94
93
  * Resolves URLs where llama-servers are running (cached)
95
94
  */
96
- async resolveUrls(cwd: string): Promise<string[]> {
95
+ async resolveUrls(): Promise<string[]> {
97
96
  if (this.cachedUrls.length > 0) return this.cachedUrls;
98
97
 
99
- const raw = await this.extractJoinedUrls(cwd);
98
+ const raw = await this.extractJoinedUrls();
100
99
  const urls = raw
101
100
  .split(";")
102
101
  .map((u) => u.trim())
@@ -108,16 +107,46 @@ export class ConfigResolver {
108
107
  }
109
108
 
110
109
  /**
111
- * Resolves API key for the provider ID using Pi's auth.json
112
- * Deliberately not cached, to react to changes in the file
110
+ * Resolves API key for the provider ID using Pi's AuthStorage
113
111
  */
114
112
  async resolveApiKey(providerId: string): Promise<string> {
115
- const authPath = join(getAgentDir(), "auth.json");
116
- if (!(await this.fileExists(authPath))) return API_KEY_PLACEHOLDER;
113
+ this.authStorage.reload();
114
+ const apiKey = await this.authStorage.getApiKey(providerId);
115
+
116
+ return apiKey ?? API_KEY_PLACEHOLDER;
117
+ }
118
+
119
+ /**
120
+ * Returns warnings collected during URL resolution.
121
+ */
122
+ getWarnings(): string[] {
123
+ const warnings = [...this.warnings];
124
+ this.warnings.length = 0;
125
+
126
+ return warnings;
127
+ }
117
128
 
118
- const auth = await this.readJson<AuthFile>(authPath);
119
- const apiKey = auth?.[providerId]?.key ?? API_KEY_PLACEHOLDER;
129
+ /**
130
+ * Resolves the current thinking level from Pi.
131
+ *
132
+ * @returns Selected level
133
+ */
134
+ resolveThinkingLevel(): ThinkingLevel | undefined {
135
+ return this.settingsManager.getDefaultThinkingLevel();
136
+ }
120
137
 
121
- return apiKey;
138
+ /**
139
+ * Resolves the effective thinking budgets from settings
140
+ *
141
+ * @returns Thinking budgets
142
+ */
143
+ resolveThinkingBudgets(): Record<ThinkingLevel, number> {
144
+ const settingsBudgets = this.settingsManager.getThinkingBudgets() ?? {};
145
+ const availableBudgets = {
146
+ ...DEFAULT_THINKING_BUDGETS,
147
+ ...settingsBudgets,
148
+ };
149
+
150
+ return availableBudgets;
122
151
  }
123
152
  }
package/src/server.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { PROVIDER_NAME, PROVIDER_PREFIX } from "./constants";
2
2
  import { Mode } from "./enums/mode";
3
+ import { ServerStatus } from "./enums/serverStatus";
3
4
  import { HealthEndpoint } from "./interfaces/endpoints/health";
4
5
  import { ModelsEndpoint } from "./interfaces/endpoints/models";
5
6
  import { PropsEndpoint } from "./interfaces/endpoints/props";
@@ -10,7 +11,8 @@ import { SingleModel } from "./models/singleModel";
10
11
  import { ConfigResolver } from "./resolver";
11
12
 
12
13
  export class Server {
13
- readonly models: BaseModel[] = [];
14
+ public readonly models: BaseModel[] = [];
15
+ private configResolver = new ConfigResolver();
14
16
 
15
17
  constructor(readonly baseUrl: string) {}
16
18
 
@@ -33,7 +35,7 @@ export class Server {
33
35
  * @returns The API key
34
36
  */
35
37
  async getApiKey(): Promise<string> {
36
- return await new ConfigResolver().resolveApiKey(this.providerId);
38
+ return await this.configResolver.resolveApiKey(this.providerId);
37
39
  }
38
40
 
39
41
  /**
@@ -73,15 +75,29 @@ export class Server {
73
75
  }
74
76
 
75
77
  /**
76
- * Detects if the server is ready
77
- * @returns True if it's ready to work
78
+ * Checks if the server is ready, with a timeout.
79
+ *
80
+ * @param timeout Maximum time to wait for the health check
81
+ * @returns The server status
78
82
  */
79
- async isReady(): Promise<boolean> {
83
+ async isReady(timeout: number): Promise<ServerStatus> {
80
84
  try {
81
- const { status } = await this.fetchServerHealth();
82
- return status === "ok";
83
- } catch {
84
- return false;
85
+ const timeoutPromise = new Promise<never>((_, reject) =>
86
+ setTimeout(() => reject(new Error("timeout")), timeout),
87
+ );
88
+ const health = await Promise.race([
89
+ this.fetchServerHealth(),
90
+ timeoutPromise,
91
+ ]);
92
+ if (health.status === "ok") {
93
+ return ServerStatus.READY;
94
+ }
95
+ return ServerStatus.UNREACHABLE;
96
+ } catch (error) {
97
+ if (error instanceof Error && error.message === "timeout") {
98
+ return ServerStatus.TIMEOUT;
99
+ }
100
+ return ServerStatus.UNREACHABLE;
85
101
  }
86
102
  }
87
103
 
@@ -0,0 +1,256 @@
1
+ import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
2
+ import { DEFAULT_THINKING_BUDGETS } from "../src/constants";
3
+ import { createMockModel, createMockServer } from "./mocks";
4
+
5
+ // Create a mutable mock object shared across tests
6
+ const mockSettingsManager = {
7
+ getDefaultThinkingLevel: vi.fn(() => "medium"),
8
+ getThinkingBudgets: vi.fn<() => Record<string, number> | undefined>(),
9
+ };
10
+
11
+ vi.mock("@earendil-works/pi-coding-agent", async (importOriginal) => {
12
+ const actual =
13
+ await importOriginal<typeof import("@earendil-works/pi-coding-agent")>();
14
+ return {
15
+ ...actual,
16
+ SettingsManager: {
17
+ create: () => mockSettingsManager,
18
+ },
19
+ };
20
+ });
21
+
22
+ let EventManager: typeof import("../src/managers/events").EventManager;
23
+
24
+ beforeAll(async () => {
25
+ const mod = await vi.importActual("../src/managers/events");
26
+ EventManager =
27
+ mod.EventManager as typeof import("../src/managers/events").EventManager;
28
+ });
29
+
30
+ beforeEach(() => {
31
+ vi.restoreAllMocks();
32
+ EventManager.resetInflightModel();
33
+ mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("medium");
34
+ mockSettingsManager.getThinkingBudgets.mockReturnValue(undefined);
35
+ });
36
+
37
+ const createPayload = (modelId: string) => ({
38
+ model: modelId,
39
+ messages: [{ role: "user", content: "hello" }],
40
+ });
41
+
42
+ const createNonLlamaPayload = () => ({
43
+ model: "gpt-4",
44
+ messages: [{ role: "user", content: "hello" }],
45
+ });
46
+
47
+ describe("EventManager.onBeforeProviderRequest", () => {
48
+ describe("normal usage — each thinking level", () => {
49
+ it.each([
50
+ {
51
+ level: "off",
52
+ expected: { chat_template_kwargs: { enable_thinking: false } },
53
+ },
54
+ { level: "minimal", expected: { thinking_budget_tokens: 1024 } },
55
+ { level: "low", expected: { thinking_budget_tokens: 2048 } },
56
+ { level: "medium", expected: { thinking_budget_tokens: 8192 } },
57
+ { level: "high", expected: { thinking_budget_tokens: 16384 } },
58
+ { level: "xhigh", expected: {} },
59
+ ])(
60
+ 'level "$level" should return $expected',
61
+ async ({ level, expected }) => {
62
+ mockSettingsManager.getDefaultThinkingLevel.mockReturnValue(level);
63
+
64
+ const server = createMockServer({
65
+ models: ["model-a"].map((id) => createMockModel(id)),
66
+ });
67
+ const eventManager = new EventManager([server]);
68
+ const event = { payload: createPayload("model-a") };
69
+
70
+ const result = (await eventManager.onBeforeProviderRequest(
71
+ event as any,
72
+ )) as Record<string, unknown>;
73
+
74
+ expect(result.model).toBe("model-a");
75
+ expect(result).toMatchObject(expected);
76
+ },
77
+ );
78
+
79
+ it("should preserve original payload fields alongside new ones", async () => {
80
+ mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("low");
81
+
82
+ const server = createMockServer({
83
+ models: ["model-b"].map((id) => createMockModel(id)),
84
+ });
85
+ const eventManager = new EventManager([server]);
86
+ const event = {
87
+ payload: {
88
+ model: "model-b",
89
+ messages: [{ role: "user", content: "test" }],
90
+ temperature: 0.7,
91
+ },
92
+ };
93
+
94
+ const result = (await eventManager.onBeforeProviderRequest(
95
+ event as any,
96
+ )) as Record<string, unknown>;
97
+
98
+ expect(result.messages).toEqual([{ role: "user", content: "test" }]);
99
+ expect(result.temperature).toBe(0.7);
100
+ expect(result.thinking_budget_tokens).toBe(DEFAULT_THINKING_BUDGETS.low);
101
+ });
102
+ });
103
+
104
+ describe("non-llama.cpp models", () => {
105
+ it("should return the payload unchanged for unknown models", async () => {
106
+ const server = createMockServer({
107
+ models: ["model-a"].map((id) => createMockModel(id)),
108
+ });
109
+ const eventManager = new EventManager([server]);
110
+ const event = { payload: createNonLlamaPayload() };
111
+
112
+ const result = await eventManager.onBeforeProviderRequest(event as any);
113
+
114
+ expect(result).toEqual(createNonLlamaPayload());
115
+ });
116
+ });
117
+
118
+ describe("missing model in payload", () => {
119
+ it("should return the payload unchanged when model is absent", async () => {
120
+ const server = createMockServer({
121
+ models: ["model-a"].map((id) => createMockModel(id)),
122
+ });
123
+ const eventManager = new EventManager([server]);
124
+ const event = { payload: { messages: [] } };
125
+
126
+ const result = await eventManager.onBeforeProviderRequest(event as any);
127
+
128
+ expect(result).toEqual({ messages: [] });
129
+ });
130
+ });
131
+
132
+ describe("user-defined budget overrides", () => {
133
+ it("should use user-defined budgets instead of defaults", async () => {
134
+ mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("low");
135
+ mockSettingsManager.getThinkingBudgets.mockReturnValue({ low: 4096 });
136
+
137
+ const server = createMockServer({
138
+ models: ["model-a"].map((id) => createMockModel(id)),
139
+ });
140
+ const eventManager = new EventManager([server]);
141
+ const event = { payload: createPayload("model-a") };
142
+
143
+ const result = (await eventManager.onBeforeProviderRequest(
144
+ event as any,
145
+ )) as Record<string, unknown>;
146
+
147
+ expect(result.thinking_budget_tokens).toBe(4096);
148
+ });
149
+
150
+ it("should merge user budgets with defaults (partial override)", async () => {
151
+ mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("medium");
152
+ mockSettingsManager.getThinkingBudgets.mockReturnValue({ low: 4096 });
153
+
154
+ const server = createMockServer({
155
+ models: ["model-a"].map((id) => createMockModel(id)),
156
+ });
157
+ const eventManager = new EventManager([server]);
158
+ const event = { payload: createPayload("model-a") };
159
+
160
+ const result = (await eventManager.onBeforeProviderRequest(
161
+ event as any,
162
+ )) as Record<string, unknown>;
163
+
164
+ // medium uses default since user only overrode low
165
+ expect(result.thinking_budget_tokens).toBe(
166
+ DEFAULT_THINKING_BUDGETS.medium,
167
+ );
168
+ });
169
+ });
170
+
171
+ // ─── Edge cases ─────────────────────────────────────────────────────
172
+
173
+ describe("edge cases", () => {
174
+ it("should ignore invalid keys in user budgets (they are silently dropped)", async () => {
175
+ mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("medium");
176
+ mockSettingsManager.getThinkingBudgets.mockReturnValue({
177
+ foo: 999,
178
+ bar: 123,
179
+ } as any);
180
+
181
+ const server = createMockServer({
182
+ models: ["model-a"].map((id) => createMockModel(id)),
183
+ });
184
+ const eventManager = new EventManager([server]);
185
+ const event = { payload: createPayload("model-a") };
186
+
187
+ const result = (await eventManager.onBeforeProviderRequest(
188
+ event as any,
189
+ )) as Record<string, unknown>;
190
+
191
+ // Should fall back to default since "medium" is not in user budgets
192
+ expect(result.thinking_budget_tokens).toBe(
193
+ DEFAULT_THINKING_BUDGETS.medium,
194
+ );
195
+ });
196
+
197
+ it("should not allow overriding 'off' — thinking stays disabled", async () => {
198
+ mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("off");
199
+ mockSettingsManager.getThinkingBudgets.mockReturnValue({
200
+ off: 99999,
201
+ } as any);
202
+
203
+ const server = createMockServer({
204
+ models: ["model-a"].map((id) => createMockModel(id)),
205
+ });
206
+ const eventManager = new EventManager([server]);
207
+ const event = { payload: createPayload("model-a") };
208
+
209
+ const result = (await eventManager.onBeforeProviderRequest(
210
+ event as any,
211
+ )) as Record<string, unknown>;
212
+
213
+ expect(result).toMatchObject({
214
+ chat_template_kwargs: { enable_thinking: false },
215
+ });
216
+ expect(result).not.toHaveProperty("thinking_budget_tokens");
217
+ });
218
+
219
+ it("should not allow overriding 'xhigh' — no budget is injected", async () => {
220
+ mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("xhigh");
221
+ mockSettingsManager.getThinkingBudgets.mockReturnValue({
222
+ xhigh: 1,
223
+ } as any);
224
+
225
+ const server = createMockServer({
226
+ models: ["model-a"].map((id) => createMockModel(id)),
227
+ });
228
+ const eventManager = new EventManager([server]);
229
+ const event = { payload: createPayload("model-a") };
230
+
231
+ const result = (await eventManager.onBeforeProviderRequest(
232
+ event as any,
233
+ )) as Record<string, unknown>;
234
+
235
+ expect(result).toEqual(createPayload("model-a"));
236
+ expect(result).not.toHaveProperty("thinking_budget_tokens");
237
+ });
238
+
239
+ it("should handle empty user budgets gracefully", async () => {
240
+ mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("high");
241
+ mockSettingsManager.getThinkingBudgets.mockReturnValue({});
242
+
243
+ const server = createMockServer({
244
+ models: ["model-a"].map((id) => createMockModel(id)),
245
+ });
246
+ const eventManager = new EventManager([server]);
247
+ const event = { payload: createPayload("model-a") };
248
+
249
+ const result = (await eventManager.onBeforeProviderRequest(
250
+ event as any,
251
+ )) as Record<string, unknown>;
252
+
253
+ expect(result.thinking_budget_tokens).toBe(DEFAULT_THINKING_BUDGETS.high);
254
+ });
255
+ });
256
+ });
package/tests/mocks.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
2
2
  import { vi } from "vitest";
3
3
  import { Mode } from "../src/enums/mode";
4
+ import { ServerStatus } from "../src/enums/serverStatus";
4
5
  import { Status } from "../src/enums/status";
5
6
  import { BaseModel } from "../src/models/baseModel";
6
7
  import { Server } from "../src/server";
@@ -24,12 +25,14 @@ export const createMockServer = (
24
25
  fetchServerProps: () => mockRpc("/props?autoload=false"),
25
26
  postRequest: (resource: "load" | "unload", model: string) =>
26
27
  mockRpc(`/models/${resource}`, { model }),
27
- isReady: async () => {
28
+ isReady: async (timeout: number) => {
28
29
  try {
29
30
  const r = await mockRpc("/health");
30
- return r.status === "ok";
31
+ return r.status === "ok"
32
+ ? ServerStatus.READY
33
+ : ServerStatus.UNREACHABLE;
31
34
  } catch {
32
- return false;
35
+ return ServerStatus.UNREACHABLE;
33
36
  }
34
37
  },
35
38
  initialize: async () => {
@@ -4,26 +4,46 @@ import {
4
4
  DEFAULT_LLAMA_SERVER_URL,
5
5
  } from "../src/constants";
6
6
 
7
- // Mock getAgentDir before importing resolver
7
+ // Hoisted mock instances survive vi.resetModules()
8
+ const mockAuthStorage = vi.hoisted(() => ({
9
+ reload: vi.fn(),
10
+ getApiKey: vi.fn(),
11
+ }));
12
+
13
+ const mockSettingsManager = vi.hoisted(() => ({
14
+ getProjectSettings: vi.fn(),
15
+ getGlobalSettings: vi.fn(),
16
+ }));
17
+
18
+ // Mock getAgentDir, AuthStorage, and SettingsManager before importing resolver
8
19
  vi.mock("@earendil-works/pi-coding-agent", () => ({
9
20
  getAgentDir: vi.fn().mockReturnValue("/fake/agent/dir"),
21
+ AuthStorage: {
22
+ create: vi.fn().mockReturnValue(mockAuthStorage),
23
+ },
24
+ SettingsManager: {
25
+ create: vi.fn().mockReturnValue(mockSettingsManager),
26
+ },
10
27
  }));
11
28
 
12
29
  vi.mock("node:fs/promises", () => ({
13
- access: vi.fn(),
14
- constants: { F_OK: 0 },
15
30
  readFile: vi.fn(),
16
31
  }));
17
32
 
18
33
  // Import mocked modules
19
34
  import { getAgentDir } from "@earendil-works/pi-coding-agent";
20
- import { access, readFile } from "node:fs/promises";
35
+ import { readFile } from "node:fs/promises";
21
36
  import { ConfigResolver } from "../src/resolver";
22
37
 
23
38
  describe("URL resolution fallback chain", () => {
24
- const mockAccess = vi.mocked(access);
25
39
  const mockReadFile = vi.mocked(readFile);
26
40
  const mockGetAgentDir = vi.mocked(getAgentDir);
41
+ const mockGetProjectSettings = vi.mocked(
42
+ mockSettingsManager.getProjectSettings,
43
+ );
44
+ const mockGetGlobalSettings = vi.mocked(
45
+ mockSettingsManager.getGlobalSettings,
46
+ );
27
47
 
28
48
  afterEach(() => {
29
49
  delete process.env.LLAMA_SERVER_URL;
@@ -33,115 +53,89 @@ describe("URL resolution fallback chain", () => {
33
53
  beforeEach(() => {
34
54
  vi.clearAllMocks();
35
55
  mockGetAgentDir.mockReturnValue("/fake/agent/dir");
36
- // Default: no files exist
37
- mockAccess.mockRejectedValue(new Error("ENOENT"));
38
- mockReadFile.mockResolvedValue("");
56
+ // Default: no settings found
57
+ mockGetProjectSettings.mockReturnValue({});
58
+ mockGetGlobalSettings.mockReturnValue({});
39
59
  });
40
60
 
41
61
  it("should return default URL when no config is found", async () => {
42
62
  const resolver = new ConfigResolver();
43
- const result = await resolver.resolveUrls("/tmp/test-project");
63
+ const result = await resolver.resolveUrls();
44
64
 
45
65
  expect(result).toEqual([DEFAULT_LLAMA_SERVER_URL]);
46
66
  });
47
67
 
48
68
  it("should prioritize project config over env variable", async () => {
49
- mockAccess.mockImplementation(async (_path: unknown) => {
50
- if (typeof _path === "string" && _path.includes("llama-server.json"))
51
- return undefined;
52
- throw new Error("ENOENT");
69
+ mockGetProjectSettings.mockReturnValue({
70
+ llamaServerUrl: "http://localhost:9999",
53
71
  });
54
- mockReadFile.mockResolvedValue(
55
- JSON.stringify({ url: "http://localhost:9999" }),
56
- );
57
-
58
72
  process.env.LLAMA_SERVER_URL = "http://env-url:8080";
59
73
 
60
74
  const resolver = new ConfigResolver();
61
- const result = await resolver.resolveUrls("/tmp/test-project");
75
+ const result = await resolver.resolveUrls();
62
76
 
63
77
  expect(result).toEqual(["http://localhost:9999"]);
64
78
  });
65
79
 
66
80
  it("should use env variable when no project config exists", async () => {
81
+ mockGetProjectSettings.mockReturnValue({});
67
82
  process.env.LLAMA_SERVER_URL = "http://env-url:8080";
68
83
 
69
84
  const resolver = new ConfigResolver();
70
- const result = await resolver.resolveUrls("/tmp/test-project");
85
+ const result = await resolver.resolveUrls();
71
86
 
72
87
  expect(result).toEqual(["http://env-url:8080"]);
73
88
  });
74
89
 
75
90
  it("should use global settings when no project config or env exists", async () => {
76
- mockAccess.mockImplementation(async (_path: unknown) => {
77
- if (typeof _path === "string" && _path.includes("settings.json"))
78
- return undefined;
79
- throw new Error("ENOENT");
91
+ mockGetProjectSettings.mockReturnValue({});
92
+ mockGetGlobalSettings.mockReturnValue({
93
+ llamaServerUrl: "http://global:8080",
80
94
  });
81
- mockReadFile.mockResolvedValue(
82
- JSON.stringify({ llamaServerUrl: "http://global:8080" }),
83
- );
84
95
 
85
96
  const resolver = new ConfigResolver();
86
- const result = await resolver.resolveUrls("/tmp/test-project");
97
+ const result = await resolver.resolveUrls();
87
98
 
88
99
  expect(result).toEqual(["http://global:8080"]);
89
100
  });
90
101
 
91
102
  it("should strip trailing slashes from resolved URL", async () => {
92
- mockAccess.mockImplementation(async (_path: unknown) => {
93
- if (typeof _path === "string" && _path.includes("llama-server.json"))
94
- return undefined;
95
- throw new Error("ENOENT");
103
+ mockGetProjectSettings.mockReturnValue({
104
+ llamaServerUrl: "http://localhost:8080/",
96
105
  });
97
- mockReadFile.mockResolvedValue(
98
- JSON.stringify({ url: "http://localhost:8080/" }),
99
- );
100
106
 
101
107
  const resolver = new ConfigResolver();
102
- const result = await resolver.resolveUrls("/tmp/test-project");
108
+ const result = await resolver.resolveUrls();
103
109
 
104
110
  expect(result).toEqual(["http://localhost:8080"]);
105
111
  });
106
112
 
107
113
  it("should cache the resolved URL on subsequent calls", async () => {
108
- mockAccess.mockImplementation(async (_path: unknown) => {
109
- if (typeof _path === "string" && _path.includes("llama-server.json"))
110
- return undefined;
111
- throw new Error("ENOENT");
114
+ mockGetProjectSettings.mockReturnValue({
115
+ llamaServerUrl: "http://first:8080",
112
116
  });
113
- mockReadFile.mockResolvedValue(
114
- JSON.stringify({ url: "http://first:8080" }),
115
- );
116
117
 
117
118
  const resolver = new ConfigResolver();
118
- const result1 = await resolver.resolveUrls("/tmp/project1");
119
- const result2 = await resolver.resolveUrls("/tmp/project2");
119
+ const result1 = await resolver.resolveUrls();
120
+ const result2 = await resolver.resolveUrls();
120
121
 
121
122
  expect(result1).toEqual(["http://first:8080"]);
122
123
  expect(result2).toEqual(["http://first:8080"]);
123
124
  });
124
125
 
125
126
  it("should handle multiple URLs separated by semicolons", async () => {
126
- mockAccess.mockImplementation(async (_path: unknown) => {
127
- if (typeof _path === "string" && _path.includes("llama-server.json"))
128
- return undefined;
129
- throw new Error("ENOENT");
127
+ mockGetProjectSettings.mockReturnValue({
128
+ llamaServerUrl: "http://first:8080;http://second:9090/",
130
129
  });
131
- mockReadFile.mockResolvedValue(
132
- JSON.stringify({ url: "http://first:8080;http://second:9090/" }),
133
- );
134
130
 
135
131
  const resolver = new ConfigResolver();
136
- const result = await resolver.resolveUrls("/tmp/test-project");
132
+ const result = await resolver.resolveUrls();
137
133
 
138
134
  expect(result).toEqual(["http://first:8080", "http://second:9090"]);
139
135
  });
140
136
  });
141
137
 
142
138
  describe("API key resolution", () => {
143
- const mockAccess = vi.mocked(access);
144
- const mockReadFile = vi.mocked(readFile);
145
139
  const mockGetAgentDir = vi.mocked(getAgentDir);
146
140
 
147
141
  afterEach(() => {
@@ -151,11 +145,13 @@ describe("API key resolution", () => {
151
145
  beforeEach(() => {
152
146
  vi.clearAllMocks();
153
147
  mockGetAgentDir.mockReturnValue("/fake/agent/dir");
154
- mockAccess.mockRejectedValue(new Error("ENOENT"));
155
- mockReadFile.mockResolvedValue("");
148
+ mockAuthStorage.reload.mockReturnValue(undefined);
149
+ mockAuthStorage.getApiKey.mockResolvedValue(undefined);
156
150
  });
157
151
 
158
152
  it("should return placeholder when auth file does not exist", async () => {
153
+ mockAuthStorage.getApiKey.mockResolvedValue(undefined);
154
+
159
155
  const resolver = new ConfigResolver();
160
156
  const result = await resolver.resolveApiKey(
161
157
  "llama-server=http://127.0.0.1:8080",
@@ -165,10 +161,7 @@ describe("API key resolution", () => {
165
161
  });
166
162
 
167
163
  it("should return placeholder when provider key is missing", async () => {
168
- mockAccess.mockResolvedValue(undefined);
169
- mockReadFile.mockResolvedValue(
170
- JSON.stringify({ "other-provider": { key: "other-key" } }),
171
- );
164
+ mockAuthStorage.getApiKey.mockResolvedValue(undefined);
172
165
 
173
166
  const resolver = new ConfigResolver();
174
167
  const result = await resolver.resolveApiKey(
@@ -179,12 +172,7 @@ describe("API key resolution", () => {
179
172
  });
180
173
 
181
174
  it("should return the provider key when present", async () => {
182
- mockAccess.mockResolvedValue(undefined);
183
- mockReadFile.mockResolvedValue(
184
- JSON.stringify({
185
- "llama-server=http://127.0.0.1:8080": { key: "test-api-key" },
186
- }),
187
- );
175
+ mockAuthStorage.getApiKey.mockResolvedValue("test-api-key");
188
176
 
189
177
  const resolver = new ConfigResolver();
190
178
  const result = await resolver.resolveApiKey(
@@ -194,23 +182,13 @@ describe("API key resolution", () => {
194
182
  expect(result).toEqual("test-api-key");
195
183
  });
196
184
 
197
- it("should cache the auth file and reuse the key", async () => {
198
- mockAccess.mockResolvedValue(undefined);
199
- mockReadFile.mockResolvedValue(
200
- JSON.stringify({
201
- "llama-server=http://127.0.0.1:8080": { key: "cached-key" },
202
- }),
203
- );
185
+ it("should call reload before each getApiKey", async () => {
186
+ mockAuthStorage.getApiKey.mockResolvedValue("cached-key");
204
187
 
205
188
  const resolver = new ConfigResolver();
206
- const result1 = await resolver.resolveApiKey(
207
- "llama-server=http://127.0.0.1:8080",
208
- );
209
- const result2 = await resolver.resolveApiKey(
210
- "llama-server=http://127.0.0.1:8080",
211
- );
189
+ await resolver.resolveApiKey("llama-server=http://127.0.0.1:8080");
190
+ await resolver.resolveApiKey("llama-server=http://127.0.0.1:8080");
212
191
 
213
- expect(result1).toBe("cached-key");
214
- expect(result2).toBe("cached-key");
192
+ expect(mockAuthStorage.reload).toHaveBeenCalledTimes(2);
215
193
  });
216
194
  });
@@ -1,4 +1,5 @@
1
1
  import { beforeEach, describe, expect, it } from "vitest";
2
+ import { ServerStatus } from "../src/enums/serverStatus";
2
3
  import { Server } from "../src/server";
3
4
  import { createMockServer, mockRpc } from "./mocks";
4
5
 
@@ -146,30 +147,30 @@ describe("Server postRequest", () => {
146
147
  });
147
148
 
148
149
  describe("Server isReady", () => {
149
- it("should return true when health status is ok", async () => {
150
+ it("should return READY when health status is ok", async () => {
150
151
  mockRpc.mockResolvedValueOnce({ status: "ok" });
151
152
 
152
153
  const server = createMockServer();
153
- const ready = await server.isReady();
154
+ const status = await server.isReady(1000);
154
155
 
155
- expect(ready).toBe(true);
156
+ expect(status).toBe(ServerStatus.READY);
156
157
  });
157
158
 
158
- it("should return false when health check fails", async () => {
159
+ it("should return UNREACHABLE when health check fails", async () => {
159
160
  mockRpc.mockRejectedValueOnce(new Error("connection refused"));
160
161
 
161
162
  const server = createMockServer();
162
- const ready = await server.isReady();
163
+ const status = await server.isReady(1000);
163
164
 
164
- expect(ready).toBe(false);
165
+ expect(status).toBe(ServerStatus.UNREACHABLE);
165
166
  });
166
167
 
167
- it("should return false when health status is not ok", async () => {
168
+ it("should return UNREACHABLE when health status is not ok", async () => {
168
169
  mockRpc.mockResolvedValueOnce({ status: "error" });
169
170
 
170
171
  const server = createMockServer();
171
- const ready = await server.isReady();
172
+ const status = await server.isReady(1000);
172
173
 
173
- expect(ready).toBe(false);
174
+ expect(status).toBe(ServerStatus.UNREACHABLE);
174
175
  });
175
176
  });
@@ -12,7 +12,14 @@ const mockPi = {
12
12
 
13
13
  beforeEach(() => {
14
14
  vi.clearAllMocks();
15
- mockRpc.mockResolvedValue({});
15
+ mockRpc.mockImplementation((endpoint: string, fallback?: unknown) => {
16
+ const defaults: Record<string, unknown> = {
17
+ "/health": { status: "ok" },
18
+ "/props?autoload=false": { role: "router" },
19
+ "/v1/models": { data: [], object: "list" },
20
+ };
21
+ return Promise.resolve(defaults[endpoint] ?? fallback ?? {});
22
+ });
16
23
  });
17
24
 
18
25
  describe("Server", () => {
@@ -44,9 +51,15 @@ describe("ServerManager", () => {
44
51
  id: "test-model",
45
52
  toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model" }),
46
53
  } as unknown as BaseModel;
47
- mockRpc.mockResolvedValue({
48
- data: [mockModel],
49
- object: "list",
54
+ mockRpc.mockImplementation((endpoint: string, fallback?: unknown) => {
55
+ if (endpoint === "/v1/models") {
56
+ return Promise.resolve({ data: [mockModel], object: "list" });
57
+ }
58
+ const defaults: Record<string, unknown> = {
59
+ "/health": { status: "ok" },
60
+ "/props?autoload=false": { role: "router" },
61
+ };
62
+ return Promise.resolve(defaults[endpoint] ?? fallback ?? {});
50
63
  });
51
64
 
52
65
  const server1 = createMockServer({
@@ -63,7 +76,7 @@ describe("ServerManager", () => {
63
76
  });
64
77
  const manager = new ServerManager([server1, server2] as any);
65
78
 
66
- await manager.registerAllProviders(mockPi as any);
79
+ await manager.initialize(mockPi as any);
67
80
 
68
81
  expect(mockPi.registerProvider).toHaveBeenCalledTimes(2);
69
82
  expect(mockPi.registerProvider).toHaveBeenCalledWith(