pi-llama-cpp 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -3
- package/package.json +2 -2
- package/src/constants.ts +18 -1
- package/src/enums/serverStatus.ts +6 -0
- package/src/index.ts +24 -7
- package/src/interfaces/levels.ts +7 -0
- package/src/managers/command.ts +1 -1
- package/src/managers/events.ts +42 -4
- package/src/managers/server.ts +79 -14
- package/src/models/baseModel.ts +7 -0
- package/src/resolver.ts +87 -58
- package/src/server.ts +25 -9
- package/tests/events.test.ts +256 -0
- package/tests/mocks.ts +6 -3
- package/tests/resolver.test.ts +59 -81
- package/tests/server.test.ts +10 -9
- package/tests/serverManager.test.ts +18 -5
package/README.md
CHANGED
|
@@ -12,6 +12,7 @@ A [Pi Coding Agent](https://pi.dev/) extension that integrates with running [lla
|
|
|
12
12
|
- **Flexible URL resolution** — configures the server URL via project config, environment variable, or global settings
|
|
13
13
|
- **Auth support** — lets you log in to a llama.cpp server that is secured with an API key
|
|
14
14
|
- **Multiple server support** — connect to multiple llama.cpp servers simultaneously by separating URLs with semicolons
|
|
15
|
+
- **Thinking budget support** — configurable token budgets for model reasoning/thinking, mapped to Pi's thinking levels
|
|
15
16
|
|
|
16
17
|
### Status Indicators
|
|
17
18
|
|
|
@@ -48,11 +49,11 @@ pi install https://github.com/gsanhueza/pi-llama-cpp
|
|
|
48
49
|
|
|
49
50
|
The extension resolves the llama.cpp server URL(s) using the following priority order:
|
|
50
51
|
|
|
51
|
-
1. **Per-project config** — `.pi/
|
|
52
|
+
1. **Per-project config** — `.pi/settings.json` in your project root:
|
|
52
53
|
|
|
53
54
|
```json
|
|
54
55
|
{
|
|
55
|
-
"
|
|
56
|
+
"llamaServerUrl": "http://127.0.0.1:8080"
|
|
56
57
|
}
|
|
57
58
|
```
|
|
58
59
|
|
|
@@ -127,7 +128,7 @@ The extension determines the context size as follows:
|
|
|
127
128
|
|
|
128
129
|
- **Router mode**
|
|
129
130
|
- When loaded, reads `meta.n_ctx` from the `/models` endpoint
|
|
130
|
-
- When not loaded, reads `--ctx-size` and/or `--fit-ctx` from the server arguments
|
|
131
|
+
- When not loaded, reads `--ctx-size` and/or `--fit-ctx` from the server arguments (which can also originate from the **presets.ini** file the llama.cpp server uses to load its models).
|
|
131
132
|
- **Single mode** — reads `meta.n_ctx` from the `/models` endpoint
|
|
132
133
|
- **Legacy mode** — reads `max_model_len` from `/models`, falling back to `n_ctx` from `/props`
|
|
133
134
|
- Falls back to `128000` if not available
|
|
@@ -140,6 +141,8 @@ The extension determines the context size as follows:
|
|
|
140
141
|
| `/models info` | Show detailed information for all available models at once. |
|
|
141
142
|
| `/models unload` | Unload all loaded models at once. |
|
|
142
143
|
|
|
144
|
+
> **Note:** When a llama.cpp server takes longer than 1 second to answer the startup health check, it is skipped with a warning. Run `/models` to retry without a timeout and see all models.
|
|
145
|
+
|
|
143
146
|
> **Note:** When a llama.cpp server is unreachable, `/models` displays an error notification with the configured server URL, but healthy servers continue to show their models.
|
|
144
147
|
|
|
145
148
|
> **Note:** The `/models unload` command only makes sense in router mode.
|
|
@@ -157,12 +160,44 @@ When browsing models via the `/models` command, you can:
|
|
|
157
160
|
|
|
158
161
|
> **Note:** In single-model and legacy-model mode, **Unload** is not available, since there is only one model on the server.
|
|
159
162
|
|
|
163
|
+
### Thinking Budgets
|
|
164
|
+
|
|
165
|
+
The extension supports configurable **thinking budgets** that control how many tokens the model allocates to its reasoning/thinking process.
|
|
166
|
+
This is tied to Pi's thinking level selector (off, minimal, low, medium, high, xhigh).
|
|
167
|
+
|
|
168
|
+
| Level | Tokens | Description |
|
|
169
|
+
| --------- | ------ | ---------------------------- |
|
|
170
|
+
| `off` | 0 | Thinking disabled |
|
|
171
|
+
| `minimal` | 1,024 | Short reasoning steps |
|
|
172
|
+
| `low` | 2,048 | Light reasoning |
|
|
173
|
+
| `medium` | 8,192 | Balanced reasoning (default) |
|
|
174
|
+
| `high` | 16,384 | Extended reasoning |
|
|
175
|
+
| `xhigh` | -1 | Unlimited reasoning |
|
|
176
|
+
|
|
177
|
+
User-defined budgets can override the defaults by adding a `thinkingBudgets` object to `~/.pi/agent/settings.json` (global) or `.pi/settings.json` (per-project):
|
|
178
|
+
|
|
179
|
+
```json
|
|
180
|
+
{
|
|
181
|
+
"thinkingBudgets": {
|
|
182
|
+
"minimal": 256,
|
|
183
|
+
"low": 1024,
|
|
184
|
+
"medium": 2048,
|
|
185
|
+
"high": 4096
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Only `minimal`, `low`, `medium`, and `high` are configurable — `off` (0) and `xhigh` (-1, unlimited) are fixed.
|
|
191
|
+
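For `minimal`, `low`, `medium`, and `high`, the extension automatically injects the matching `thinking_budget_tokens` into each request payload sent to a llama.cpp model. For `off` it sends `chat_template_kwargs: { enable_thinking: false }` instead, and for `xhigh` it leaves the payload unchanged (no budget is sent).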
|
|
192
|
+
|
|
160
193
|
### Model Selection Event
|
|
161
194
|
|
|
162
195
|
When you switch models via Pi's model picker (instead of using the `/models` command), the extension listens for the `model_select` event, which also loads the requested model before the conversation begins.
|
|
163
196
|
|
|
164
197
|
This keeps the server in sync with the active model in Pi, regardless of how the switch was initiated — you don't need to manually load models before using them.
|
|
165
198
|
|
|
199
|
+
> **Note:** If you switch sessions while a model load is in-flight, you'll see a warning, but the load continues in the background. Use `/models` in the new session to verify the model status.
|
|
200
|
+
|
|
166
201
|
### Loading Models
|
|
167
202
|
|
|
168
203
|
When you trigger a load, switch, or retry action, the extension polls the server to track progress. If a model takes longer than **60 seconds** to load, the polling times out with an error.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-llama-cpp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.1",
|
|
4
4
|
"description": "Pi extension for llama.cpp integration. Supports router, single and legacy models. Supports multiple servers.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi",
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"@earendil-works/pi-tui": "*"
|
|
37
37
|
},
|
|
38
38
|
"devDependencies": {
|
|
39
|
-
"@types/node": "^25.9.
|
|
39
|
+
"@types/node": "^25.9.3",
|
|
40
40
|
"prettier-plugin-organize-imports": "^4.3.0",
|
|
41
41
|
"vitest": "^4.1.8"
|
|
42
42
|
}
|
package/src/constants.ts
CHANGED
|
@@ -39,6 +39,23 @@ export const POLLING_INTERVAL = 500;
|
|
|
39
39
|
export const POLLING_TIMEOUT = 60000;
|
|
40
40
|
|
|
41
41
|
/**
|
|
42
|
-
* Reasonable time to read notifications if context goes stale
|
|
42
|
+
* Reasonable time (ms) to read notifications if context goes stale
|
|
43
43
|
*/
|
|
44
44
|
export const READABLE_TIMEOUT = 15000;
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Timeout (ms) for server verification before assuming failure
|
|
48
|
+
*/
|
|
49
|
+
export const SERVER_TIMEOUT = 1000;
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Thinking budgets to send to the server, depending on user-selected level in Pi.
|
|
53
|
+
*/
|
|
54
|
+
export const DEFAULT_THINKING_BUDGETS = {
|
|
55
|
+
off: 0,
|
|
56
|
+
minimal: 1024,
|
|
57
|
+
low: 2048,
|
|
58
|
+
medium: 8192,
|
|
59
|
+
high: 16384,
|
|
60
|
+
xhigh: -1,
|
|
61
|
+
};
|
package/src/index.ts
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
import {
|
|
2
|
+
type BeforeProviderRequestEvent,
|
|
3
|
+
type ExtensionAPI,
|
|
4
|
+
type ExtensionCommandContext,
|
|
5
|
+
type ExtensionContext,
|
|
6
|
+
type SessionBeforeSwitchEvent,
|
|
7
|
+
type SessionStartEvent,
|
|
6
8
|
} from "@earendil-works/pi-coding-agent";
|
|
7
9
|
import { PROVIDER_NAME } from "./constants";
|
|
8
10
|
import { ModelSelectEvent } from "./interfaces/events";
|
|
@@ -14,7 +16,7 @@ import { Server } from "./server";
|
|
|
14
16
|
|
|
15
17
|
export default async function (pi: ExtensionAPI) {
|
|
16
18
|
const resolver = new ConfigResolver();
|
|
17
|
-
const urls = await resolver.resolveUrls(
|
|
19
|
+
const urls = await resolver.resolveUrls();
|
|
18
20
|
const servers = urls.map((url) => new Server(url));
|
|
19
21
|
|
|
20
22
|
const eventManager = new EventManager(servers);
|
|
@@ -22,7 +24,7 @@ export default async function (pi: ExtensionAPI) {
|
|
|
22
24
|
const commandManager = new CommandManager(serverManager);
|
|
23
25
|
|
|
24
26
|
// Register providers once at startup
|
|
25
|
-
await serverManager.
|
|
27
|
+
await serverManager.initialize(pi);
|
|
26
28
|
|
|
27
29
|
// Single global /models command
|
|
28
30
|
pi.registerCommand("models", {
|
|
@@ -34,6 +36,21 @@ export default async function (pi: ExtensionAPI) {
|
|
|
34
36
|
});
|
|
35
37
|
|
|
36
38
|
// Events
|
|
39
|
+
pi.on("session_start", (event: SessionStartEvent, ctx: ExtensionContext) => {
|
|
40
|
+
if (event.reason !== "startup") return;
|
|
41
|
+
for (const warning of serverManager.getWarnings())
|
|
42
|
+
ctx.ui.notify(warning, "warning");
|
|
43
|
+
|
|
44
|
+
for (const warning of resolver.getWarnings())
|
|
45
|
+
ctx.ui.notify(warning, "warning");
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
pi.on(
|
|
49
|
+
"before_provider_request",
|
|
50
|
+
async (event: BeforeProviderRequestEvent) =>
|
|
51
|
+
await eventManager.onBeforeProviderRequest(event),
|
|
52
|
+
);
|
|
53
|
+
|
|
37
54
|
pi.on(
|
|
38
55
|
"model_select",
|
|
39
56
|
async (event: ModelSelectEvent, ctx: ExtensionContext) =>
|
package/src/managers/command.ts
CHANGED
|
@@ -50,7 +50,7 @@ export class CommandManager {
|
|
|
50
50
|
pi: ExtensionAPI,
|
|
51
51
|
) {
|
|
52
52
|
// Re-register providers so Pi sees updated model states
|
|
53
|
-
await this.serverManager.
|
|
53
|
+
await this.serverManager.update(pi);
|
|
54
54
|
|
|
55
55
|
// Notify about unreachable servers
|
|
56
56
|
for (const url of this.serverManager.failedUrls) {
|
package/src/managers/events.ts
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
|
-
import
|
|
1
|
+
import {
|
|
2
|
+
type BeforeProviderRequestEvent,
|
|
3
|
+
type ExtensionContext,
|
|
4
|
+
} from "@earendil-works/pi-coding-agent";
|
|
2
5
|
import { READABLE_TIMEOUT } from "../constants";
|
|
3
6
|
import { ModelSelectEvent } from "../interfaces/events";
|
|
4
7
|
import { BaseModel } from "../models/baseModel";
|
|
8
|
+
import { ConfigResolver } from "../resolver";
|
|
5
9
|
import { Server } from "../server";
|
|
6
10
|
|
|
7
11
|
export class EventManager {
|
|
@@ -9,6 +13,13 @@ export class EventManager {
|
|
|
9
13
|
|
|
10
14
|
constructor(private readonly servers: Server[]) {}
|
|
11
15
|
|
|
16
|
+
/**
|
|
17
|
+
* Resets the in-flight model reference.
|
|
18
|
+
*/
|
|
19
|
+
static resetInflightModel() {
|
|
20
|
+
EventManager.inflightModel = null;
|
|
21
|
+
}
|
|
22
|
+
|
|
12
23
|
/**
|
|
13
24
|
* Reacts to a new model event triggered by Pi
|
|
14
25
|
*
|
|
@@ -55,9 +66,36 @@ export class EventManager {
|
|
|
55
66
|
}
|
|
56
67
|
|
|
57
68
|
/**
|
|
58
|
-
*
|
|
69
|
+
* Intercepts the request to add extra information, useful to llama.cpp.
|
|
70
|
+
* Adds a custom thinking budget to the request payload.
|
|
71
|
+
*
|
|
72
|
+
* @param event Request event
|
|
73
|
+
* @returns Updated payload
|
|
59
74
|
*/
|
|
60
|
-
|
|
61
|
-
|
|
75
|
+
async onBeforeProviderRequest(event: BeforeProviderRequestEvent) {
|
|
76
|
+
const payload = event.payload as { model?: string };
|
|
77
|
+
const { model } = payload;
|
|
78
|
+
if (!model) return payload;
|
|
79
|
+
|
|
80
|
+
// Check if this model belongs to one of our servers
|
|
81
|
+
const isLlamaCpp = this.servers.some((s) =>
|
|
82
|
+
s.models.some((m) => m.id === model),
|
|
83
|
+
);
|
|
84
|
+
|
|
85
|
+
if (!isLlamaCpp) return payload;
|
|
86
|
+
|
|
87
|
+
// Look up the thinking level via the resolver (falling back to "medium") so we can pick a budget
|
|
88
|
+
const resolver = new ConfigResolver();
|
|
89
|
+
const level = resolver.resolveThinkingLevel() ?? "medium";
|
|
90
|
+
const budgets = resolver.resolveThinkingBudgets();
|
|
91
|
+
const thinking_budget_tokens = budgets[level];
|
|
92
|
+
|
|
93
|
+
// Build the payload: "off" disables thinking, "xhigh" sends no budget, every other level sends its token budget
|
|
94
|
+
if (level === "off")
|
|
95
|
+
return { ...payload, chat_template_kwargs: { enable_thinking: false } };
|
|
96
|
+
|
|
97
|
+
if (level === "xhigh") return payload;
|
|
98
|
+
|
|
99
|
+
return { ...payload, thinking_budget_tokens };
|
|
62
100
|
}
|
|
63
101
|
}
|
package/src/managers/server.ts
CHANGED
|
@@ -1,42 +1,97 @@
|
|
|
1
1
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
-
import { API_TYPE } from "../constants";
|
|
2
|
+
import { API_TYPE, PROVIDER_NAME, SERVER_TIMEOUT } from "../constants";
|
|
3
|
+
import { ServerStatus } from "../enums/serverStatus";
|
|
3
4
|
import { BaseModel } from "../models/baseModel";
|
|
4
5
|
import { Server } from "../server";
|
|
5
6
|
|
|
6
7
|
export class ServerManager {
|
|
7
8
|
readonly failedUrls: string[] = [];
|
|
9
|
+
private readonly warnings: string[] = [];
|
|
8
10
|
|
|
9
11
|
constructor(private readonly servers: Server[]) {}
|
|
10
12
|
|
|
13
|
+
/**
|
|
14
|
+
* Verifies reachability of servers and registers the providers
|
|
15
|
+
*
|
|
16
|
+
* @param pi The Pi extension API
|
|
17
|
+
*/
|
|
18
|
+
async initialize(pi: ExtensionAPI) {
|
|
19
|
+
// Register the providers with a timeout first
|
|
20
|
+
await this.update(pi, SERVER_TIMEOUT);
|
|
21
|
+
}
|
|
22
|
+
|
|
11
23
|
/**
|
|
12
24
|
* Registers one provider per server in Pi with their model configurations.
|
|
13
|
-
* Call this after the servers have been initialized.
|
|
14
25
|
* The manual awaiting per-server is deliberate (we want them in order)
|
|
15
26
|
*
|
|
16
|
-
* @param pi The Pi extension
|
|
27
|
+
* @param pi The Pi extension API
|
|
28
|
+
* @param timeout (Optional) Health-check timeout in ms; when omitted (or 0), all servers are tried without a prior health check
|
|
17
29
|
*/
|
|
18
|
-
async
|
|
30
|
+
async update(pi: ExtensionAPI, timeout?: number) {
|
|
19
31
|
this.failedUrls.length = 0;
|
|
20
32
|
|
|
21
|
-
|
|
22
|
-
await this.
|
|
33
|
+
const registrableServers = timeout
|
|
34
|
+
? await this.findRegistrableServers(timeout)
|
|
35
|
+
: this.servers;
|
|
36
|
+
|
|
37
|
+
// Initialization and registration
|
|
38
|
+
for (const server of registrableServers) {
|
|
39
|
+
try {
|
|
40
|
+
await server.initialize();
|
|
41
|
+
await this.registerProvider(server, pi);
|
|
42
|
+
} catch {
|
|
43
|
+
this.failedUrls.push(server.baseUrl);
|
|
44
|
+
continue;
|
|
45
|
+
}
|
|
23
46
|
}
|
|
24
47
|
}
|
|
25
48
|
|
|
49
|
+
/**
|
|
50
|
+
* Runs concurrent health checks and returns only healthy servers; the others are added to `failedUrls` and get a warning.
|
|
51
|
+
*
|
|
52
|
+
* @param timeout Maximum time to wait for each server
|
|
53
|
+
* @returns Array of servers that passed the health check
|
|
54
|
+
*/
|
|
55
|
+
private async findRegistrableServers(timeout: number): Promise<Server[]> {
|
|
56
|
+
const healthResults = await Promise.all(
|
|
57
|
+
this.servers.map(async (server) => {
|
|
58
|
+
const status = await server.isReady(timeout);
|
|
59
|
+
return { server, status };
|
|
60
|
+
}),
|
|
61
|
+
);
|
|
62
|
+
|
|
63
|
+
const response: Server[] = [];
|
|
64
|
+
for (const { server, status } of healthResults) {
|
|
65
|
+
if (status === ServerStatus.READY) {
|
|
66
|
+
response.push(server);
|
|
67
|
+
} else if (status === ServerStatus.TIMEOUT) {
|
|
68
|
+
const message = [
|
|
69
|
+
"[pi-llama-cpp]",
|
|
70
|
+
`${PROVIDER_NAME} server initialization for '${server.baseUrl}' took more than ${SERVER_TIMEOUT} ms, so it has been skipped.`,
|
|
71
|
+
"Run `/models` to retry without timeout and see all models.",
|
|
72
|
+
].join("\n");
|
|
73
|
+
this.warnings.push(message);
|
|
74
|
+
this.failedUrls.push(server.baseUrl);
|
|
75
|
+
} else {
|
|
76
|
+
const message = [
|
|
77
|
+
"[pi-llama-cpp]",
|
|
78
|
+
`${PROVIDER_NAME} server at '${server.baseUrl}' is unreachable.`,
|
|
79
|
+
"Check the URL and try again. Run `/models` to retry.",
|
|
80
|
+
].join("\n");
|
|
81
|
+
this.warnings.push(message);
|
|
82
|
+
this.failedUrls.push(server.baseUrl);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return response;
|
|
87
|
+
}
|
|
88
|
+
|
|
26
89
|
/**
|
|
27
90
|
* Creates a Pi provider for the given server
|
|
28
91
|
*
|
|
29
92
|
* @param server The server
|
|
30
93
|
*/
|
|
31
94
|
private async registerProvider(server: Server, pi: ExtensionAPI) {
|
|
32
|
-
try {
|
|
33
|
-
await server.initialize();
|
|
34
|
-
} catch {
|
|
35
|
-
this.failedUrls.push(server.baseUrl);
|
|
36
|
-
return;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
// Setup the Pi registration
|
|
40
95
|
const { baseUrl, models, providerId, providerName } = server;
|
|
41
96
|
const apiKey = await server.getApiKey();
|
|
42
97
|
const modelConfigs = await Promise.all(
|
|
@@ -52,6 +107,16 @@ export class ServerManager {
|
|
|
52
107
|
});
|
|
53
108
|
}
|
|
54
109
|
|
|
110
|
+
/**
|
|
111
|
+
* Returns the warnings collected during initialization and clears the internal list, so each one is reported only once.
|
|
112
|
+
*/
|
|
113
|
+
getWarnings(): string[] {
|
|
114
|
+
const warnings = [...this.warnings];
|
|
115
|
+
this.warnings.length = 0;
|
|
116
|
+
|
|
117
|
+
return warnings;
|
|
118
|
+
}
|
|
119
|
+
|
|
55
120
|
/**
|
|
56
121
|
* Returns all models from all servers.
|
|
57
122
|
*
|
package/src/models/baseModel.ts
CHANGED
|
@@ -172,6 +172,13 @@ export abstract class BaseModel {
|
|
|
172
172
|
id: this.id,
|
|
173
173
|
name: this.name,
|
|
174
174
|
reasoning: this.reasoning,
|
|
175
|
+
thinkingLevelMap: {
|
|
176
|
+
minimal: "minimal",
|
|
177
|
+
low: "low",
|
|
178
|
+
medium: "medium",
|
|
179
|
+
high: "high",
|
|
180
|
+
xhigh: "xhigh",
|
|
181
|
+
},
|
|
175
182
|
input: await this.getCapabilities(),
|
|
176
183
|
contextWindow: await this.getContextSize(),
|
|
177
184
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
package/src/resolver.ts
CHANGED
|
@@ -1,66 +1,65 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
1
|
+
import {
|
|
2
|
+
AuthStorage,
|
|
3
|
+
getAgentDir,
|
|
4
|
+
SettingsManager,
|
|
5
|
+
} from "@earendil-works/pi-coding-agent";
|
|
6
|
+
import { readFile } from "node:fs/promises";
|
|
3
7
|
import { join } from "node:path";
|
|
4
|
-
import {
|
|
5
|
-
|
|
8
|
+
import {
|
|
9
|
+
API_KEY_PLACEHOLDER,
|
|
10
|
+
DEFAULT_LLAMA_SERVER_URL,
|
|
11
|
+
DEFAULT_THINKING_BUDGETS,
|
|
12
|
+
} from "./constants";
|
|
13
|
+
import { ThinkingLevel } from "./interfaces/levels";
|
|
6
14
|
|
|
7
15
|
export class ConfigResolver {
|
|
16
|
+
private warnings: string[] = [];
|
|
17
|
+
|
|
8
18
|
private cachedUrls: string[] = [];
|
|
19
|
+
private authStorage = AuthStorage.create(join(getAgentDir(), "auth.json"));
|
|
20
|
+
private settingsManager = SettingsManager.create(
|
|
21
|
+
process.cwd(),
|
|
22
|
+
getAgentDir(),
|
|
23
|
+
);
|
|
9
24
|
|
|
10
25
|
/**
|
|
11
|
-
*
|
|
26
|
+
* Resolves the llama-server URL by searching in the global settings.json
|
|
12
27
|
*/
|
|
13
|
-
private async
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
return false;
|
|
19
|
-
}
|
|
28
|
+
private async resolveGlobalUrl(): Promise<string | null> {
|
|
29
|
+
const settings = this.settingsManager.getGlobalSettings();
|
|
30
|
+
const { llamaServerUrl = null } = settings as Record<string, string>;
|
|
31
|
+
|
|
32
|
+
return llamaServerUrl;
|
|
20
33
|
}
|
|
21
34
|
|
|
22
35
|
/**
|
|
23
|
-
*
|
|
36
|
+
* Resolves the llama-server URL by searching in the project's .pi/settings.json
|
|
24
37
|
*/
|
|
25
|
-
private async
|
|
26
|
-
|
|
38
|
+
private async resolveProjectUrl(): Promise<string | null> {
|
|
39
|
+
// Legacy path: if the deprecated `.pi/llama-server.json` can be read, queue a deprecation warning and return its url
|
|
27
40
|
try {
|
|
28
|
-
|
|
41
|
+
const filePath = join(process.cwd(), ".pi", "llama-server.json");
|
|
42
|
+
const { url = null } = JSON.parse(await readFile(filePath, "utf-8"));
|
|
43
|
+
|
|
44
|
+
const messages = [
|
|
45
|
+
"[pi-llama-cpp]",
|
|
46
|
+
"The project-level `.pi/llama-server.json` file has been deprecated.",
|
|
47
|
+
"It will work for now, but you must follow these instructions as soon as possible:",
|
|
48
|
+
'- Move your url to the project-level `.pi/settings.json` file as {"llamaServerUrl": "<url>"}.',
|
|
49
|
+
"- Remove the old `.pi/llama-server.json` file.",
|
|
50
|
+
];
|
|
51
|
+
|
|
52
|
+
this.warnings.push(messages.join("\n"));
|
|
53
|
+
|
|
54
|
+
return url;
|
|
29
55
|
} catch {
|
|
30
|
-
|
|
56
|
+
// No old file available, continue as normal
|
|
31
57
|
}
|
|
32
|
-
}
|
|
33
58
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
*/
|
|
37
|
-
private async readConfigValue<T>(
|
|
38
|
-
filePath: string,
|
|
39
|
-
key: keyof T,
|
|
40
|
-
): Promise<T[keyof T] | null> {
|
|
41
|
-
const cfg = await this.readJson<T>(filePath);
|
|
42
|
-
return cfg?.[key] ?? null;
|
|
43
|
-
}
|
|
59
|
+
const settings = this.settingsManager.getProjectSettings();
|
|
60
|
+
const { llamaServerUrl = null } = settings as Record<string, string>;
|
|
44
61
|
|
|
45
|
-
|
|
46
|
-
* Resolves the llama-server URL by searching in the global settings.json
|
|
47
|
-
*/
|
|
48
|
-
private async resolveGlobalUrl(): Promise<string | null> {
|
|
49
|
-
const globalPath = join(getAgentDir(), "settings.json");
|
|
50
|
-
if (!(await this.fileExists(globalPath))) return null;
|
|
51
|
-
return this.readConfigValue<Record<string, string>>(
|
|
52
|
-
globalPath,
|
|
53
|
-
"llamaServerUrl",
|
|
54
|
-
);
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
/**
|
|
58
|
-
* Resolves the llama-server URL by searching in the project's .pi/llama-server.json
|
|
59
|
-
*/
|
|
60
|
-
private async resolveProjectUrl(cwd: string): Promise<string | null> {
|
|
61
|
-
const projectPath = join(cwd, ".pi", "llama-server.json");
|
|
62
|
-
if (!(await this.fileExists(projectPath))) return null;
|
|
63
|
-
return this.readConfigValue<Record<string, string>>(projectPath, "url");
|
|
62
|
+
return llamaServerUrl;
|
|
64
63
|
}
|
|
65
64
|
|
|
66
65
|
/**
|
|
@@ -73,9 +72,9 @@ export class ConfigResolver {
|
|
|
73
72
|
/**
|
|
74
73
|
* Tries all possible ways to retrieve the llama-server URL(s)
|
|
75
74
|
*/
|
|
76
|
-
private async extractJoinedUrls(
|
|
75
|
+
private async extractJoinedUrls(): Promise<string> {
|
|
77
76
|
// 1. per-project config
|
|
78
|
-
let response = await this.resolveProjectUrl(
|
|
77
|
+
let response = await this.resolveProjectUrl();
|
|
79
78
|
if (response) return response;
|
|
80
79
|
|
|
81
80
|
// 2. env
|
|
@@ -93,10 +92,10 @@ export class ConfigResolver {
|
|
|
93
92
|
/**
|
|
94
93
|
* Resolves URLs where llama-servers are running (cached)
|
|
95
94
|
*/
|
|
96
|
-
async resolveUrls(
|
|
95
|
+
async resolveUrls(): Promise<string[]> {
|
|
97
96
|
if (this.cachedUrls.length > 0) return this.cachedUrls;
|
|
98
97
|
|
|
99
|
-
const raw = await this.extractJoinedUrls(
|
|
98
|
+
const raw = await this.extractJoinedUrls();
|
|
100
99
|
const urls = raw
|
|
101
100
|
.split(";")
|
|
102
101
|
.map((u) => u.trim())
|
|
@@ -108,16 +107,46 @@ export class ConfigResolver {
|
|
|
108
107
|
}
|
|
109
108
|
|
|
110
109
|
/**
|
|
111
|
-
* Resolves API key for the provider ID using Pi's
|
|
112
|
-
* Deliberately not cached, to react to changes in the file
|
|
110
|
+
* Resolves the API key for the provider ID using Pi's AuthStorage (reloaded on every call); falls back to API_KEY_PLACEHOLDER when none is stored
|
|
113
111
|
*/
|
|
114
112
|
async resolveApiKey(providerId: string): Promise<string> {
|
|
115
|
-
|
|
116
|
-
|
|
113
|
+
this.authStorage.reload();
|
|
114
|
+
const apiKey = await this.authStorage.getApiKey(providerId);
|
|
115
|
+
|
|
116
|
+
return apiKey ?? API_KEY_PLACEHOLDER;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* Returns the warnings collected during URL resolution and clears the internal list, so each one is reported only once.
|
|
121
|
+
*/
|
|
122
|
+
getWarnings(): string[] {
|
|
123
|
+
const warnings = [...this.warnings];
|
|
124
|
+
this.warnings.length = 0;
|
|
125
|
+
|
|
126
|
+
return warnings;
|
|
127
|
+
}
|
|
117
128
|
|
|
118
|
-
|
|
119
|
-
|
|
129
|
+
/**
|
|
130
|
+
* Resolves the current thinking level from Pi.
|
|
131
|
+
*
|
|
132
|
+
* @returns Selected level
|
|
133
|
+
*/
|
|
134
|
+
resolveThinkingLevel(): ThinkingLevel | undefined {
|
|
135
|
+
return this.settingsManager.getDefaultThinkingLevel();
|
|
136
|
+
}
|
|
120
137
|
|
|
121
|
-
|
|
138
|
+
/**
|
|
139
|
+
* Resolves the effective thinking budgets from settings
|
|
140
|
+
*
|
|
141
|
+
* @returns Thinking budgets
|
|
142
|
+
*/
|
|
143
|
+
resolveThinkingBudgets(): Record<ThinkingLevel, number> {
|
|
144
|
+
const settingsBudgets = this.settingsManager.getThinkingBudgets() ?? {};
|
|
145
|
+
const availableBudgets = {
|
|
146
|
+
...DEFAULT_THINKING_BUDGETS,
|
|
147
|
+
...settingsBudgets,
|
|
148
|
+
};
|
|
149
|
+
|
|
150
|
+
return availableBudgets;
|
|
122
151
|
}
|
|
123
152
|
}
|
package/src/server.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { PROVIDER_NAME, PROVIDER_PREFIX } from "./constants";
|
|
2
2
|
import { Mode } from "./enums/mode";
|
|
3
|
+
import { ServerStatus } from "./enums/serverStatus";
|
|
3
4
|
import { HealthEndpoint } from "./interfaces/endpoints/health";
|
|
4
5
|
import { ModelsEndpoint } from "./interfaces/endpoints/models";
|
|
5
6
|
import { PropsEndpoint } from "./interfaces/endpoints/props";
|
|
@@ -10,7 +11,8 @@ import { SingleModel } from "./models/singleModel";
|
|
|
10
11
|
import { ConfigResolver } from "./resolver";
|
|
11
12
|
|
|
12
13
|
export class Server {
|
|
13
|
-
readonly models: BaseModel[] = [];
|
|
14
|
+
public readonly models: BaseModel[] = [];
|
|
15
|
+
private configResolver = new ConfigResolver();
|
|
14
16
|
|
|
15
17
|
constructor(readonly baseUrl: string) {}
|
|
16
18
|
|
|
@@ -33,7 +35,7 @@ export class Server {
|
|
|
33
35
|
* @returns The API key
|
|
34
36
|
*/
|
|
35
37
|
async getApiKey(): Promise<string> {
|
|
36
|
-
return await
|
|
38
|
+
return await this.configResolver.resolveApiKey(this.providerId);
|
|
37
39
|
}
|
|
38
40
|
|
|
39
41
|
/**
|
|
@@ -73,15 +75,29 @@ export class Server {
|
|
|
73
75
|
}
|
|
74
76
|
|
|
75
77
|
/**
|
|
76
|
-
*
|
|
77
|
-
*
|
|
78
|
+
* Checks if the server is ready, with a timeout.
|
|
79
|
+
*
|
|
80
|
+
* @param timeout Maximum time to wait for the health check
|
|
81
|
+
* @returns READY when the health status is "ok", TIMEOUT when the check exceeds `timeout`, UNREACHABLE otherwise
|
|
78
82
|
*/
|
|
79
|
-
async isReady(): Promise<
|
|
83
|
+
async isReady(timeout: number): Promise<ServerStatus> {
|
|
80
84
|
try {
|
|
81
|
-
const
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
+
const timeoutPromise = new Promise<never>((_, reject) =>
|
|
86
|
+
setTimeout(() => reject(new Error("timeout")), timeout),
|
|
87
|
+
);
|
|
88
|
+
const health = await Promise.race([
|
|
89
|
+
this.fetchServerHealth(),
|
|
90
|
+
timeoutPromise,
|
|
91
|
+
]);
|
|
92
|
+
if (health.status === "ok") {
|
|
93
|
+
return ServerStatus.READY;
|
|
94
|
+
}
|
|
95
|
+
return ServerStatus.UNREACHABLE;
|
|
96
|
+
} catch (error) {
|
|
97
|
+
if (error instanceof Error && error.message === "timeout") {
|
|
98
|
+
return ServerStatus.TIMEOUT;
|
|
99
|
+
}
|
|
100
|
+
return ServerStatus.UNREACHABLE;
|
|
85
101
|
}
|
|
86
102
|
}
|
|
87
103
|
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
|
2
|
+
import { DEFAULT_THINKING_BUDGETS } from "../src/constants";
|
|
3
|
+
import { createMockModel, createMockServer } from "./mocks";
|
|
4
|
+
|
|
5
|
+
// Create a mutable mock object shared across tests
|
|
6
|
+
const mockSettingsManager = {
|
|
7
|
+
getDefaultThinkingLevel: vi.fn(() => "medium"),
|
|
8
|
+
getThinkingBudgets: vi.fn<() => Record<string, number> | undefined>(),
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
vi.mock("@earendil-works/pi-coding-agent", async (importOriginal) => {
|
|
12
|
+
const actual =
|
|
13
|
+
await importOriginal<typeof import("@earendil-works/pi-coding-agent")>();
|
|
14
|
+
return {
|
|
15
|
+
...actual,
|
|
16
|
+
SettingsManager: {
|
|
17
|
+
create: () => mockSettingsManager,
|
|
18
|
+
},
|
|
19
|
+
};
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
let EventManager: typeof import("../src/managers/events").EventManager;
|
|
23
|
+
|
|
24
|
+
beforeAll(async () => {
|
|
25
|
+
const mod = await vi.importActual("../src/managers/events");
|
|
26
|
+
EventManager =
|
|
27
|
+
mod.EventManager as typeof import("../src/managers/events").EventManager;
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
beforeEach(() => {
|
|
31
|
+
vi.restoreAllMocks();
|
|
32
|
+
EventManager.resetInflightModel();
|
|
33
|
+
mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("medium");
|
|
34
|
+
mockSettingsManager.getThinkingBudgets.mockReturnValue(undefined);
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
const createPayload = (modelId: string) => ({
|
|
38
|
+
model: modelId,
|
|
39
|
+
messages: [{ role: "user", content: "hello" }],
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
const createNonLlamaPayload = () => ({
|
|
43
|
+
model: "gpt-4",
|
|
44
|
+
messages: [{ role: "user", content: "hello" }],
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
describe("EventManager.onBeforeProviderRequest", () => {
|
|
48
|
+
describe("normal usage — each thinking level", () => {
|
|
49
|
+
it.each([
|
|
50
|
+
{
|
|
51
|
+
level: "off",
|
|
52
|
+
expected: { chat_template_kwargs: { enable_thinking: false } },
|
|
53
|
+
},
|
|
54
|
+
{ level: "minimal", expected: { thinking_budget_tokens: 1024 } },
|
|
55
|
+
{ level: "low", expected: { thinking_budget_tokens: 2048 } },
|
|
56
|
+
{ level: "medium", expected: { thinking_budget_tokens: 8192 } },
|
|
57
|
+
{ level: "high", expected: { thinking_budget_tokens: 16384 } },
|
|
58
|
+
{ level: "xhigh", expected: {} },
|
|
59
|
+
])(
|
|
60
|
+
'level "$level" should return $expected',
|
|
61
|
+
async ({ level, expected }) => {
|
|
62
|
+
mockSettingsManager.getDefaultThinkingLevel.mockReturnValue(level);
|
|
63
|
+
|
|
64
|
+
const server = createMockServer({
|
|
65
|
+
models: ["model-a"].map((id) => createMockModel(id)),
|
|
66
|
+
});
|
|
67
|
+
const eventManager = new EventManager([server]);
|
|
68
|
+
const event = { payload: createPayload("model-a") };
|
|
69
|
+
|
|
70
|
+
const result = (await eventManager.onBeforeProviderRequest(
|
|
71
|
+
event as any,
|
|
72
|
+
)) as Record<string, unknown>;
|
|
73
|
+
|
|
74
|
+
expect(result.model).toBe("model-a");
|
|
75
|
+
expect(result).toMatchObject(expected);
|
|
76
|
+
},
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
it("should preserve original payload fields alongside new ones", async () => {
|
|
80
|
+
mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("low");
|
|
81
|
+
|
|
82
|
+
const server = createMockServer({
|
|
83
|
+
models: ["model-b"].map((id) => createMockModel(id)),
|
|
84
|
+
});
|
|
85
|
+
const eventManager = new EventManager([server]);
|
|
86
|
+
const event = {
|
|
87
|
+
payload: {
|
|
88
|
+
model: "model-b",
|
|
89
|
+
messages: [{ role: "user", content: "test" }],
|
|
90
|
+
temperature: 0.7,
|
|
91
|
+
},
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
const result = (await eventManager.onBeforeProviderRequest(
|
|
95
|
+
event as any,
|
|
96
|
+
)) as Record<string, unknown>;
|
|
97
|
+
|
|
98
|
+
expect(result.messages).toEqual([{ role: "user", content: "test" }]);
|
|
99
|
+
expect(result.temperature).toBe(0.7);
|
|
100
|
+
expect(result.thinking_budget_tokens).toBe(DEFAULT_THINKING_BUDGETS.low);
|
|
101
|
+
});
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
describe("non-llama.cpp models", () => {
|
|
105
|
+
it("should return the payload unchanged for unknown models", async () => {
|
|
106
|
+
const server = createMockServer({
|
|
107
|
+
models: ["model-a"].map((id) => createMockModel(id)),
|
|
108
|
+
});
|
|
109
|
+
const eventManager = new EventManager([server]);
|
|
110
|
+
const event = { payload: createNonLlamaPayload() };
|
|
111
|
+
|
|
112
|
+
const result = await eventManager.onBeforeProviderRequest(event as any);
|
|
113
|
+
|
|
114
|
+
expect(result).toEqual(createNonLlamaPayload());
|
|
115
|
+
});
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
describe("missing model in payload", () => {
|
|
119
|
+
it("should return the payload unchanged when model is absent", async () => {
|
|
120
|
+
const server = createMockServer({
|
|
121
|
+
models: ["model-a"].map((id) => createMockModel(id)),
|
|
122
|
+
});
|
|
123
|
+
const eventManager = new EventManager([server]);
|
|
124
|
+
const event = { payload: { messages: [] } };
|
|
125
|
+
|
|
126
|
+
const result = await eventManager.onBeforeProviderRequest(event as any);
|
|
127
|
+
|
|
128
|
+
expect(result).toEqual({ messages: [] });
|
|
129
|
+
});
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
describe("user-defined budget overrides", () => {
|
|
133
|
+
it("should use user-defined budgets instead of defaults", async () => {
|
|
134
|
+
mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("low");
|
|
135
|
+
mockSettingsManager.getThinkingBudgets.mockReturnValue({ low: 4096 });
|
|
136
|
+
|
|
137
|
+
const server = createMockServer({
|
|
138
|
+
models: ["model-a"].map((id) => createMockModel(id)),
|
|
139
|
+
});
|
|
140
|
+
const eventManager = new EventManager([server]);
|
|
141
|
+
const event = { payload: createPayload("model-a") };
|
|
142
|
+
|
|
143
|
+
const result = (await eventManager.onBeforeProviderRequest(
|
|
144
|
+
event as any,
|
|
145
|
+
)) as Record<string, unknown>;
|
|
146
|
+
|
|
147
|
+
expect(result.thinking_budget_tokens).toBe(4096);
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
it("should merge user budgets with defaults (partial override)", async () => {
|
|
151
|
+
mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("medium");
|
|
152
|
+
mockSettingsManager.getThinkingBudgets.mockReturnValue({ low: 4096 });
|
|
153
|
+
|
|
154
|
+
const server = createMockServer({
|
|
155
|
+
models: ["model-a"].map((id) => createMockModel(id)),
|
|
156
|
+
});
|
|
157
|
+
const eventManager = new EventManager([server]);
|
|
158
|
+
const event = { payload: createPayload("model-a") };
|
|
159
|
+
|
|
160
|
+
const result = (await eventManager.onBeforeProviderRequest(
|
|
161
|
+
event as any,
|
|
162
|
+
)) as Record<string, unknown>;
|
|
163
|
+
|
|
164
|
+
// medium uses default since user only overrode low
|
|
165
|
+
expect(result.thinking_budget_tokens).toBe(
|
|
166
|
+
DEFAULT_THINKING_BUDGETS.medium,
|
|
167
|
+
);
|
|
168
|
+
});
|
|
169
|
+
});
|
|
170
|
+
|
|
171
|
+
// ─── Edge cases ─────────────────────────────────────────────────────
|
|
172
|
+
|
|
173
|
+
describe("edge cases", () => {
|
|
174
|
+
it("should ignore invalid keys in user budgets (they are silently dropped)", async () => {
|
|
175
|
+
mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("medium");
|
|
176
|
+
mockSettingsManager.getThinkingBudgets.mockReturnValue({
|
|
177
|
+
foo: 999,
|
|
178
|
+
bar: 123,
|
|
179
|
+
} as any);
|
|
180
|
+
|
|
181
|
+
const server = createMockServer({
|
|
182
|
+
models: ["model-a"].map((id) => createMockModel(id)),
|
|
183
|
+
});
|
|
184
|
+
const eventManager = new EventManager([server]);
|
|
185
|
+
const event = { payload: createPayload("model-a") };
|
|
186
|
+
|
|
187
|
+
const result = (await eventManager.onBeforeProviderRequest(
|
|
188
|
+
event as any,
|
|
189
|
+
)) as Record<string, unknown>;
|
|
190
|
+
|
|
191
|
+
// Should fall back to default since "medium" is not in user budgets
|
|
192
|
+
expect(result.thinking_budget_tokens).toBe(
|
|
193
|
+
DEFAULT_THINKING_BUDGETS.medium,
|
|
194
|
+
);
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
it("should not allow overriding 'off' — thinking stays disabled", async () => {
|
|
198
|
+
mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("off");
|
|
199
|
+
mockSettingsManager.getThinkingBudgets.mockReturnValue({
|
|
200
|
+
off: 99999,
|
|
201
|
+
} as any);
|
|
202
|
+
|
|
203
|
+
const server = createMockServer({
|
|
204
|
+
models: ["model-a"].map((id) => createMockModel(id)),
|
|
205
|
+
});
|
|
206
|
+
const eventManager = new EventManager([server]);
|
|
207
|
+
const event = { payload: createPayload("model-a") };
|
|
208
|
+
|
|
209
|
+
const result = (await eventManager.onBeforeProviderRequest(
|
|
210
|
+
event as any,
|
|
211
|
+
)) as Record<string, unknown>;
|
|
212
|
+
|
|
213
|
+
expect(result).toMatchObject({
|
|
214
|
+
chat_template_kwargs: { enable_thinking: false },
|
|
215
|
+
});
|
|
216
|
+
expect(result).not.toHaveProperty("thinking_budget_tokens");
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
it("should not allow overriding 'xhigh' — no budget is injected", async () => {
|
|
220
|
+
mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("xhigh");
|
|
221
|
+
mockSettingsManager.getThinkingBudgets.mockReturnValue({
|
|
222
|
+
xhigh: 1,
|
|
223
|
+
} as any);
|
|
224
|
+
|
|
225
|
+
const server = createMockServer({
|
|
226
|
+
models: ["model-a"].map((id) => createMockModel(id)),
|
|
227
|
+
});
|
|
228
|
+
const eventManager = new EventManager([server]);
|
|
229
|
+
const event = { payload: createPayload("model-a") };
|
|
230
|
+
|
|
231
|
+
const result = (await eventManager.onBeforeProviderRequest(
|
|
232
|
+
event as any,
|
|
233
|
+
)) as Record<string, unknown>;
|
|
234
|
+
|
|
235
|
+
expect(result).toEqual(createPayload("model-a"));
|
|
236
|
+
expect(result).not.toHaveProperty("thinking_budget_tokens");
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
it("should handle empty user budgets gracefully", async () => {
|
|
240
|
+
mockSettingsManager.getDefaultThinkingLevel.mockReturnValue("high");
|
|
241
|
+
mockSettingsManager.getThinkingBudgets.mockReturnValue({});
|
|
242
|
+
|
|
243
|
+
const server = createMockServer({
|
|
244
|
+
models: ["model-a"].map((id) => createMockModel(id)),
|
|
245
|
+
});
|
|
246
|
+
const eventManager = new EventManager([server]);
|
|
247
|
+
const event = { payload: createPayload("model-a") };
|
|
248
|
+
|
|
249
|
+
const result = (await eventManager.onBeforeProviderRequest(
|
|
250
|
+
event as any,
|
|
251
|
+
)) as Record<string, unknown>;
|
|
252
|
+
|
|
253
|
+
expect(result.thinking_budget_tokens).toBe(DEFAULT_THINKING_BUDGETS.high);
|
|
254
|
+
});
|
|
255
|
+
});
|
|
256
|
+
});
|
package/tests/mocks.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
2
2
|
import { vi } from "vitest";
|
|
3
3
|
import { Mode } from "../src/enums/mode";
|
|
4
|
+
import { ServerStatus } from "../src/enums/serverStatus";
|
|
4
5
|
import { Status } from "../src/enums/status";
|
|
5
6
|
import { BaseModel } from "../src/models/baseModel";
|
|
6
7
|
import { Server } from "../src/server";
|
|
@@ -24,12 +25,14 @@ export const createMockServer = (
|
|
|
24
25
|
fetchServerProps: () => mockRpc("/props?autoload=false"),
|
|
25
26
|
postRequest: (resource: "load" | "unload", model: string) =>
|
|
26
27
|
mockRpc(`/models/${resource}`, { model }),
|
|
27
|
-
isReady: async () => {
|
|
28
|
+
isReady: async (timeout: number) => {
|
|
28
29
|
try {
|
|
29
30
|
const r = await mockRpc("/health");
|
|
30
|
-
return r.status === "ok"
|
|
31
|
+
return r.status === "ok"
|
|
32
|
+
? ServerStatus.READY
|
|
33
|
+
: ServerStatus.UNREACHABLE;
|
|
31
34
|
} catch {
|
|
32
|
-
return
|
|
35
|
+
return ServerStatus.UNREACHABLE;
|
|
33
36
|
}
|
|
34
37
|
},
|
|
35
38
|
initialize: async () => {
|
package/tests/resolver.test.ts
CHANGED
|
@@ -4,26 +4,46 @@ import {
|
|
|
4
4
|
DEFAULT_LLAMA_SERVER_URL,
|
|
5
5
|
} from "../src/constants";
|
|
6
6
|
|
|
7
|
-
//
|
|
7
|
+
// Hoisted mock instances — survives vi.resetModules()
|
|
8
|
+
const mockAuthStorage = vi.hoisted(() => ({
|
|
9
|
+
reload: vi.fn(),
|
|
10
|
+
getApiKey: vi.fn(),
|
|
11
|
+
}));
|
|
12
|
+
|
|
13
|
+
const mockSettingsManager = vi.hoisted(() => ({
|
|
14
|
+
getProjectSettings: vi.fn(),
|
|
15
|
+
getGlobalSettings: vi.fn(),
|
|
16
|
+
}));
|
|
17
|
+
|
|
18
|
+
// Mock getAgentDir, AuthStorage, and SettingsManager before importing resolver
|
|
8
19
|
vi.mock("@earendil-works/pi-coding-agent", () => ({
|
|
9
20
|
getAgentDir: vi.fn().mockReturnValue("/fake/agent/dir"),
|
|
21
|
+
AuthStorage: {
|
|
22
|
+
create: vi.fn().mockReturnValue(mockAuthStorage),
|
|
23
|
+
},
|
|
24
|
+
SettingsManager: {
|
|
25
|
+
create: vi.fn().mockReturnValue(mockSettingsManager),
|
|
26
|
+
},
|
|
10
27
|
}));
|
|
11
28
|
|
|
12
29
|
vi.mock("node:fs/promises", () => ({
|
|
13
|
-
access: vi.fn(),
|
|
14
|
-
constants: { F_OK: 0 },
|
|
15
30
|
readFile: vi.fn(),
|
|
16
31
|
}));
|
|
17
32
|
|
|
18
33
|
// Import mocked modules
|
|
19
34
|
import { getAgentDir } from "@earendil-works/pi-coding-agent";
|
|
20
|
-
import {
|
|
35
|
+
import { readFile } from "node:fs/promises";
|
|
21
36
|
import { ConfigResolver } from "../src/resolver";
|
|
22
37
|
|
|
23
38
|
describe("URL resolution fallback chain", () => {
|
|
24
|
-
const mockAccess = vi.mocked(access);
|
|
25
39
|
const mockReadFile = vi.mocked(readFile);
|
|
26
40
|
const mockGetAgentDir = vi.mocked(getAgentDir);
|
|
41
|
+
const mockGetProjectSettings = vi.mocked(
|
|
42
|
+
mockSettingsManager.getProjectSettings,
|
|
43
|
+
);
|
|
44
|
+
const mockGetGlobalSettings = vi.mocked(
|
|
45
|
+
mockSettingsManager.getGlobalSettings,
|
|
46
|
+
);
|
|
27
47
|
|
|
28
48
|
afterEach(() => {
|
|
29
49
|
delete process.env.LLAMA_SERVER_URL;
|
|
@@ -33,115 +53,89 @@ describe("URL resolution fallback chain", () => {
|
|
|
33
53
|
beforeEach(() => {
|
|
34
54
|
vi.clearAllMocks();
|
|
35
55
|
mockGetAgentDir.mockReturnValue("/fake/agent/dir");
|
|
36
|
-
// Default: no
|
|
37
|
-
|
|
38
|
-
|
|
56
|
+
// Default: no settings found
|
|
57
|
+
mockGetProjectSettings.mockReturnValue({});
|
|
58
|
+
mockGetGlobalSettings.mockReturnValue({});
|
|
39
59
|
});
|
|
40
60
|
|
|
41
61
|
it("should return default URL when no config is found", async () => {
|
|
42
62
|
const resolver = new ConfigResolver();
|
|
43
|
-
const result = await resolver.resolveUrls(
|
|
63
|
+
const result = await resolver.resolveUrls();
|
|
44
64
|
|
|
45
65
|
expect(result).toEqual([DEFAULT_LLAMA_SERVER_URL]);
|
|
46
66
|
});
|
|
47
67
|
|
|
48
68
|
it("should prioritize project config over env variable", async () => {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
return undefined;
|
|
52
|
-
throw new Error("ENOENT");
|
|
69
|
+
mockGetProjectSettings.mockReturnValue({
|
|
70
|
+
llamaServerUrl: "http://localhost:9999",
|
|
53
71
|
});
|
|
54
|
-
mockReadFile.mockResolvedValue(
|
|
55
|
-
JSON.stringify({ url: "http://localhost:9999" }),
|
|
56
|
-
);
|
|
57
|
-
|
|
58
72
|
process.env.LLAMA_SERVER_URL = "http://env-url:8080";
|
|
59
73
|
|
|
60
74
|
const resolver = new ConfigResolver();
|
|
61
|
-
const result = await resolver.resolveUrls(
|
|
75
|
+
const result = await resolver.resolveUrls();
|
|
62
76
|
|
|
63
77
|
expect(result).toEqual(["http://localhost:9999"]);
|
|
64
78
|
});
|
|
65
79
|
|
|
66
80
|
it("should use env variable when no project config exists", async () => {
|
|
81
|
+
mockGetProjectSettings.mockReturnValue({});
|
|
67
82
|
process.env.LLAMA_SERVER_URL = "http://env-url:8080";
|
|
68
83
|
|
|
69
84
|
const resolver = new ConfigResolver();
|
|
70
|
-
const result = await resolver.resolveUrls(
|
|
85
|
+
const result = await resolver.resolveUrls();
|
|
71
86
|
|
|
72
87
|
expect(result).toEqual(["http://env-url:8080"]);
|
|
73
88
|
});
|
|
74
89
|
|
|
75
90
|
it("should use global settings when no project config or env exists", async () => {
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
throw new Error("ENOENT");
|
|
91
|
+
mockGetProjectSettings.mockReturnValue({});
|
|
92
|
+
mockGetGlobalSettings.mockReturnValue({
|
|
93
|
+
llamaServerUrl: "http://global:8080",
|
|
80
94
|
});
|
|
81
|
-
mockReadFile.mockResolvedValue(
|
|
82
|
-
JSON.stringify({ llamaServerUrl: "http://global:8080" }),
|
|
83
|
-
);
|
|
84
95
|
|
|
85
96
|
const resolver = new ConfigResolver();
|
|
86
|
-
const result = await resolver.resolveUrls(
|
|
97
|
+
const result = await resolver.resolveUrls();
|
|
87
98
|
|
|
88
99
|
expect(result).toEqual(["http://global:8080"]);
|
|
89
100
|
});
|
|
90
101
|
|
|
91
102
|
it("should strip trailing slashes from resolved URL", async () => {
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
return undefined;
|
|
95
|
-
throw new Error("ENOENT");
|
|
103
|
+
mockGetProjectSettings.mockReturnValue({
|
|
104
|
+
llamaServerUrl: "http://localhost:8080/",
|
|
96
105
|
});
|
|
97
|
-
mockReadFile.mockResolvedValue(
|
|
98
|
-
JSON.stringify({ url: "http://localhost:8080/" }),
|
|
99
|
-
);
|
|
100
106
|
|
|
101
107
|
const resolver = new ConfigResolver();
|
|
102
|
-
const result = await resolver.resolveUrls(
|
|
108
|
+
const result = await resolver.resolveUrls();
|
|
103
109
|
|
|
104
110
|
expect(result).toEqual(["http://localhost:8080"]);
|
|
105
111
|
});
|
|
106
112
|
|
|
107
113
|
it("should cache the resolved URL on subsequent calls", async () => {
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
return undefined;
|
|
111
|
-
throw new Error("ENOENT");
|
|
114
|
+
mockGetProjectSettings.mockReturnValue({
|
|
115
|
+
llamaServerUrl: "http://first:8080",
|
|
112
116
|
});
|
|
113
|
-
mockReadFile.mockResolvedValue(
|
|
114
|
-
JSON.stringify({ url: "http://first:8080" }),
|
|
115
|
-
);
|
|
116
117
|
|
|
117
118
|
const resolver = new ConfigResolver();
|
|
118
|
-
const result1 = await resolver.resolveUrls(
|
|
119
|
-
const result2 = await resolver.resolveUrls(
|
|
119
|
+
const result1 = await resolver.resolveUrls();
|
|
120
|
+
const result2 = await resolver.resolveUrls();
|
|
120
121
|
|
|
121
122
|
expect(result1).toEqual(["http://first:8080"]);
|
|
122
123
|
expect(result2).toEqual(["http://first:8080"]);
|
|
123
124
|
});
|
|
124
125
|
|
|
125
126
|
it("should handle multiple URLs separated by semicolons", async () => {
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
return undefined;
|
|
129
|
-
throw new Error("ENOENT");
|
|
127
|
+
mockGetProjectSettings.mockReturnValue({
|
|
128
|
+
llamaServerUrl: "http://first:8080;http://second:9090/",
|
|
130
129
|
});
|
|
131
|
-
mockReadFile.mockResolvedValue(
|
|
132
|
-
JSON.stringify({ url: "http://first:8080;http://second:9090/" }),
|
|
133
|
-
);
|
|
134
130
|
|
|
135
131
|
const resolver = new ConfigResolver();
|
|
136
|
-
const result = await resolver.resolveUrls(
|
|
132
|
+
const result = await resolver.resolveUrls();
|
|
137
133
|
|
|
138
134
|
expect(result).toEqual(["http://first:8080", "http://second:9090"]);
|
|
139
135
|
});
|
|
140
136
|
});
|
|
141
137
|
|
|
142
138
|
describe("API key resolution", () => {
|
|
143
|
-
const mockAccess = vi.mocked(access);
|
|
144
|
-
const mockReadFile = vi.mocked(readFile);
|
|
145
139
|
const mockGetAgentDir = vi.mocked(getAgentDir);
|
|
146
140
|
|
|
147
141
|
afterEach(() => {
|
|
@@ -151,11 +145,13 @@ describe("API key resolution", () => {
|
|
|
151
145
|
beforeEach(() => {
|
|
152
146
|
vi.clearAllMocks();
|
|
153
147
|
mockGetAgentDir.mockReturnValue("/fake/agent/dir");
|
|
154
|
-
|
|
155
|
-
|
|
148
|
+
mockAuthStorage.reload.mockReturnValue(undefined);
|
|
149
|
+
mockAuthStorage.getApiKey.mockResolvedValue(undefined);
|
|
156
150
|
});
|
|
157
151
|
|
|
158
152
|
it("should return placeholder when auth file does not exist", async () => {
|
|
153
|
+
mockAuthStorage.getApiKey.mockResolvedValue(undefined);
|
|
154
|
+
|
|
159
155
|
const resolver = new ConfigResolver();
|
|
160
156
|
const result = await resolver.resolveApiKey(
|
|
161
157
|
"llama-server=http://127.0.0.1:8080",
|
|
@@ -165,10 +161,7 @@ describe("API key resolution", () => {
|
|
|
165
161
|
});
|
|
166
162
|
|
|
167
163
|
it("should return placeholder when provider key is missing", async () => {
|
|
168
|
-
|
|
169
|
-
mockReadFile.mockResolvedValue(
|
|
170
|
-
JSON.stringify({ "other-provider": { key: "other-key" } }),
|
|
171
|
-
);
|
|
164
|
+
mockAuthStorage.getApiKey.mockResolvedValue(undefined);
|
|
172
165
|
|
|
173
166
|
const resolver = new ConfigResolver();
|
|
174
167
|
const result = await resolver.resolveApiKey(
|
|
@@ -179,12 +172,7 @@ describe("API key resolution", () => {
|
|
|
179
172
|
});
|
|
180
173
|
|
|
181
174
|
it("should return the provider key when present", async () => {
|
|
182
|
-
|
|
183
|
-
mockReadFile.mockResolvedValue(
|
|
184
|
-
JSON.stringify({
|
|
185
|
-
"llama-server=http://127.0.0.1:8080": { key: "test-api-key" },
|
|
186
|
-
}),
|
|
187
|
-
);
|
|
175
|
+
mockAuthStorage.getApiKey.mockResolvedValue("test-api-key");
|
|
188
176
|
|
|
189
177
|
const resolver = new ConfigResolver();
|
|
190
178
|
const result = await resolver.resolveApiKey(
|
|
@@ -194,23 +182,13 @@ describe("API key resolution", () => {
|
|
|
194
182
|
expect(result).toEqual("test-api-key");
|
|
195
183
|
});
|
|
196
184
|
|
|
197
|
-
it("should
|
|
198
|
-
|
|
199
|
-
mockReadFile.mockResolvedValue(
|
|
200
|
-
JSON.stringify({
|
|
201
|
-
"llama-server=http://127.0.0.1:8080": { key: "cached-key" },
|
|
202
|
-
}),
|
|
203
|
-
);
|
|
185
|
+
it("should call reload before each getApiKey", async () => {
|
|
186
|
+
mockAuthStorage.getApiKey.mockResolvedValue("cached-key");
|
|
204
187
|
|
|
205
188
|
const resolver = new ConfigResolver();
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
);
|
|
209
|
-
const result2 = await resolver.resolveApiKey(
|
|
210
|
-
"llama-server=http://127.0.0.1:8080",
|
|
211
|
-
);
|
|
189
|
+
await resolver.resolveApiKey("llama-server=http://127.0.0.1:8080");
|
|
190
|
+
await resolver.resolveApiKey("llama-server=http://127.0.0.1:8080");
|
|
212
191
|
|
|
213
|
-
expect(
|
|
214
|
-
expect(result2).toBe("cached-key");
|
|
192
|
+
expect(mockAuthStorage.reload).toHaveBeenCalledTimes(2);
|
|
215
193
|
});
|
|
216
194
|
});
|
package/tests/server.test.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { beforeEach, describe, expect, it } from "vitest";
|
|
2
|
+
import { ServerStatus } from "../src/enums/serverStatus";
|
|
2
3
|
import { Server } from "../src/server";
|
|
3
4
|
import { createMockServer, mockRpc } from "./mocks";
|
|
4
5
|
|
|
@@ -146,30 +147,30 @@ describe("Server postRequest", () => {
|
|
|
146
147
|
});
|
|
147
148
|
|
|
148
149
|
describe("Server isReady", () => {
|
|
149
|
-
it("should return
|
|
150
|
+
it("should return READY when health status is ok", async () => {
|
|
150
151
|
mockRpc.mockResolvedValueOnce({ status: "ok" });
|
|
151
152
|
|
|
152
153
|
const server = createMockServer();
|
|
153
|
-
const
|
|
154
|
+
const status = await server.isReady(1000);
|
|
154
155
|
|
|
155
|
-
expect(
|
|
156
|
+
expect(status).toBe(ServerStatus.READY);
|
|
156
157
|
});
|
|
157
158
|
|
|
158
|
-
it("should return
|
|
159
|
+
it("should return UNREACHABLE when health check fails", async () => {
|
|
159
160
|
mockRpc.mockRejectedValueOnce(new Error("connection refused"));
|
|
160
161
|
|
|
161
162
|
const server = createMockServer();
|
|
162
|
-
const
|
|
163
|
+
const status = await server.isReady(1000);
|
|
163
164
|
|
|
164
|
-
expect(
|
|
165
|
+
expect(status).toBe(ServerStatus.UNREACHABLE);
|
|
165
166
|
});
|
|
166
167
|
|
|
167
|
-
it("should return
|
|
168
|
+
it("should return UNREACHABLE when health status is not ok", async () => {
|
|
168
169
|
mockRpc.mockResolvedValueOnce({ status: "error" });
|
|
169
170
|
|
|
170
171
|
const server = createMockServer();
|
|
171
|
-
const
|
|
172
|
+
const status = await server.isReady(1000);
|
|
172
173
|
|
|
173
|
-
expect(
|
|
174
|
+
expect(status).toBe(ServerStatus.UNREACHABLE);
|
|
174
175
|
});
|
|
175
176
|
});
|
|
@@ -12,7 +12,14 @@ const mockPi = {
|
|
|
12
12
|
|
|
13
13
|
beforeEach(() => {
|
|
14
14
|
vi.clearAllMocks();
|
|
15
|
-
mockRpc.
|
|
15
|
+
mockRpc.mockImplementation((endpoint: string, fallback?: unknown) => {
|
|
16
|
+
const defaults: Record<string, unknown> = {
|
|
17
|
+
"/health": { status: "ok" },
|
|
18
|
+
"/props?autoload=false": { role: "router" },
|
|
19
|
+
"/v1/models": { data: [], object: "list" },
|
|
20
|
+
};
|
|
21
|
+
return Promise.resolve(defaults[endpoint] ?? fallback ?? {});
|
|
22
|
+
});
|
|
16
23
|
});
|
|
17
24
|
|
|
18
25
|
describe("Server", () => {
|
|
@@ -44,9 +51,15 @@ describe("ServerManager", () => {
|
|
|
44
51
|
id: "test-model",
|
|
45
52
|
toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model" }),
|
|
46
53
|
} as unknown as BaseModel;
|
|
47
|
-
mockRpc.
|
|
48
|
-
|
|
49
|
-
|
|
54
|
+
mockRpc.mockImplementation((endpoint: string, fallback?: unknown) => {
|
|
55
|
+
if (endpoint === "/v1/models") {
|
|
56
|
+
return Promise.resolve({ data: [mockModel], object: "list" });
|
|
57
|
+
}
|
|
58
|
+
const defaults: Record<string, unknown> = {
|
|
59
|
+
"/health": { status: "ok" },
|
|
60
|
+
"/props?autoload=false": { role: "router" },
|
|
61
|
+
};
|
|
62
|
+
return Promise.resolve(defaults[endpoint] ?? fallback ?? {});
|
|
50
63
|
});
|
|
51
64
|
|
|
52
65
|
const server1 = createMockServer({
|
|
@@ -63,7 +76,7 @@ describe("ServerManager", () => {
|
|
|
63
76
|
});
|
|
64
77
|
const manager = new ServerManager([server1, server2] as any);
|
|
65
78
|
|
|
66
|
-
await manager.
|
|
79
|
+
await manager.initialize(mockPi as any);
|
|
67
80
|
|
|
68
81
|
expect(mockPi.registerProvider).toHaveBeenCalledTimes(2);
|
|
69
82
|
expect(mockPi.registerProvider).toHaveBeenCalledWith(
|