pi-llama-cpp 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -30
- package/package.json +6 -5
- package/src/constants.ts +27 -5
- package/src/enums/action.ts +3 -2
- package/src/enums/mode.ts +1 -0
- package/src/enums/serverStatus.ts +6 -0
- package/src/enums/status.ts +1 -0
- package/src/index.ts +53 -31
- package/src/interfaces/auth.ts +1 -5
- package/src/interfaces/endpoints/props.ts +1 -0
- package/src/interfaces/levels.ts +7 -0
- package/src/managers/command.ts +290 -0
- package/src/managers/events.ts +101 -0
- package/src/managers/server.ts +136 -0
- package/src/models/baseModel.ts +75 -20
- package/src/models/legacyModel.ts +45 -0
- package/src/models/routerModel.ts +7 -30
- package/src/models/singleModel.ts +9 -6
- package/src/resolver.ts +152 -0
- package/src/server.ts +187 -0
- package/tests/commandManager.test.ts +182 -133
- package/tests/events.test.ts +256 -0
- package/tests/legacyModel.test.ts +112 -0
- package/tests/mocks.ts +100 -0
- package/tests/resolver.test.ts +143 -106
- package/tests/routerModel.test.ts +46 -68
- package/tests/server.test.ts +176 -0
- package/tests/serverManager.test.ts +130 -0
- package/tests/singleModel.test.ts +21 -29
- package/src/commands/models.ts +0 -228
- package/src/events.ts +0 -26
- package/src/manager.ts +0 -96
- package/src/tools/resolver.ts +0 -136
- package/src/tools/retriever.ts +0 -71
- package/tests/handlers.test.ts +0 -164
- package/tests/modelsCommand.test.ts +0 -270
package/src/models/baseModel.ts
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
import type { ProviderModelConfig } from "@earendil-works/pi-coding-agent";
|
|
2
|
-
import { POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
|
|
2
|
+
import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
|
|
3
3
|
import { Mode } from "../enums/mode";
|
|
4
4
|
import { Status } from "../enums/status";
|
|
5
|
-
import { DataProperty
|
|
6
|
-
import {
|
|
7
|
-
import { rpc } from "../tools/retriever";
|
|
5
|
+
import { DataProperty } from "../interfaces/endpoints/models";
|
|
6
|
+
import { Server } from "../server";
|
|
8
7
|
|
|
9
8
|
/**
|
|
10
9
|
* Abstract base class for llama-server models.
|
|
@@ -12,7 +11,10 @@ import { rpc } from "../tools/retriever";
|
|
|
12
11
|
* loading/unloading, and configuration conversion.
|
|
13
12
|
*/
|
|
14
13
|
export abstract class BaseModel {
|
|
15
|
-
constructor(
|
|
14
|
+
constructor(
|
|
15
|
+
protected readonly model: DataProperty,
|
|
16
|
+
protected readonly server: Server,
|
|
17
|
+
) {}
|
|
16
18
|
|
|
17
19
|
protected readonly statusMapper: Record<string, Status> = {
|
|
18
20
|
loaded: Status.LOADED,
|
|
@@ -28,20 +30,44 @@ export abstract class BaseModel {
|
|
|
28
30
|
[Status.FAILED]: "🔴",
|
|
29
31
|
[Status.SLEEPING]: "🔵",
|
|
30
32
|
[Status.UNLOADED]: "⚪",
|
|
33
|
+
[Status.UNAUTHORIZED]: "⛔",
|
|
31
34
|
};
|
|
32
35
|
|
|
33
36
|
abstract get mode(): Mode;
|
|
34
37
|
|
|
38
|
+
/**
|
|
39
|
+
* Returns the server URL associated with this model
|
|
40
|
+
*/
|
|
41
|
+
get serverUrl(): string {
|
|
42
|
+
return this.server.baseUrl;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Returns the provider id associated with this model
|
|
47
|
+
*/
|
|
48
|
+
get serverId(): string {
|
|
49
|
+
return this.server.providerId;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Returns the model's unique identifier
|
|
54
|
+
*/
|
|
35
55
|
get id(): string {
|
|
36
56
|
return this.model.id;
|
|
37
57
|
}
|
|
38
58
|
|
|
59
|
+
/**
|
|
60
|
+
* Returns the model's display name (first alias, or id as fallback)
|
|
61
|
+
*/
|
|
39
62
|
get name(): string {
|
|
40
63
|
return this.model.aliases?.[0] || this.model.id;
|
|
41
64
|
}
|
|
42
65
|
|
|
66
|
+
/**
|
|
67
|
+
* Whether the model is a reasoning model.
|
|
68
|
+
* Currently always returns true since there's no way to detect this from llama-server.
|
|
69
|
+
*/
|
|
43
70
|
get reasoning(): boolean {
|
|
44
|
-
// We don't have a way to detect this, so we'll fallback to true
|
|
45
71
|
return true;
|
|
46
72
|
}
|
|
47
73
|
|
|
@@ -50,21 +76,38 @@ export abstract class BaseModel {
|
|
|
50
76
|
*
|
|
51
77
|
* @returns An array of capabilities, as expected by Pi
|
|
52
78
|
*/
|
|
53
|
-
|
|
79
|
+
async getCapabilities(): Promise<("text" | "image")[]> {
|
|
80
|
+
try {
|
|
81
|
+
// When loaded, this works alright
|
|
82
|
+
const { modalities } = await this.server.fetchModelProps(this.id);
|
|
83
|
+
return modalities.vision ? ["text", "image"] : ["text"];
|
|
84
|
+
} catch {
|
|
85
|
+
// Otherwise, we have to search for it ourselves
|
|
86
|
+
const { data } = await this.server.fetchModels();
|
|
87
|
+
const model = data.find((d) => d.id === this.id);
|
|
88
|
+
if (!model) return ["text"];
|
|
89
|
+
|
|
90
|
+
const { input_modalities } = model.architecture!;
|
|
91
|
+
const response = input_modalities.filter(
|
|
92
|
+
(mod) => mod === "text" || mod === "image",
|
|
93
|
+
);
|
|
94
|
+
|
|
95
|
+
return response;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
54
98
|
|
|
55
99
|
/**
|
|
56
100
|
* Gets the load status of the model
|
|
57
101
|
*
|
|
58
|
-
* @returns The current
|
|
102
|
+
* @returns The current {@link Status}
|
|
59
103
|
*/
|
|
60
104
|
public async getStatus(): Promise<Status> {
|
|
61
105
|
try {
|
|
62
|
-
const { is_sleeping, error } = await
|
|
63
|
-
`/props?model=${this.id}&autoload=false`,
|
|
64
|
-
);
|
|
106
|
+
const { is_sleeping, error } = await this.server.fetchModelProps(this.id);
|
|
65
107
|
|
|
66
108
|
if (is_sleeping) return Status.SLEEPING;
|
|
67
109
|
if (!error) return Status.LOADED;
|
|
110
|
+
if (error.code === 401) return Status.UNAUTHORIZED;
|
|
68
111
|
if (error.code === 503) return Status.LOADING;
|
|
69
112
|
if (error.code === 400 && error.message === "model is not loaded")
|
|
70
113
|
return Status.UNLOADED;
|
|
@@ -76,19 +119,23 @@ export abstract class BaseModel {
|
|
|
76
119
|
}
|
|
77
120
|
|
|
78
121
|
/**
|
|
79
|
-
* Gets the context size of a particular model
|
|
122
|
+
* Gets the context size of a particular model.
|
|
80
123
|
*
|
|
81
|
-
* @returns The
|
|
124
|
+
* @returns The context size in tokens
|
|
82
125
|
*/
|
|
83
126
|
async getContextSize(): Promise<number> {
|
|
84
|
-
|
|
85
|
-
|
|
127
|
+
try {
|
|
128
|
+
const { data } = await this.server.fetchModels();
|
|
129
|
+
const { n_ctx } = data.find((m) => m.id === this.id)?.meta!;
|
|
86
130
|
|
|
87
|
-
|
|
131
|
+
return n_ctx ?? DEFAULT_CTX;
|
|
132
|
+
} catch {
|
|
133
|
+
return DEFAULT_CTX;
|
|
134
|
+
}
|
|
88
135
|
}
|
|
89
136
|
|
|
90
137
|
/**
|
|
91
|
-
*
|
|
138
|
+
* Returns a label for the model selection screen
|
|
92
139
|
* @returns A label structured as "<icon> <name>"
|
|
93
140
|
*/
|
|
94
141
|
async getLabel(): Promise<string> {
|
|
@@ -97,11 +144,12 @@ export abstract class BaseModel {
|
|
|
97
144
|
}
|
|
98
145
|
|
|
99
146
|
/**
|
|
100
|
-
* Returns
|
|
147
|
+
* Returns human-readable information about the model
|
|
101
148
|
* @returns A string with the model information
|
|
102
149
|
*/
|
|
103
150
|
async getInfo(): Promise<string> {
|
|
104
151
|
const messages = [
|
|
152
|
+
`Server : ${this.serverUrl}`,
|
|
105
153
|
`ID : ${this.id}`,
|
|
106
154
|
`Model : ${this.name}`,
|
|
107
155
|
`Reasoning : ${this.reasoning}`,
|
|
@@ -124,6 +172,13 @@ export abstract class BaseModel {
|
|
|
124
172
|
id: this.id,
|
|
125
173
|
name: this.name,
|
|
126
174
|
reasoning: this.reasoning,
|
|
175
|
+
thinkingLevelMap: {
|
|
176
|
+
minimal: "minimal",
|
|
177
|
+
low: "low",
|
|
178
|
+
medium: "medium",
|
|
179
|
+
high: "high",
|
|
180
|
+
xhigh: "xhigh",
|
|
181
|
+
},
|
|
127
182
|
input: await this.getCapabilities(),
|
|
128
183
|
contextWindow: await this.getContextSize(),
|
|
129
184
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
@@ -140,7 +195,7 @@ export abstract class BaseModel {
|
|
|
140
195
|
const status = await this.getStatus();
|
|
141
196
|
if (status === Status.LOADED || status === Status.SLEEPING) return;
|
|
142
197
|
|
|
143
|
-
await
|
|
198
|
+
await this.server.postRequest("load", this.id);
|
|
144
199
|
await this.pollStatus();
|
|
145
200
|
}
|
|
146
201
|
|
|
@@ -148,7 +203,7 @@ export abstract class BaseModel {
|
|
|
148
203
|
* Unloads the model from llama-server
|
|
149
204
|
*/
|
|
150
205
|
async unload(): Promise<void> {
|
|
151
|
-
await
|
|
206
|
+
await this.server.postRequest("unload", this.id);
|
|
152
207
|
}
|
|
153
208
|
|
|
154
209
|
/**
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { DEFAULT_CTX } from "../constants";
|
|
2
|
+
import { Mode } from "../enums/mode";
|
|
3
|
+
import { SingleModel } from "./singleModel";
|
|
4
|
+
|
|
5
|
+
/**
 * Model exposed by a server that reports models in the legacy format,
 * such as ik_llama.cpp.
 *
 * Behaves like a {@link SingleModel}, but reads the context size from the
 * legacy payload fields and degrades to text-only capabilities when they
 * cannot be detected.
 */
export class LegacyModel extends SingleModel {
  get mode(): Mode {
    return Mode.LEGACY;
  }

  /**
   * Retrieves the context size when the user is running
   * a server that uses legacy models, such as ik_llama.cpp
   *
   * The `max_model_len` of the first listed model is preferred; when it is
   * `0` or missing, the `n_ctx` reported by the props endpoint is used.
   * Any failure (request error, empty or malformed model list) yields
   * {@link DEFAULT_CTX} instead of throwing.
   *
   * @returns The context size
   */
  async getContextSize(): Promise<number> {
    try {
      const props = await this.server.fetchModelProps(this.id);
      const models = await this.server.fetchModels();

      // The legacy fields are read through a cast; presumably the shared
      // endpoint typings do not declare them — TODO confirm.
      const { n_ctx } = props as unknown as { n_ctx?: number };
      const { data } = models as unknown as {
        data?: { max_model_len?: number }[];
      };

      // Optional chaining guards against an empty or absent model list,
      // which previously raised a TypeError during destructuring.
      const maxModelLen = data?.[0]?.max_model_len;
      const contextSize = maxModelLen === 0 ? n_ctx : maxModelLen;

      return contextSize ?? n_ctx ?? DEFAULT_CTX;
    } catch {
      // Best effort: an unknown context size must not break the caller
      return DEFAULT_CTX;
    }
  }

  /**
   * Detects the capabilities of the model when the user is running
   * a server that uses legacy models, such as ik_llama.cpp
   *
   * @returns An array of capabilities, as expected by Pi
   */
  async getCapabilities(): Promise<("text" | "image")[]> {
    try {
      return await super.getCapabilities();
    } catch {
      // When auth is wrong in a legacy model, we simply can't detect the real capabilities
      return ["text"];
    }
  }
}
|
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
import { DEFAULT_CTX, POLLING_INTERVAL, POLLING_TIMEOUT } from "../constants";
|
|
2
2
|
import { Mode } from "../enums/mode";
|
|
3
3
|
import { Status } from "../enums/status";
|
|
4
|
-
import { ModelsEndpoint } from "../interfaces/endpoints/models";
|
|
5
|
-
import { PropsEndpoint } from "../interfaces/endpoints/props";
|
|
6
|
-
import { rpc } from "../tools/retriever";
|
|
7
4
|
import { BaseModel } from "./baseModel";
|
|
8
5
|
|
|
9
6
|
/**
|
|
@@ -16,19 +13,6 @@ export class RouterModel extends BaseModel {
|
|
|
16
13
|
return Mode.ROUTER;
|
|
17
14
|
}
|
|
18
15
|
|
|
19
|
-
async getStatus(): Promise<Status> {
|
|
20
|
-
const { data } = await rpc<ModelsEndpoint>("/models");
|
|
21
|
-
const model = data.find((m) => m.id === this.id);
|
|
22
|
-
if (!model) return Status.FAILED;
|
|
23
|
-
|
|
24
|
-
const status = this.statusMapper[model.status!.value];
|
|
25
|
-
if (status === Status.UNLOADED || status === Status.LOADING) {
|
|
26
|
-
return super.getStatus();
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
return status;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
16
|
/**
|
|
33
17
|
* Workaround for the currently-bugged /models status detection
|
|
34
18
|
* (I suspect it was introduced in PR #22683 of llama.cpp)
|
|
@@ -50,7 +34,7 @@ export class RouterModel extends BaseModel {
|
|
|
50
34
|
// Grab the glitch
|
|
51
35
|
while (Date.now() - startTime <= limit) {
|
|
52
36
|
try {
|
|
53
|
-
await
|
|
37
|
+
await this.server.fetchModelProps(this.id);
|
|
54
38
|
break;
|
|
55
39
|
} catch {
|
|
56
40
|
elapsed += POLLING_INTERVAL;
|
|
@@ -62,19 +46,12 @@ export class RouterModel extends BaseModel {
|
|
|
62
46
|
return await super.pollStatus(startTime, timeout);
|
|
63
47
|
}
|
|
64
48
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
const response = input_modalities.filter(
|
|
72
|
-
(mod) => mod === "text" || mod === "image",
|
|
73
|
-
);
|
|
74
|
-
|
|
75
|
-
return response;
|
|
76
|
-
}
|
|
77
|
-
|
|
49
|
+
/**
|
|
50
|
+
* Gets the context size of a particular model.
|
|
51
|
+
* In router mode, falls back to parsing CLI args when the model is unloaded.
|
|
52
|
+
*
|
|
53
|
+
* @returns The context size in tokens
|
|
54
|
+
*/
|
|
78
55
|
async getContextSize(): Promise<number> {
|
|
79
56
|
// We can get a more accurate context size if the model is already loaded
|
|
80
57
|
if ((await this.getStatus()) === Status.LOADED) {
|
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
import { Mode } from "../enums/mode";
|
|
2
|
-
import { ModelsEndpoint } from "../interfaces/endpoints/models";
|
|
3
|
-
import { rpc } from "../tools/retriever";
|
|
4
2
|
import { BaseModel } from "./baseModel";
|
|
5
3
|
|
|
6
4
|
export class SingleModel extends BaseModel {
|
|
@@ -9,10 +7,15 @@ export class SingleModel extends BaseModel {
|
|
|
9
7
|
}
|
|
10
8
|
|
|
11
9
|
async getCapabilities(): Promise<("text" | "image")[]> {
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
try {
|
|
11
|
+
return await super.getCapabilities();
|
|
12
|
+
} catch {
|
|
13
|
+
// This is required when auth is wrong
|
|
14
|
+
const { models } = await this.server.fetchModels();
|
|
15
|
+
const [{ capabilities }] = models!;
|
|
14
16
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
+
const hasImage = capabilities.includes("multimodal");
|
|
18
|
+
return hasImage ? ["text", "image"] : ["text"];
|
|
19
|
+
}
|
|
17
20
|
}
|
|
18
21
|
}
|
package/src/resolver.ts
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AuthStorage,
|
|
3
|
+
getAgentDir,
|
|
4
|
+
SettingsManager,
|
|
5
|
+
} from "@earendil-works/pi-coding-agent";
|
|
6
|
+
import { readFile } from "node:fs/promises";
|
|
7
|
+
import { join } from "node:path";
|
|
8
|
+
import {
|
|
9
|
+
API_KEY_PLACEHOLDER,
|
|
10
|
+
DEFAULT_LLAMA_SERVER_URL,
|
|
11
|
+
DEFAULT_THINKING_BUDGETS,
|
|
12
|
+
} from "./constants";
|
|
13
|
+
import { ThinkingLevel } from "./interfaces/levels";
|
|
14
|
+
|
|
15
|
+
/**
 * Resolves the configuration used to talk to llama-server instances:
 * server URLs, API keys and thinking settings.
 *
 * URLs are looked up in this order: project configuration, the
 * `LLAMA_SERVER_URL` environment variable, global settings, and finally
 * {@link DEFAULT_LLAMA_SERVER_URL}.
 */
export class ConfigResolver {
  private warnings: string[] = [];

  private cachedUrls: string[] = [];
  private authStorage = AuthStorage.create(join(getAgentDir(), "auth.json"));
  private settingsManager = SettingsManager.create(
    process.cwd(),
    getAgentDir(),
  );

  /**
   * Resolves the llama-server URL by searching in the global settings.json
   *
   * @returns The configured `llamaServerUrl`, or `null` when it is not set
   */
  private async resolveGlobalUrl(): Promise<string | null> {
    const settings = this.settingsManager.getGlobalSettings();
    const { llamaServerUrl = null } = settings as Record<string, string>;

    return llamaServerUrl;
  }

  /**
   * Resolves the llama-server URL from the project configuration.
   *
   * The deprecated `.pi/llama-server.json` file is checked first; whenever it
   * can be read and parsed a deprecation warning is recorded. Its `url` is
   * only used when it is a non-empty string, otherwise the lookup continues
   * with `llamaServerUrl` from the project-level `.pi/settings.json`.
   *
   * @returns The project URL, or `null` when none is configured
   */
  private async resolveProjectUrl(): Promise<string | null> {
    // Warn the user about the deprecated file
    try {
      const filePath = join(process.cwd(), ".pi", "llama-server.json");
      const { url = null } = JSON.parse(await readFile(filePath, "utf-8"));

      const messages = [
        "[pi-llama-cpp]",
        "The project-level `.pi/llama-server.json` file has been deprecated.",
        "It will work for now, but you must follow these instructions as soon as possible:",
        '- Move your url to the project-level `.pi/settings.json` file as {"llamaServerUrl": "<url>"}.',
        "- Remove the old `.pi/llama-server.json` file.",
      ];

      this.warnings.push(messages.join("\n"));

      // A legacy file without a usable url (e.g. emptied after migrating)
      // must not hide the value stored in `.pi/settings.json`.
      if (typeof url === "string" && url.trim().length > 0) return url;
    } catch {
      // No old file available, continue as normal
    }

    const settings = this.settingsManager.getProjectSettings();
    const { llamaServerUrl = null } = settings as Record<string, string>;

    return llamaServerUrl;
  }

  /**
   * Resolves the llama-server URL from the environment
   *
   * @returns The value of `LLAMA_SERVER_URL`, or `null` when it is unset
   */
  private async resolveEnvUrl(): Promise<string | null> {
    return process.env.LLAMA_SERVER_URL ?? null;
  }

  /**
   * Tries all possible ways to retrieve the llama-server URL(s)
   *
   * @returns The first non-empty value found, still in its raw
   * (possibly `;`-separated) form
   */
  private async extractJoinedUrls(): Promise<string> {
    // 1. per-project config
    let response = await this.resolveProjectUrl();
    if (response) return response;

    // 2. env
    response = await this.resolveEnvUrl();
    if (response) return response;

    // 3. global settings
    response = await this.resolveGlobalUrl();
    if (response) return response;

    // 4. default
    return DEFAULT_LLAMA_SERVER_URL;
  }

  /**
   * Resolves URLs where llama-servers are running (cached)
   *
   * The raw value is split on `;`; entries are trimmed, empty entries are
   * dropped and trailing slashes are removed.
   *
   * @returns The list of server base URLs
   */
  async resolveUrls(): Promise<string[]> {
    if (this.cachedUrls.length > 0) return this.cachedUrls;

    const raw = await this.extractJoinedUrls();
    const urls = raw
      .split(";")
      .map((u) => u.trim())
      .filter((u) => u.length > 0)
      .map((u) => u.replace(/\/+$/, ""));

    this.cachedUrls = urls;
    return this.cachedUrls;
  }

  /**
   * Resolves API key for the provider ID using Pi's AuthStorage
   *
   * The storage is reloaded on every call before the key is read.
   *
   * @param providerId The provider whose key is requested
   * @returns The stored key, or {@link API_KEY_PLACEHOLDER} when none exists
   */
  async resolveApiKey(providerId: string): Promise<string> {
    this.authStorage.reload();
    const apiKey = await this.authStorage.getApiKey(providerId);

    return apiKey ?? API_KEY_PLACEHOLDER;
  }

  /**
   * Returns warnings collected during URL resolution.
   *
   * The internal list is emptied by this call, so each warning is handed
   * out only once.
   *
   * @returns The pending warnings
   */
  getWarnings(): string[] {
    // splice(0) removes every entry in place and returns the removed ones
    return this.warnings.splice(0);
  }

  /**
   * Resolves the current thinking level from Pi.
   *
   * @returns Selected level
   */
  resolveThinkingLevel(): ThinkingLevel | undefined {
    return this.settingsManager.getDefaultThinkingLevel();
  }

  /**
   * Resolves the effective thinking budgets from settings
   *
   * Budgets found in the settings override {@link DEFAULT_THINKING_BUDGETS}
   * on a per-level basis.
   *
   * @returns Thinking budgets
   */
  resolveThinkingBudgets(): Record<ThinkingLevel, number> {
    const settingsBudgets = this.settingsManager.getThinkingBudgets() ?? {};
    const availableBudgets = {
      ...DEFAULT_THINKING_BUDGETS,
      ...settingsBudgets,
    };

    return availableBudgets;
  }
}
|
package/src/server.ts
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import { PROVIDER_NAME, PROVIDER_PREFIX } from "./constants";
|
|
2
|
+
import { Mode } from "./enums/mode";
|
|
3
|
+
import { ServerStatus } from "./enums/serverStatus";
|
|
4
|
+
import { HealthEndpoint } from "./interfaces/endpoints/health";
|
|
5
|
+
import { ModelsEndpoint } from "./interfaces/endpoints/models";
|
|
6
|
+
import { PropsEndpoint } from "./interfaces/endpoints/props";
|
|
7
|
+
import { BaseModel } from "./models/baseModel";
|
|
8
|
+
import { LegacyModel } from "./models/legacyModel";
|
|
9
|
+
import { RouterModel } from "./models/routerModel";
|
|
10
|
+
import { SingleModel } from "./models/singleModel";
|
|
11
|
+
import { ConfigResolver } from "./resolver";
|
|
12
|
+
|
|
13
|
+
/**
 * Client for a single llama-server instance.
 *
 * Wraps the HTTP endpoints used by the extension and keeps the list of
 * models exposed by the server.
 */
export class Server {
  public readonly models: BaseModel[] = [];
  private configResolver = new ConfigResolver();

  constructor(readonly baseUrl: string) {}

  /**
   * Generates a unique provider ID from a server URL.
   */
  get providerId(): string {
    return `${PROVIDER_PREFIX}=${this.baseUrl}`;
  }

  /**
   * Generates a human-readable provider name from a server URL.
   */
  get providerName(): string {
    return `${PROVIDER_NAME} (${this.baseUrl})`;
  }

  /**
   * Retrieves the API key from the resolver
   *
   * @returns The API key
   */
  async getApiKey(): Promise<string> {
    return await this.configResolver.resolveApiKey(this.providerId);
  }

  /**
   * Fetches models from the server and populates {@link models}
   *
   * The model class is chosen from the detected server mode and the
   * resulting list is sorted by model id. {@link models} is updated in
   * place, so references to the array stay valid.
   */
  async initialize(): Promise<void> {
    const { data } = await this.fetchModels();
    // NOTE(review): detectServerMode() requests the model list again; the
    // request sequence is left untouched here on purpose.
    const mode = await this.detectServerMode();

    // Setup models
    const modelCtor = {
      [Mode.ROUTER]: RouterModel,
      [Mode.LEGACY]: LegacyModel,
      [Mode.SINGLE]: SingleModel,
    }[mode];

    const models: BaseModel[] = data
      .map((m) => new modelCtor(m, this))
      .sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0));

    this.models.splice(0, this.models.length, ...models);
  }

  /**
   * Detects the mode of the server
   *
   * A `router` role wins; otherwise the server is considered legacy when
   * its first listed model carries a `max_model_len` field. An empty model
   * list is treated as single mode instead of raising a TypeError.
   *
   * @returns The detected mode
   */
  private async detectServerMode(): Promise<Mode> {
    const { role } = await this.fetchServerProps();
    const { data } = await this.fetchModels();

    if (role === "router") return Mode.ROUTER;

    // `in` throws on undefined, so the first entry must be checked first
    const first = data?.[0];
    if (first && "max_model_len" in first) return Mode.LEGACY;

    return Mode.SINGLE;
  }

  /**
   * Checks if the server is ready, with a timeout.
   *
   * @param timeout Maximum time to wait for the health check
   * @returns The server status
   */
  async isReady(timeout: number): Promise<ServerStatus> {
    let timer: ReturnType<typeof setTimeout> | undefined;

    try {
      const timeoutPromise = new Promise<never>((_, reject) => {
        timer = setTimeout(() => reject(new Error("timeout")), timeout);
      });
      const health = await Promise.race([
        this.fetchServerHealth(),
        timeoutPromise,
      ]);

      return health.status === "ok"
        ? ServerStatus.READY
        : ServerStatus.UNREACHABLE;
    } catch (error) {
      if (error instanceof Error && error.message === "timeout") {
        return ServerStatus.TIMEOUT;
      }
      return ServerStatus.UNREACHABLE;
    } finally {
      // Without this the timer stays pending for the full timeout even
      // after the health check has already settled.
      clearTimeout(timer);
    }
  }

  /**
   * Retrieves the health status of the server
   *
   * @returns The health status
   */
  async fetchServerHealth(): Promise<HealthEndpoint> {
    return await this.rpc<HealthEndpoint>("/health");
  }

  /**
   * Fetches models from the server
   *
   * @returns The models from the server
   */
  async fetchModels(): Promise<ModelsEndpoint> {
    return await this.rpc<ModelsEndpoint>("/v1/models");
  }

  /**
   * Fetches general properties of the server
   *
   * @returns The properties of the server
   */
  async fetchServerProps(): Promise<PropsEndpoint> {
    return await this.rpc<PropsEndpoint>("/props?autoload=false");
  }

  /**
   * Fetches properties of a specific model from the server
   *
   * @param modelId The ID of the model
   * @returns The properties of the specified model
   */
  async fetchModelProps(modelId: string): Promise<PropsEndpoint> {
    // Encode the id so characters such as `&`, `#` or `+` cannot alter
    // the query string.
    const model = encodeURIComponent(modelId);

    return await this.rpc<PropsEndpoint>(
      `/props?model=${model}&autoload=false`,
    );
  }

  /**
   * Sends a request associated to a specific model from the server
   *
   * @param resource The specified resource ("load" | "unload")
   * @param model The targeted model
   * @returns The parsed response of the server
   */
  async postRequest(
    resource: "load" | "unload",
    model: string,
  ): Promise<ModelsEndpoint> {
    return await this.rpc<ModelsEndpoint>(`/models/${resource}`, { model });
  }

  /**
   * Makes an HTTP request to the llama-server and returns the parsed JSON response
   *
   * A `POST` with a JSON body is sent when `body` is given, a `GET`
   * otherwise. The HTTP status is not inspected: error payloads are
   * returned to the caller like any other response.
   *
   * @param endpoint The endpoint path to fetch (e.g. "/health")
   * @param body The optional request body for POST requests
   * @returns The parsed JSON response from the server
   */
  private async rpc<T>(
    endpoint: string,
    body?: Record<string, unknown>,
  ): Promise<T> {
    const url = `${this.baseUrl}${endpoint}`;
    const apiKey = await this.getApiKey();

    const headers: Record<string, string> = {};
    if (body) headers["Content-Type"] = "application/json";
    if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;

    const res = await fetch(url, {
      method: body ? "POST" : "GET",
      headers,
      body: body ? JSON.stringify(body) : undefined,
    });

    const response: T = await res.json();
    return response;
  }
}
|