pi-llama-cpp 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -30
- package/package.json +6 -5
- package/src/constants.ts +27 -5
- package/src/enums/action.ts +3 -2
- package/src/enums/mode.ts +1 -0
- package/src/enums/serverStatus.ts +6 -0
- package/src/enums/status.ts +1 -0
- package/src/index.ts +53 -31
- package/src/interfaces/auth.ts +1 -5
- package/src/interfaces/endpoints/props.ts +1 -0
- package/src/interfaces/levels.ts +7 -0
- package/src/managers/command.ts +290 -0
- package/src/managers/events.ts +101 -0
- package/src/managers/server.ts +136 -0
- package/src/models/baseModel.ts +75 -20
- package/src/models/legacyModel.ts +45 -0
- package/src/models/routerModel.ts +7 -30
- package/src/models/singleModel.ts +9 -6
- package/src/resolver.ts +152 -0
- package/src/server.ts +187 -0
- package/tests/commandManager.test.ts +182 -133
- package/tests/events.test.ts +256 -0
- package/tests/legacyModel.test.ts +112 -0
- package/tests/mocks.ts +100 -0
- package/tests/resolver.test.ts +143 -106
- package/tests/routerModel.test.ts +46 -68
- package/tests/server.test.ts +176 -0
- package/tests/serverManager.test.ts +130 -0
- package/tests/singleModel.test.ts +21 -29
- package/src/commands/models.ts +0 -228
- package/src/events.ts +0 -26
- package/src/manager.ts +0 -96
- package/src/tools/resolver.ts +0 -136
- package/src/tools/retriever.ts +0 -71
- package/tests/handlers.test.ts +0 -164
- package/tests/modelsCommand.test.ts +0 -270
|
@@ -1,16 +1,8 @@
|
|
|
1
|
-
import { describe, expect, it
|
|
1
|
+
import { beforeEach, describe, expect, it } from "vitest";
|
|
2
2
|
import { Mode } from "../src/enums/mode";
|
|
3
3
|
import { DataProperty } from "../src/interfaces/endpoints/models";
|
|
4
4
|
import { RouterModel } from "../src/models/routerModel";
|
|
5
|
-
|
|
6
|
-
// Mock the retriever module before importing anything that depends on it
|
|
7
|
-
const mockRpc = vi.fn();
|
|
8
|
-
|
|
9
|
-
vi.mock("../src/tools/retriever", () => ({
|
|
10
|
-
rpc: (...args: unknown[]) => mockRpc(...args),
|
|
11
|
-
isServerReady: vi.fn(),
|
|
12
|
-
listModels: vi.fn(),
|
|
13
|
-
}));
|
|
5
|
+
import { createMockServer, mockRpc } from "./mocks";
|
|
14
6
|
|
|
15
7
|
// Helper to create a mock DataProperty
|
|
16
8
|
const createModel = (overrides: Partial<DataProperty> = {}): DataProperty => ({
|
|
@@ -24,6 +16,10 @@ const createModel = (overrides: Partial<DataProperty> = {}): DataProperty => ({
|
|
|
24
16
|
...overrides,
|
|
25
17
|
});
|
|
26
18
|
|
|
19
|
+
beforeEach(() => {
|
|
20
|
+
mockRpc.mockClear();
|
|
21
|
+
});
|
|
22
|
+
|
|
27
23
|
describe("RouterModel context size extraction", () => {
|
|
28
24
|
it("should extract --ctx-size value", () => {
|
|
29
25
|
const model = new RouterModel(
|
|
@@ -41,6 +37,7 @@ describe("RouterModel context size extraction", () => {
|
|
|
41
37
|
preset: "default",
|
|
42
38
|
},
|
|
43
39
|
}),
|
|
40
|
+
createMockServer(),
|
|
44
41
|
);
|
|
45
42
|
|
|
46
43
|
// Access the private method via any
|
|
@@ -57,6 +54,7 @@ describe("RouterModel context size extraction", () => {
|
|
|
57
54
|
preset: "default",
|
|
58
55
|
},
|
|
59
56
|
}),
|
|
57
|
+
createMockServer(),
|
|
60
58
|
);
|
|
61
59
|
|
|
62
60
|
const extractFrom = (model as any).extractFrom.bind(model);
|
|
@@ -72,6 +70,7 @@ describe("RouterModel context size extraction", () => {
|
|
|
72
70
|
preset: "default",
|
|
73
71
|
},
|
|
74
72
|
}),
|
|
73
|
+
createMockServer(),
|
|
75
74
|
);
|
|
76
75
|
|
|
77
76
|
const extractFrom = (model as any).extractFrom.bind(model);
|
|
@@ -88,6 +87,7 @@ describe("RouterModel context size extraction", () => {
|
|
|
88
87
|
preset: "default",
|
|
89
88
|
},
|
|
90
89
|
}),
|
|
90
|
+
createMockServer(),
|
|
91
91
|
);
|
|
92
92
|
|
|
93
93
|
const extractFrom = (model as any).extractFrom.bind(model);
|
|
@@ -103,6 +103,7 @@ describe("RouterModel context size extraction", () => {
|
|
|
103
103
|
preset: "default",
|
|
104
104
|
},
|
|
105
105
|
}),
|
|
106
|
+
createMockServer(),
|
|
106
107
|
);
|
|
107
108
|
|
|
108
109
|
const extractFrom = (model as any).extractFrom.bind(model);
|
|
@@ -110,27 +111,9 @@ describe("RouterModel context size extraction", () => {
|
|
|
110
111
|
});
|
|
111
112
|
|
|
112
113
|
it("should prefer --ctx-size over --fit-ctx when loaded", async () => {
|
|
113
|
-
// First call: getStatus() ->
|
|
114
|
-
mockRpc.mockResolvedValueOnce({
|
|
115
|
-
|
|
116
|
-
{
|
|
117
|
-
id: "test-model",
|
|
118
|
-
status: {
|
|
119
|
-
value: "loaded",
|
|
120
|
-
args: [
|
|
121
|
-
"--model",
|
|
122
|
-
"gguf",
|
|
123
|
-
"--ctx-size",
|
|
124
|
-
"4096",
|
|
125
|
-
"--fit-ctx",
|
|
126
|
-
"8192",
|
|
127
|
-
],
|
|
128
|
-
preset: "default",
|
|
129
|
-
},
|
|
130
|
-
},
|
|
131
|
-
],
|
|
132
|
-
});
|
|
133
|
-
// Second call: super.getContextSize() -> /models with meta.n_ctx
|
|
114
|
+
// First call: getStatus() -> fetchModelProps
|
|
115
|
+
mockRpc.mockResolvedValueOnce({ is_sleeping: false });
|
|
116
|
+
// Second call: super.getContextSize() -> fetchModels with meta.n_ctx
|
|
134
117
|
mockRpc.mockResolvedValueOnce({
|
|
135
118
|
data: [
|
|
136
119
|
{
|
|
@@ -148,6 +131,7 @@ describe("RouterModel context size extraction", () => {
|
|
|
148
131
|
preset: "default",
|
|
149
132
|
},
|
|
150
133
|
}),
|
|
134
|
+
createMockServer(),
|
|
151
135
|
);
|
|
152
136
|
|
|
153
137
|
const ctxSize = await model.getContextSize();
|
|
@@ -155,20 +139,9 @@ describe("RouterModel context size extraction", () => {
|
|
|
155
139
|
});
|
|
156
140
|
|
|
157
141
|
it("should return n_ctx from meta when loaded without context size args", async () => {
|
|
158
|
-
// First call: getStatus() ->
|
|
159
|
-
mockRpc.mockResolvedValueOnce({
|
|
160
|
-
|
|
161
|
-
{
|
|
162
|
-
id: "test-model",
|
|
163
|
-
status: {
|
|
164
|
-
value: "loaded",
|
|
165
|
-
args: ["--model", "gguf"],
|
|
166
|
-
preset: "default",
|
|
167
|
-
},
|
|
168
|
-
},
|
|
169
|
-
],
|
|
170
|
-
});
|
|
171
|
-
// Second call: super.getContextSize() -> /models with meta.n_ctx
|
|
142
|
+
// First call: getStatus() -> fetchModelProps
|
|
143
|
+
mockRpc.mockResolvedValueOnce({ is_sleeping: false });
|
|
144
|
+
// Second call: super.getContextSize() -> fetchModels with meta.n_ctx
|
|
172
145
|
mockRpc.mockResolvedValueOnce({
|
|
173
146
|
data: [
|
|
174
147
|
{
|
|
@@ -186,6 +159,7 @@ describe("RouterModel context size extraction", () => {
|
|
|
186
159
|
preset: "default",
|
|
187
160
|
},
|
|
188
161
|
}),
|
|
162
|
+
createMockServer(),
|
|
189
163
|
);
|
|
190
164
|
|
|
191
165
|
const ctxSize = await model.getContextSize();
|
|
@@ -194,33 +168,34 @@ describe("RouterModel context size extraction", () => {
|
|
|
194
168
|
});
|
|
195
169
|
|
|
196
170
|
describe("RouterModel capabilities detection", () => {
|
|
197
|
-
it("should detect image capability
|
|
198
|
-
mockRpc.mockResolvedValueOnce({
|
|
199
|
-
data: [
|
|
200
|
-
{
|
|
201
|
-
id: "test-model",
|
|
202
|
-
status: {
|
|
203
|
-
value: "loaded",
|
|
204
|
-
args: [],
|
|
205
|
-
preset: "default",
|
|
206
|
-
failed: false,
|
|
207
|
-
},
|
|
208
|
-
architecture: {
|
|
209
|
-
input_modalities: ["text", "image"],
|
|
210
|
-
output_modalities: ["text"],
|
|
211
|
-
},
|
|
212
|
-
},
|
|
213
|
-
],
|
|
214
|
-
});
|
|
171
|
+
it("should detect image capability when modalities.vision is true", async () => {
|
|
172
|
+
mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
|
|
215
173
|
|
|
216
|
-
const model = new RouterModel(createModel());
|
|
174
|
+
const model = new RouterModel(createModel(), createMockServer());
|
|
217
175
|
const capabilities = await model.getCapabilities();
|
|
218
176
|
|
|
219
177
|
expect(capabilities).toEqual(["text", "image"]);
|
|
220
|
-
expect(mockRpc).toHaveBeenCalledWith(
|
|
178
|
+
expect(mockRpc).toHaveBeenCalledWith(
|
|
179
|
+
"/props?model=test-model&autoload=false",
|
|
180
|
+
);
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
it("should return text-only when fetchModelProps fails", async () => {
|
|
184
|
+
// First call (fetchModelProps) throws to trigger fallback
|
|
185
|
+
mockRpc.mockRejectedValueOnce(new Error("props not available"));
|
|
186
|
+
// Second call (fetchModels) returns empty data so model is not found
|
|
187
|
+
mockRpc.mockResolvedValueOnce({ data: [] });
|
|
188
|
+
|
|
189
|
+
const model = new RouterModel(createModel(), createMockServer());
|
|
190
|
+
const capabilities = await model.getCapabilities();
|
|
191
|
+
|
|
192
|
+
expect(capabilities).toEqual(["text"]);
|
|
221
193
|
});
|
|
222
194
|
|
|
223
195
|
it("should detect text-only capability when only text in input_modalities", async () => {
|
|
196
|
+
// First call (fetchModelProps) throws to trigger fallback
|
|
197
|
+
mockRpc.mockRejectedValueOnce(new Error("props not available"));
|
|
198
|
+
// Second call (fetchModels) returns the data
|
|
224
199
|
mockRpc.mockResolvedValueOnce({
|
|
225
200
|
data: [
|
|
226
201
|
{
|
|
@@ -239,13 +214,16 @@ describe("RouterModel capabilities detection", () => {
|
|
|
239
214
|
],
|
|
240
215
|
});
|
|
241
216
|
|
|
242
|
-
const model = new RouterModel(createModel());
|
|
217
|
+
const model = new RouterModel(createModel(), createMockServer());
|
|
243
218
|
const capabilities = await model.getCapabilities();
|
|
244
219
|
|
|
245
220
|
expect(capabilities).toEqual(["text"]);
|
|
246
221
|
});
|
|
247
222
|
|
|
248
223
|
it("should return text when model not found in /models response", async () => {
|
|
224
|
+
// First call (fetchModelProps) throws to trigger fallback
|
|
225
|
+
mockRpc.mockRejectedValueOnce(new Error("props not available"));
|
|
226
|
+
// Second call (fetchModels) returns data without matching model
|
|
249
227
|
mockRpc.mockResolvedValueOnce({
|
|
250
228
|
data: [
|
|
251
229
|
{
|
|
@@ -260,7 +238,7 @@ describe("RouterModel capabilities detection", () => {
|
|
|
260
238
|
],
|
|
261
239
|
});
|
|
262
240
|
|
|
263
|
-
const model = new RouterModel(createModel());
|
|
241
|
+
const model = new RouterModel(createModel(), createMockServer());
|
|
264
242
|
const capabilities = await model.getCapabilities();
|
|
265
243
|
|
|
266
244
|
expect(capabilities).toEqual(["text"]);
|
|
@@ -269,7 +247,7 @@ describe("RouterModel capabilities detection", () => {
|
|
|
269
247
|
|
|
270
248
|
describe("RouterModel mode", () => {
|
|
271
249
|
it("should always return ROUTER mode", () => {
|
|
272
|
-
const model = new RouterModel(createModel());
|
|
250
|
+
const model = new RouterModel(createModel(), createMockServer());
|
|
273
251
|
expect(model.mode).toBe(Mode.ROUTER);
|
|
274
252
|
});
|
|
275
253
|
});
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it } from "vitest";
|
|
2
|
+
import { ServerStatus } from "../src/enums/serverStatus";
|
|
3
|
+
import { Server } from "../src/server";
|
|
4
|
+
import { createMockServer, mockRpc } from "./mocks";
|
|
5
|
+
|
|
6
|
+
beforeEach(() => {
|
|
7
|
+
mockRpc.mockClear();
|
|
8
|
+
});
|
|
9
|
+
|
|
10
|
+
describe("Server providerId", () => {
|
|
11
|
+
it("should generate a unique provider ID from baseUrl", () => {
|
|
12
|
+
const server = new Server("http://127.0.0.1:8080");
|
|
13
|
+
expect(server.providerId).toBe("llama-server=http://127.0.0.1:8080");
|
|
14
|
+
});
|
|
15
|
+
|
|
16
|
+
it("should generate different IDs for different baseUrls", () => {
|
|
17
|
+
const server1 = new Server("http://127.0.0.1:8080");
|
|
18
|
+
const server2 = new Server("http://127.0.0.1:8081");
|
|
19
|
+
expect(server1.providerId).not.toBe(server2.providerId);
|
|
20
|
+
});
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
describe("Server providerName", () => {
|
|
24
|
+
it("should generate a human-readable provider name", () => {
|
|
25
|
+
const server = new Server("http://127.0.0.1:8080");
|
|
26
|
+
expect(server.providerName).toBe("Llama.cpp (http://127.0.0.1:8080)");
|
|
27
|
+
});
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
describe("Server fetchModels", () => {
|
|
31
|
+
it("should call the /models endpoint", async () => {
|
|
32
|
+
mockRpc.mockResolvedValueOnce({
|
|
33
|
+
data: [{ id: "model1" }],
|
|
34
|
+
models: [{ id: "model1" }],
|
|
35
|
+
object: "list",
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
const server = createMockServer();
|
|
39
|
+
const result = await server.fetchModels();
|
|
40
|
+
|
|
41
|
+
expect(result).toEqual({
|
|
42
|
+
data: [{ id: "model1" }],
|
|
43
|
+
models: [{ id: "model1" }],
|
|
44
|
+
object: "list",
|
|
45
|
+
});
|
|
46
|
+
expect(mockRpc).toHaveBeenCalledWith("/v1/models");
|
|
47
|
+
});
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
describe("Server fetchModelProps", () => {
|
|
51
|
+
it("should call the /props endpoint with model id", async () => {
|
|
52
|
+
mockRpc.mockResolvedValueOnce({
|
|
53
|
+
is_sleeping: false,
|
|
54
|
+
default_generation_settings: {},
|
|
55
|
+
total_slots: 1,
|
|
56
|
+
model_alias: "test",
|
|
57
|
+
model_path: "/path/to/model.gguf",
|
|
58
|
+
modalities: { vision: false, audio: false },
|
|
59
|
+
media_marker: "",
|
|
60
|
+
endpoint_slots: false,
|
|
61
|
+
endpoint_props: false,
|
|
62
|
+
endpoint_metrics: false,
|
|
63
|
+
webui: false,
|
|
64
|
+
webui_settings: {},
|
|
65
|
+
chat_template: "",
|
|
66
|
+
chat_template_caps: {},
|
|
67
|
+
bos_token: "",
|
|
68
|
+
eos_token: "",
|
|
69
|
+
build_info: "",
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
const server = createMockServer();
|
|
73
|
+
const result = await server.fetchModelProps("test-model");
|
|
74
|
+
|
|
75
|
+
expect(result.is_sleeping).toBe(false);
|
|
76
|
+
expect(mockRpc).toHaveBeenCalledWith(
|
|
77
|
+
"/props?model=test-model&autoload=false",
|
|
78
|
+
);
|
|
79
|
+
});
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
describe("Server fetchServerHealth", () => {
|
|
83
|
+
it("should call the /health endpoint", async () => {
|
|
84
|
+
mockRpc.mockResolvedValueOnce({ status: "ok" });
|
|
85
|
+
|
|
86
|
+
const server = createMockServer();
|
|
87
|
+
const result = await server.fetchServerHealth();
|
|
88
|
+
|
|
89
|
+
expect(result).toEqual({ status: "ok" });
|
|
90
|
+
expect(mockRpc).toHaveBeenCalledWith("/health");
|
|
91
|
+
});
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
describe("Server fetchServerProps", () => {
|
|
95
|
+
it("should call the /props endpoint without model", async () => {
|
|
96
|
+
mockRpc.mockResolvedValueOnce({
|
|
97
|
+
role: "router",
|
|
98
|
+
default_generation_settings: {},
|
|
99
|
+
total_slots: 2,
|
|
100
|
+
model_alias: "",
|
|
101
|
+
model_path: "",
|
|
102
|
+
modalities: { vision: false, audio: false },
|
|
103
|
+
media_marker: "",
|
|
104
|
+
endpoint_slots: false,
|
|
105
|
+
endpoint_props: false,
|
|
106
|
+
endpoint_metrics: false,
|
|
107
|
+
webui: false,
|
|
108
|
+
webui_settings: {},
|
|
109
|
+
chat_template: "",
|
|
110
|
+
chat_template_caps: {},
|
|
111
|
+
bos_token: "",
|
|
112
|
+
eos_token: "",
|
|
113
|
+
build_info: "",
|
|
114
|
+
is_sleeping: false,
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
const server = createMockServer();
|
|
118
|
+
const result = await server.fetchServerProps();
|
|
119
|
+
|
|
120
|
+
expect(result.role).toBe("router");
|
|
121
|
+
expect(mockRpc).toHaveBeenCalledWith("/props?autoload=false");
|
|
122
|
+
});
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
describe("Server postRequest", () => {
|
|
126
|
+
it("should call /models/load with model in body", async () => {
|
|
127
|
+
mockRpc.mockResolvedValueOnce({});
|
|
128
|
+
|
|
129
|
+
const server = createMockServer();
|
|
130
|
+
await server.postRequest("load", "test-model");
|
|
131
|
+
|
|
132
|
+
expect(mockRpc).toHaveBeenCalledWith("/models/load", {
|
|
133
|
+
model: "test-model",
|
|
134
|
+
});
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
it("should call /models/unload with model in body", async () => {
|
|
138
|
+
mockRpc.mockResolvedValueOnce({});
|
|
139
|
+
|
|
140
|
+
const server = createMockServer();
|
|
141
|
+
await server.postRequest("unload", "test-model");
|
|
142
|
+
|
|
143
|
+
expect(mockRpc).toHaveBeenCalledWith("/models/unload", {
|
|
144
|
+
model: "test-model",
|
|
145
|
+
});
|
|
146
|
+
});
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
describe("Server isReady", () => {
|
|
150
|
+
it("should return READY when health status is ok", async () => {
|
|
151
|
+
mockRpc.mockResolvedValueOnce({ status: "ok" });
|
|
152
|
+
|
|
153
|
+
const server = createMockServer();
|
|
154
|
+
const status = await server.isReady(1000);
|
|
155
|
+
|
|
156
|
+
expect(status).toBe(ServerStatus.READY);
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
it("should return UNREACHABLE when health check fails", async () => {
|
|
160
|
+
mockRpc.mockRejectedValueOnce(new Error("connection refused"));
|
|
161
|
+
|
|
162
|
+
const server = createMockServer();
|
|
163
|
+
const status = await server.isReady(1000);
|
|
164
|
+
|
|
165
|
+
expect(status).toBe(ServerStatus.UNREACHABLE);
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
it("should return UNREACHABLE when health status is not ok", async () => {
|
|
169
|
+
mockRpc.mockResolvedValueOnce({ status: "error" });
|
|
170
|
+
|
|
171
|
+
const server = createMockServer();
|
|
172
|
+
const status = await server.isReady(1000);
|
|
173
|
+
|
|
174
|
+
expect(status).toBe(ServerStatus.UNREACHABLE);
|
|
175
|
+
});
|
|
176
|
+
});
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
2
|
+
import { ServerManager } from "../src/managers/server";
|
|
3
|
+
import { BaseModel } from "../src/models/baseModel";
|
|
4
|
+
import { Server } from "../src/server";
|
|
5
|
+
import { createMockServer, mockRpc } from "./mocks";
|
|
6
|
+
|
|
7
|
+
const mockPi = {
|
|
8
|
+
registerProvider: vi.fn(),
|
|
9
|
+
registerCommand: vi.fn(),
|
|
10
|
+
setModel: vi.fn(),
|
|
11
|
+
};
|
|
12
|
+
|
|
13
|
+
beforeEach(() => {
|
|
14
|
+
vi.clearAllMocks();
|
|
15
|
+
mockRpc.mockImplementation((endpoint: string, fallback?: unknown) => {
|
|
16
|
+
const defaults: Record<string, unknown> = {
|
|
17
|
+
"/health": { status: "ok" },
|
|
18
|
+
"/props?autoload=false": { role: "router" },
|
|
19
|
+
"/v1/models": { data: [], object: "list" },
|
|
20
|
+
};
|
|
21
|
+
return Promise.resolve(defaults[endpoint] ?? fallback ?? {});
|
|
22
|
+
});
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
describe("Server", () => {
|
|
26
|
+
it("should generate provider IDs from URLs", () => {
|
|
27
|
+
const server1 = new Server("http://127.0.0.1:8080");
|
|
28
|
+
expect(server1.providerId).toBe("llama-server=http://127.0.0.1:8080");
|
|
29
|
+
const server2 = new Server("http://10.0.0.5:8080");
|
|
30
|
+
expect(server2.providerId).toBe("llama-server=http://10.0.0.5:8080");
|
|
31
|
+
const server3 = new Server("http://127.0.0.1");
|
|
32
|
+
expect(server3.providerId).toBe("llama-server=http://127.0.0.1");
|
|
33
|
+
const server4 = new Server("http://127.0.0.1:80");
|
|
34
|
+
expect(server4.providerId).toBe("llama-server=http://127.0.0.1:80");
|
|
35
|
+
const server5 = new Server("https://127.0.0.1:443");
|
|
36
|
+
expect(server5.providerId).toBe("llama-server=https://127.0.0.1:443");
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
it("should generate provider names from URLs", () => {
|
|
40
|
+
const server1 = new Server("http://127.0.0.1:8080");
|
|
41
|
+
expect(server1.providerName).toBe("Llama.cpp (http://127.0.0.1:8080)");
|
|
42
|
+
const server2 = new Server("http://10.0.0.5:8080");
|
|
43
|
+
expect(server2.providerName).toBe("Llama.cpp (http://10.0.0.5:8080)");
|
|
44
|
+
});
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
describe("ServerManager", () => {
|
|
48
|
+
it("should register providers for all servers", async () => {
|
|
49
|
+
const mockModel = {
|
|
50
|
+
name: "test-model",
|
|
51
|
+
id: "test-model",
|
|
52
|
+
toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model" }),
|
|
53
|
+
} as unknown as BaseModel;
|
|
54
|
+
mockRpc.mockImplementation((endpoint: string, fallback?: unknown) => {
|
|
55
|
+
if (endpoint === "/v1/models") {
|
|
56
|
+
return Promise.resolve({ data: [mockModel], object: "list" });
|
|
57
|
+
}
|
|
58
|
+
const defaults: Record<string, unknown> = {
|
|
59
|
+
"/health": { status: "ok" },
|
|
60
|
+
"/props?autoload=false": { role: "router" },
|
|
61
|
+
};
|
|
62
|
+
return Promise.resolve(defaults[endpoint] ?? fallback ?? {});
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
const server1 = createMockServer({
|
|
66
|
+
baseUrl: "http://127.0.0.1:8080",
|
|
67
|
+
apiKey: "key-1",
|
|
68
|
+
providerId: "llama-server=http://127.0.0.1:8080",
|
|
69
|
+
providerName: "Llama.cpp (http://127.0.0.1:8080)",
|
|
70
|
+
});
|
|
71
|
+
const server2 = createMockServer({
|
|
72
|
+
baseUrl: "http://127.0.0.1:8081",
|
|
73
|
+
apiKey: "key-2",
|
|
74
|
+
providerId: "llama-server=http://127.0.0.1:8081",
|
|
75
|
+
providerName: "Llama.cpp (http://127.0.0.1:8081)",
|
|
76
|
+
});
|
|
77
|
+
const manager = new ServerManager([server1, server2] as any);
|
|
78
|
+
|
|
79
|
+
await manager.initialize(mockPi as any);
|
|
80
|
+
|
|
81
|
+
expect(mockPi.registerProvider).toHaveBeenCalledTimes(2);
|
|
82
|
+
expect(mockPi.registerProvider).toHaveBeenCalledWith(
|
|
83
|
+
"llama-server=http://127.0.0.1:8080",
|
|
84
|
+
{
|
|
85
|
+
name: "Llama.cpp (http://127.0.0.1:8080)",
|
|
86
|
+
baseUrl: "http://127.0.0.1:8080",
|
|
87
|
+
api: "openai-completions",
|
|
88
|
+
apiKey: "key-1",
|
|
89
|
+
models: [{ id: "test-model" }],
|
|
90
|
+
},
|
|
91
|
+
);
|
|
92
|
+
expect(mockPi.registerProvider).toHaveBeenCalledWith(
|
|
93
|
+
"llama-server=http://127.0.0.1:8081",
|
|
94
|
+
{
|
|
95
|
+
name: "Llama.cpp (http://127.0.0.1:8081)",
|
|
96
|
+
baseUrl: "http://127.0.0.1:8081",
|
|
97
|
+
api: "openai-completions",
|
|
98
|
+
apiKey: "key-2",
|
|
99
|
+
models: [{ id: "test-model" }],
|
|
100
|
+
},
|
|
101
|
+
);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it("should return all models from all servers", () => {
|
|
105
|
+
const mockModel1 = {
|
|
106
|
+
name: "model-1",
|
|
107
|
+
id: "model-1",
|
|
108
|
+
} as unknown as BaseModel;
|
|
109
|
+
const mockModel2 = {
|
|
110
|
+
name: "model-2",
|
|
111
|
+
id: "model-2",
|
|
112
|
+
} as unknown as BaseModel;
|
|
113
|
+
const server1 = createMockServer({
|
|
114
|
+
baseUrl: "http://127.0.0.1:8080",
|
|
115
|
+
});
|
|
116
|
+
const server2 = createMockServer({
|
|
117
|
+
baseUrl: "http://127.0.0.1:8081",
|
|
118
|
+
});
|
|
119
|
+
const manager = new ServerManager([
|
|
120
|
+
{ ...server1, models: [mockModel1] } as any,
|
|
121
|
+
{ ...server2, models: [mockModel2] } as any,
|
|
122
|
+
] as any);
|
|
123
|
+
|
|
124
|
+
const allModels = manager.getAllModels();
|
|
125
|
+
|
|
126
|
+
expect(allModels).toHaveLength(2);
|
|
127
|
+
expect(allModels[0]).toBe(mockModel1);
|
|
128
|
+
expect(allModels[1]).toBe(mockModel2);
|
|
129
|
+
});
|
|
130
|
+
});
|
|
@@ -1,29 +1,26 @@
|
|
|
1
|
-
import { beforeEach, describe, expect, it
|
|
1
|
+
import { beforeEach, describe, expect, it } from "vitest";
|
|
2
2
|
import { Mode } from "../src/enums/mode";
|
|
3
3
|
import { Status } from "../src/enums/status";
|
|
4
|
-
import {
|
|
4
|
+
import { DataProperty } from "../src/interfaces/endpoints/models";
|
|
5
5
|
import { SingleModel } from "../src/models/singleModel";
|
|
6
|
-
|
|
7
|
-
const mockRpc = vi.fn();
|
|
8
|
-
|
|
9
|
-
vi.mock("../src/tools/retriever", () => ({
|
|
10
|
-
rpc: (...args: unknown[]) => mockRpc(...args),
|
|
11
|
-
isServerReady: vi.fn(),
|
|
12
|
-
listModels: vi.fn(),
|
|
13
|
-
}));
|
|
6
|
+
import { createMockServer, mockRpc } from "./mocks";
|
|
14
7
|
|
|
15
8
|
beforeEach(() => {
|
|
16
|
-
mockRpc.
|
|
9
|
+
mockRpc.mockReset();
|
|
17
10
|
});
|
|
18
11
|
|
|
19
|
-
const createModel = (extra: Partial<
|
|
20
|
-
new SingleModel(
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
12
|
+
const createModel = (extra: Partial<DataProperty> = {}): SingleModel =>
|
|
13
|
+
new SingleModel(
|
|
14
|
+
{
|
|
15
|
+
id: "test",
|
|
16
|
+
tags: [],
|
|
17
|
+
object: "model",
|
|
18
|
+
owned_by: "test",
|
|
19
|
+
created: Date.now(),
|
|
20
|
+
...extra,
|
|
21
|
+
},
|
|
22
|
+
createMockServer(),
|
|
23
|
+
);
|
|
27
24
|
|
|
28
25
|
describe("SingleModel mode", () => {
|
|
29
26
|
it("should always return SINGLE mode", () => {
|
|
@@ -34,21 +31,16 @@ describe("SingleModel mode", () => {
|
|
|
34
31
|
|
|
35
32
|
describe("SingleModel capabilities", () => {
|
|
36
33
|
it("should detect image capability when multimodal is in capabilities", async () => {
|
|
37
|
-
mockRpc.mockResolvedValueOnce({
|
|
38
|
-
models: [{ id: "test", capabilities: ["multimodal"] }],
|
|
39
|
-
});
|
|
34
|
+
mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
|
|
40
35
|
|
|
41
36
|
const model = createModel();
|
|
42
37
|
const capabilities = await model.getCapabilities();
|
|
43
38
|
|
|
44
39
|
expect(capabilities).toEqual(["text", "image"]);
|
|
45
|
-
expect(mockRpc).toHaveBeenCalledWith("/models");
|
|
46
40
|
});
|
|
47
41
|
|
|
48
42
|
it("should detect text-only capability when multimodal is not in capabilities", async () => {
|
|
49
|
-
mockRpc.mockResolvedValueOnce({
|
|
50
|
-
models: [{ id: "test", capabilities: [] }],
|
|
51
|
-
});
|
|
43
|
+
mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
|
|
52
44
|
|
|
53
45
|
const model = createModel();
|
|
54
46
|
const capabilities = await model.getCapabilities();
|
|
@@ -81,8 +73,8 @@ describe("SingleModel getStatus", () => {
|
|
|
81
73
|
});
|
|
82
74
|
|
|
83
75
|
describe("SingleModel getContextSize", () => {
|
|
84
|
-
it("should return n_ctx from /models endpoint meta", async () => {
|
|
85
|
-
mockRpc.
|
|
76
|
+
it("should return n_ctx from /v1/models endpoint meta", async () => {
|
|
77
|
+
mockRpc.mockResolvedValue({
|
|
86
78
|
data: [{ id: "test", meta: { n_ctx: 8192 } }],
|
|
87
79
|
});
|
|
88
80
|
|
|
@@ -90,6 +82,6 @@ describe("SingleModel getContextSize", () => {
|
|
|
90
82
|
const ctxSize = await model.getContextSize();
|
|
91
83
|
|
|
92
84
|
expect(ctxSize).toBe(8192);
|
|
93
|
-
expect(mockRpc).toHaveBeenCalledWith("/models");
|
|
85
|
+
expect(mockRpc).toHaveBeenCalledWith("/v1/models");
|
|
94
86
|
});
|
|
95
87
|
});
|