pi-llama-cpp 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,8 @@
1
- import { describe, expect, it, vi } from "vitest";
1
+ import { beforeEach, describe, expect, it } from "vitest";
2
2
  import { Mode } from "../src/enums/mode";
3
3
  import { DataProperty } from "../src/interfaces/endpoints/models";
4
4
  import { RouterModel } from "../src/models/routerModel";
5
-
6
- // Mock the retriever module before importing anything that depends on it
7
- const mockRpc = vi.fn();
8
-
9
- vi.mock("../src/tools/retriever", () => ({
10
- rpc: (...args: unknown[]) => mockRpc(...args),
11
- isServerReady: vi.fn(),
12
- listModels: vi.fn(),
13
- }));
5
+ import { createMockServer, mockRpc } from "./mocks";
14
6
 
15
7
  // Helper to create a mock DataProperty
16
8
  const createModel = (overrides: Partial<DataProperty> = {}): DataProperty => ({
@@ -24,6 +16,10 @@ const createModel = (overrides: Partial<DataProperty> = {}): DataProperty => ({
24
16
  ...overrides,
25
17
  });
26
18
 
19
+ beforeEach(() => {
20
+ mockRpc.mockClear();
21
+ });
22
+
27
23
  describe("RouterModel context size extraction", () => {
28
24
  it("should extract --ctx-size value", () => {
29
25
  const model = new RouterModel(
@@ -41,6 +37,7 @@ describe("RouterModel context size extraction", () => {
41
37
  preset: "default",
42
38
  },
43
39
  }),
40
+ createMockServer(),
44
41
  );
45
42
 
46
43
  // Access the private method via any
@@ -57,6 +54,7 @@ describe("RouterModel context size extraction", () => {
57
54
  preset: "default",
58
55
  },
59
56
  }),
57
+ createMockServer(),
60
58
  );
61
59
 
62
60
  const extractFrom = (model as any).extractFrom.bind(model);
@@ -72,6 +70,7 @@ describe("RouterModel context size extraction", () => {
72
70
  preset: "default",
73
71
  },
74
72
  }),
73
+ createMockServer(),
75
74
  );
76
75
 
77
76
  const extractFrom = (model as any).extractFrom.bind(model);
@@ -88,6 +87,7 @@ describe("RouterModel context size extraction", () => {
88
87
  preset: "default",
89
88
  },
90
89
  }),
90
+ createMockServer(),
91
91
  );
92
92
 
93
93
  const extractFrom = (model as any).extractFrom.bind(model);
@@ -103,6 +103,7 @@ describe("RouterModel context size extraction", () => {
103
103
  preset: "default",
104
104
  },
105
105
  }),
106
+ createMockServer(),
106
107
  );
107
108
 
108
109
  const extractFrom = (model as any).extractFrom.bind(model);
@@ -110,27 +111,9 @@ describe("RouterModel context size extraction", () => {
110
111
  });
111
112
 
112
113
  it("should prefer --ctx-size over --fit-ctx when loaded", async () => {
113
- // First call: getStatus() -> /models
114
- mockRpc.mockResolvedValueOnce({
115
- data: [
116
- {
117
- id: "test-model",
118
- status: {
119
- value: "loaded",
120
- args: [
121
- "--model",
122
- "gguf",
123
- "--ctx-size",
124
- "4096",
125
- "--fit-ctx",
126
- "8192",
127
- ],
128
- preset: "default",
129
- },
130
- },
131
- ],
132
- });
133
- // Second call: super.getContextSize() -> /models with meta.n_ctx
114
+ // First call: getStatus() -> fetchModelProps
115
+ mockRpc.mockResolvedValueOnce({ is_sleeping: false });
116
+ // Second call: super.getContextSize() -> fetchModels with meta.n_ctx
134
117
  mockRpc.mockResolvedValueOnce({
135
118
  data: [
136
119
  {
@@ -148,6 +131,7 @@ describe("RouterModel context size extraction", () => {
148
131
  preset: "default",
149
132
  },
150
133
  }),
134
+ createMockServer(),
151
135
  );
152
136
 
153
137
  const ctxSize = await model.getContextSize();
@@ -155,20 +139,9 @@ describe("RouterModel context size extraction", () => {
155
139
  });
156
140
 
157
141
  it("should return n_ctx from meta when loaded without context size args", async () => {
158
- // First call: getStatus() -> /models
159
- mockRpc.mockResolvedValueOnce({
160
- data: [
161
- {
162
- id: "test-model",
163
- status: {
164
- value: "loaded",
165
- args: ["--model", "gguf"],
166
- preset: "default",
167
- },
168
- },
169
- ],
170
- });
171
- // Second call: super.getContextSize() -> /models with meta.n_ctx
142
+ // First call: getStatus() -> fetchModelProps
143
+ mockRpc.mockResolvedValueOnce({ is_sleeping: false });
144
+ // Second call: super.getContextSize() -> fetchModels with meta.n_ctx
172
145
  mockRpc.mockResolvedValueOnce({
173
146
  data: [
174
147
  {
@@ -186,6 +159,7 @@ describe("RouterModel context size extraction", () => {
186
159
  preset: "default",
187
160
  },
188
161
  }),
162
+ createMockServer(),
189
163
  );
190
164
 
191
165
  const ctxSize = await model.getContextSize();
@@ -194,33 +168,34 @@ describe("RouterModel context size extraction", () => {
194
168
  });
195
169
 
196
170
  describe("RouterModel capabilities detection", () => {
197
- it("should detect image capability from architecture.input_modalities", async () => {
198
- mockRpc.mockResolvedValueOnce({
199
- data: [
200
- {
201
- id: "test-model",
202
- status: {
203
- value: "loaded",
204
- args: [],
205
- preset: "default",
206
- failed: false,
207
- },
208
- architecture: {
209
- input_modalities: ["text", "image"],
210
- output_modalities: ["text"],
211
- },
212
- },
213
- ],
214
- });
171
+ it("should detect image capability when modalities.vision is true", async () => {
172
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
215
173
 
216
- const model = new RouterModel(createModel());
174
+ const model = new RouterModel(createModel(), createMockServer());
217
175
  const capabilities = await model.getCapabilities();
218
176
 
219
177
  expect(capabilities).toEqual(["text", "image"]);
220
- expect(mockRpc).toHaveBeenCalledWith("/models");
178
+ expect(mockRpc).toHaveBeenCalledWith(
179
+ "/props?model=test-model&autoload=false",
180
+ );
181
+ });
182
+
183
+ it("should return text-only when fetchModelProps fails", async () => {
184
+ // First call (fetchModelProps) throws to trigger fallback
185
+ mockRpc.mockRejectedValueOnce(new Error("props not available"));
186
+ // Second call (fetchModels) returns empty data so model is not found
187
+ mockRpc.mockResolvedValueOnce({ data: [] });
188
+
189
+ const model = new RouterModel(createModel(), createMockServer());
190
+ const capabilities = await model.getCapabilities();
191
+
192
+ expect(capabilities).toEqual(["text"]);
221
193
  });
222
194
 
223
195
  it("should detect text-only capability when only text in input_modalities", async () => {
196
+ // First call (fetchModelProps) throws to trigger fallback
197
+ mockRpc.mockRejectedValueOnce(new Error("props not available"));
198
+ // Second call (fetchModels) returns the data
224
199
  mockRpc.mockResolvedValueOnce({
225
200
  data: [
226
201
  {
@@ -239,13 +214,16 @@ describe("RouterModel capabilities detection", () => {
239
214
  ],
240
215
  });
241
216
 
242
- const model = new RouterModel(createModel());
217
+ const model = new RouterModel(createModel(), createMockServer());
243
218
  const capabilities = await model.getCapabilities();
244
219
 
245
220
  expect(capabilities).toEqual(["text"]);
246
221
  });
247
222
 
248
223
  it("should return text when model not found in /models response", async () => {
224
+ // First call (fetchModelProps) throws to trigger fallback
225
+ mockRpc.mockRejectedValueOnce(new Error("props not available"));
226
+ // Second call (fetchModels) returns data without matching model
249
227
  mockRpc.mockResolvedValueOnce({
250
228
  data: [
251
229
  {
@@ -260,7 +238,7 @@ describe("RouterModel capabilities detection", () => {
260
238
  ],
261
239
  });
262
240
 
263
- const model = new RouterModel(createModel());
241
+ const model = new RouterModel(createModel(), createMockServer());
264
242
  const capabilities = await model.getCapabilities();
265
243
 
266
244
  expect(capabilities).toEqual(["text"]);
@@ -269,7 +247,7 @@ describe("RouterModel capabilities detection", () => {
269
247
 
270
248
  describe("RouterModel mode", () => {
271
249
  it("should always return ROUTER mode", () => {
272
- const model = new RouterModel(createModel());
250
+ const model = new RouterModel(createModel(), createMockServer());
273
251
  expect(model.mode).toBe(Mode.ROUTER);
274
252
  });
275
253
  });
@@ -0,0 +1,176 @@
1
+ import { beforeEach, describe, expect, it } from "vitest";
2
+ import { ServerStatus } from "../src/enums/serverStatus";
3
+ import { Server } from "../src/server";
4
+ import { createMockServer, mockRpc } from "./mocks";
5
+
6
+ beforeEach(() => {
7
+ mockRpc.mockClear();
8
+ });
9
+
10
+ describe("Server providerId", () => {
11
+ it("should generate a unique provider ID from baseUrl", () => {
12
+ const server = new Server("http://127.0.0.1:8080");
13
+ expect(server.providerId).toBe("llama-server=http://127.0.0.1:8080");
14
+ });
15
+
16
+ it("should generate different IDs for different baseUrls", () => {
17
+ const server1 = new Server("http://127.0.0.1:8080");
18
+ const server2 = new Server("http://127.0.0.1:8081");
19
+ expect(server1.providerId).not.toBe(server2.providerId);
20
+ });
21
+ });
22
+
23
+ describe("Server providerName", () => {
24
+ it("should generate a human-readable provider name", () => {
25
+ const server = new Server("http://127.0.0.1:8080");
26
+ expect(server.providerName).toBe("Llama.cpp (http://127.0.0.1:8080)");
27
+ });
28
+ });
29
+
30
+ describe("Server fetchModels", () => {
31
+ it("should call the /models endpoint", async () => {
32
+ mockRpc.mockResolvedValueOnce({
33
+ data: [{ id: "model1" }],
34
+ models: [{ id: "model1" }],
35
+ object: "list",
36
+ });
37
+
38
+ const server = createMockServer();
39
+ const result = await server.fetchModels();
40
+
41
+ expect(result).toEqual({
42
+ data: [{ id: "model1" }],
43
+ models: [{ id: "model1" }],
44
+ object: "list",
45
+ });
46
+ expect(mockRpc).toHaveBeenCalledWith("/v1/models");
47
+ });
48
+ });
49
+
50
+ describe("Server fetchModelProps", () => {
51
+ it("should call the /props endpoint with model id", async () => {
52
+ mockRpc.mockResolvedValueOnce({
53
+ is_sleeping: false,
54
+ default_generation_settings: {},
55
+ total_slots: 1,
56
+ model_alias: "test",
57
+ model_path: "/path/to/model.gguf",
58
+ modalities: { vision: false, audio: false },
59
+ media_marker: "",
60
+ endpoint_slots: false,
61
+ endpoint_props: false,
62
+ endpoint_metrics: false,
63
+ webui: false,
64
+ webui_settings: {},
65
+ chat_template: "",
66
+ chat_template_caps: {},
67
+ bos_token: "",
68
+ eos_token: "",
69
+ build_info: "",
70
+ });
71
+
72
+ const server = createMockServer();
73
+ const result = await server.fetchModelProps("test-model");
74
+
75
+ expect(result.is_sleeping).toBe(false);
76
+ expect(mockRpc).toHaveBeenCalledWith(
77
+ "/props?model=test-model&autoload=false",
78
+ );
79
+ });
80
+ });
81
+
82
+ describe("Server fetchServerHealth", () => {
83
+ it("should call the /health endpoint", async () => {
84
+ mockRpc.mockResolvedValueOnce({ status: "ok" });
85
+
86
+ const server = createMockServer();
87
+ const result = await server.fetchServerHealth();
88
+
89
+ expect(result).toEqual({ status: "ok" });
90
+ expect(mockRpc).toHaveBeenCalledWith("/health");
91
+ });
92
+ });
93
+
94
+ describe("Server fetchServerProps", () => {
95
+ it("should call the /props endpoint without model", async () => {
96
+ mockRpc.mockResolvedValueOnce({
97
+ role: "router",
98
+ default_generation_settings: {},
99
+ total_slots: 2,
100
+ model_alias: "",
101
+ model_path: "",
102
+ modalities: { vision: false, audio: false },
103
+ media_marker: "",
104
+ endpoint_slots: false,
105
+ endpoint_props: false,
106
+ endpoint_metrics: false,
107
+ webui: false,
108
+ webui_settings: {},
109
+ chat_template: "",
110
+ chat_template_caps: {},
111
+ bos_token: "",
112
+ eos_token: "",
113
+ build_info: "",
114
+ is_sleeping: false,
115
+ });
116
+
117
+ const server = createMockServer();
118
+ const result = await server.fetchServerProps();
119
+
120
+ expect(result.role).toBe("router");
121
+ expect(mockRpc).toHaveBeenCalledWith("/props?autoload=false");
122
+ });
123
+ });
124
+
125
+ describe("Server postRequest", () => {
126
+ it("should call /models/load with model in body", async () => {
127
+ mockRpc.mockResolvedValueOnce({});
128
+
129
+ const server = createMockServer();
130
+ await server.postRequest("load", "test-model");
131
+
132
+ expect(mockRpc).toHaveBeenCalledWith("/models/load", {
133
+ model: "test-model",
134
+ });
135
+ });
136
+
137
+ it("should call /models/unload with model in body", async () => {
138
+ mockRpc.mockResolvedValueOnce({});
139
+
140
+ const server = createMockServer();
141
+ await server.postRequest("unload", "test-model");
142
+
143
+ expect(mockRpc).toHaveBeenCalledWith("/models/unload", {
144
+ model: "test-model",
145
+ });
146
+ });
147
+ });
148
+
149
+ describe("Server isReady", () => {
150
+ it("should return READY when health status is ok", async () => {
151
+ mockRpc.mockResolvedValueOnce({ status: "ok" });
152
+
153
+ const server = createMockServer();
154
+ const status = await server.isReady(1000);
155
+
156
+ expect(status).toBe(ServerStatus.READY);
157
+ });
158
+
159
+ it("should return UNREACHABLE when health check fails", async () => {
160
+ mockRpc.mockRejectedValueOnce(new Error("connection refused"));
161
+
162
+ const server = createMockServer();
163
+ const status = await server.isReady(1000);
164
+
165
+ expect(status).toBe(ServerStatus.UNREACHABLE);
166
+ });
167
+
168
+ it("should return UNREACHABLE when health status is not ok", async () => {
169
+ mockRpc.mockResolvedValueOnce({ status: "error" });
170
+
171
+ const server = createMockServer();
172
+ const status = await server.isReady(1000);
173
+
174
+ expect(status).toBe(ServerStatus.UNREACHABLE);
175
+ });
176
+ });
@@ -0,0 +1,130 @@
1
+ import { beforeEach, describe, expect, it, vi } from "vitest";
2
+ import { ServerManager } from "../src/managers/server";
3
+ import { BaseModel } from "../src/models/baseModel";
4
+ import { Server } from "../src/server";
5
+ import { createMockServer, mockRpc } from "./mocks";
6
+
7
+ const mockPi = {
8
+ registerProvider: vi.fn(),
9
+ registerCommand: vi.fn(),
10
+ setModel: vi.fn(),
11
+ };
12
+
13
+ beforeEach(() => {
14
+ vi.clearAllMocks();
15
+ mockRpc.mockImplementation((endpoint: string, fallback?: unknown) => {
16
+ const defaults: Record<string, unknown> = {
17
+ "/health": { status: "ok" },
18
+ "/props?autoload=false": { role: "router" },
19
+ "/v1/models": { data: [], object: "list" },
20
+ };
21
+ return Promise.resolve(defaults[endpoint] ?? fallback ?? {});
22
+ });
23
+ });
24
+
25
+ describe("Server", () => {
26
+ it("should generate provider IDs from URLs", () => {
27
+ const server1 = new Server("http://127.0.0.1:8080");
28
+ expect(server1.providerId).toBe("llama-server=http://127.0.0.1:8080");
29
+ const server2 = new Server("http://10.0.0.5:8080");
30
+ expect(server2.providerId).toBe("llama-server=http://10.0.0.5:8080");
31
+ const server3 = new Server("http://127.0.0.1");
32
+ expect(server3.providerId).toBe("llama-server=http://127.0.0.1");
33
+ const server4 = new Server("http://127.0.0.1:80");
34
+ expect(server4.providerId).toBe("llama-server=http://127.0.0.1:80");
35
+ const server5 = new Server("https://127.0.0.1:443");
36
+ expect(server5.providerId).toBe("llama-server=https://127.0.0.1:443");
37
+ });
38
+
39
+ it("should generate provider names from URLs", () => {
40
+ const server1 = new Server("http://127.0.0.1:8080");
41
+ expect(server1.providerName).toBe("Llama.cpp (http://127.0.0.1:8080)");
42
+ const server2 = new Server("http://10.0.0.5:8080");
43
+ expect(server2.providerName).toBe("Llama.cpp (http://10.0.0.5:8080)");
44
+ });
45
+ });
46
+
47
+ describe("ServerManager", () => {
48
+ it("should register providers for all servers", async () => {
49
+ const mockModel = {
50
+ name: "test-model",
51
+ id: "test-model",
52
+ toProviderConfig: vi.fn().mockResolvedValue({ id: "test-model" }),
53
+ } as unknown as BaseModel;
54
+ mockRpc.mockImplementation((endpoint: string, fallback?: unknown) => {
55
+ if (endpoint === "/v1/models") {
56
+ return Promise.resolve({ data: [mockModel], object: "list" });
57
+ }
58
+ const defaults: Record<string, unknown> = {
59
+ "/health": { status: "ok" },
60
+ "/props?autoload=false": { role: "router" },
61
+ };
62
+ return Promise.resolve(defaults[endpoint] ?? fallback ?? {});
63
+ });
64
+
65
+ const server1 = createMockServer({
66
+ baseUrl: "http://127.0.0.1:8080",
67
+ apiKey: "key-1",
68
+ providerId: "llama-server=http://127.0.0.1:8080",
69
+ providerName: "Llama.cpp (http://127.0.0.1:8080)",
70
+ });
71
+ const server2 = createMockServer({
72
+ baseUrl: "http://127.0.0.1:8081",
73
+ apiKey: "key-2",
74
+ providerId: "llama-server=http://127.0.0.1:8081",
75
+ providerName: "Llama.cpp (http://127.0.0.1:8081)",
76
+ });
77
+ const manager = new ServerManager([server1, server2] as any);
78
+
79
+ await manager.initialize(mockPi as any);
80
+
81
+ expect(mockPi.registerProvider).toHaveBeenCalledTimes(2);
82
+ expect(mockPi.registerProvider).toHaveBeenCalledWith(
83
+ "llama-server=http://127.0.0.1:8080",
84
+ {
85
+ name: "Llama.cpp (http://127.0.0.1:8080)",
86
+ baseUrl: "http://127.0.0.1:8080",
87
+ api: "openai-completions",
88
+ apiKey: "key-1",
89
+ models: [{ id: "test-model" }],
90
+ },
91
+ );
92
+ expect(mockPi.registerProvider).toHaveBeenCalledWith(
93
+ "llama-server=http://127.0.0.1:8081",
94
+ {
95
+ name: "Llama.cpp (http://127.0.0.1:8081)",
96
+ baseUrl: "http://127.0.0.1:8081",
97
+ api: "openai-completions",
98
+ apiKey: "key-2",
99
+ models: [{ id: "test-model" }],
100
+ },
101
+ );
102
+ });
103
+
104
+ it("should return all models from all servers", () => {
105
+ const mockModel1 = {
106
+ name: "model-1",
107
+ id: "model-1",
108
+ } as unknown as BaseModel;
109
+ const mockModel2 = {
110
+ name: "model-2",
111
+ id: "model-2",
112
+ } as unknown as BaseModel;
113
+ const server1 = createMockServer({
114
+ baseUrl: "http://127.0.0.1:8080",
115
+ });
116
+ const server2 = createMockServer({
117
+ baseUrl: "http://127.0.0.1:8081",
118
+ });
119
+ const manager = new ServerManager([
120
+ { ...server1, models: [mockModel1] } as any,
121
+ { ...server2, models: [mockModel2] } as any,
122
+ ] as any);
123
+
124
+ const allModels = manager.getAllModels();
125
+
126
+ expect(allModels).toHaveLength(2);
127
+ expect(allModels[0]).toBe(mockModel1);
128
+ expect(allModels[1]).toBe(mockModel2);
129
+ });
130
+ });
@@ -1,29 +1,26 @@
1
- import { beforeEach, describe, expect, it, vi } from "vitest";
1
+ import { beforeEach, describe, expect, it } from "vitest";
2
2
  import { Mode } from "../src/enums/mode";
3
3
  import { Status } from "../src/enums/status";
4
- import { ModelProperty } from "../src/interfaces/endpoints/models";
4
+ import { DataProperty } from "../src/interfaces/endpoints/models";
5
5
  import { SingleModel } from "../src/models/singleModel";
6
-
7
- const mockRpc = vi.fn();
8
-
9
- vi.mock("../src/tools/retriever", () => ({
10
- rpc: (...args: unknown[]) => mockRpc(...args),
11
- isServerReady: vi.fn(),
12
- listModels: vi.fn(),
13
- }));
6
+ import { createMockServer, mockRpc } from "./mocks";
14
7
 
15
8
  beforeEach(() => {
16
- mockRpc.mockClear();
9
+ mockRpc.mockReset();
17
10
  });
18
11
 
19
- const createModel = (extra: Partial<ModelProperty> = {}): SingleModel =>
20
- new SingleModel({
21
- id: "test",
22
- tags: [],
23
- object: "model",
24
- owned_by: "test",
25
- created: Date.now(),
26
- });
12
+ const createModel = (extra: Partial<DataProperty> = {}): SingleModel =>
13
+ new SingleModel(
14
+ {
15
+ id: "test",
16
+ tags: [],
17
+ object: "model",
18
+ owned_by: "test",
19
+ created: Date.now(),
20
+ ...extra,
21
+ },
22
+ createMockServer(),
23
+ );
27
24
 
28
25
  describe("SingleModel mode", () => {
29
26
  it("should always return SINGLE mode", () => {
@@ -34,21 +31,16 @@ describe("SingleModel mode", () => {
34
31
 
35
32
  describe("SingleModel capabilities", () => {
36
33
  it("should detect image capability when multimodal is in capabilities", async () => {
37
- mockRpc.mockResolvedValueOnce({
38
- models: [{ id: "test", capabilities: ["multimodal"] }],
39
- });
34
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: true } });
40
35
 
41
36
  const model = createModel();
42
37
  const capabilities = await model.getCapabilities();
43
38
 
44
39
  expect(capabilities).toEqual(["text", "image"]);
45
- expect(mockRpc).toHaveBeenCalledWith("/models");
46
40
  });
47
41
 
48
42
  it("should detect text-only capability when multimodal is not in capabilities", async () => {
49
- mockRpc.mockResolvedValueOnce({
50
- models: [{ id: "test", capabilities: [] }],
51
- });
43
+ mockRpc.mockResolvedValueOnce({ modalities: { vision: false } });
52
44
 
53
45
  const model = createModel();
54
46
  const capabilities = await model.getCapabilities();
@@ -81,8 +73,8 @@ describe("SingleModel getStatus", () => {
81
73
  });
82
74
 
83
75
  describe("SingleModel getContextSize", () => {
84
- it("should return n_ctx from /models endpoint meta", async () => {
85
- mockRpc.mockResolvedValueOnce({
76
+ it("should return n_ctx from /v1/models endpoint meta", async () => {
77
+ mockRpc.mockResolvedValue({
86
78
  data: [{ id: "test", meta: { n_ctx: 8192 } }],
87
79
  });
88
80
 
@@ -90,6 +82,6 @@ describe("SingleModel getContextSize", () => {
90
82
  const ctxSize = await model.getContextSize();
91
83
 
92
84
  expect(ctxSize).toBe(8192);
93
- expect(mockRpc).toHaveBeenCalledWith("/models");
85
+ expect(mockRpc).toHaveBeenCalledWith("/v1/models");
94
86
  });
95
87
  });