@appkit/llamacpp-cli 1.7.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +84 -0
- package/dist/cli.js +80 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/config.d.ts +1 -0
- package/dist/commands/config.d.ts.map +1 -1
- package/dist/commands/config.js +167 -12
- package/dist/commands/config.js.map +1 -1
- package/dist/commands/router/config.d.ts +10 -0
- package/dist/commands/router/config.d.ts.map +1 -0
- package/dist/commands/router/config.js +95 -0
- package/dist/commands/router/config.js.map +1 -0
- package/dist/commands/router/restart.d.ts +2 -0
- package/dist/commands/router/restart.d.ts.map +1 -0
- package/dist/commands/router/restart.js +39 -0
- package/dist/commands/router/restart.js.map +1 -0
- package/dist/commands/router/start.d.ts +2 -0
- package/dist/commands/router/start.d.ts.map +1 -0
- package/dist/commands/router/start.js +60 -0
- package/dist/commands/router/start.js.map +1 -0
- package/dist/commands/router/status.d.ts +2 -0
- package/dist/commands/router/status.d.ts.map +1 -0
- package/dist/commands/router/status.js +116 -0
- package/dist/commands/router/status.js.map +1 -0
- package/dist/commands/router/stop.d.ts +2 -0
- package/dist/commands/router/stop.d.ts.map +1 -0
- package/dist/commands/router/stop.js +36 -0
- package/dist/commands/router/stop.js.map +1 -0
- package/dist/lib/router-manager.d.ts +103 -0
- package/dist/lib/router-manager.d.ts.map +1 -0
- package/dist/lib/router-manager.js +393 -0
- package/dist/lib/router-manager.js.map +1 -0
- package/dist/lib/router-server.d.ts +52 -0
- package/dist/lib/router-server.d.ts.map +1 -0
- package/dist/lib/router-server.js +373 -0
- package/dist/lib/router-server.js.map +1 -0
- package/dist/types/router-config.d.ts +18 -0
- package/dist/types/router-config.d.ts.map +1 -0
- package/dist/types/router-config.js +3 -0
- package/dist/types/router-config.js.map +1 -0
- package/package.json +1 -1
- package/src/cli.ts +81 -0
- package/src/commands/config.ts +146 -14
- package/src/commands/router/config.ts +109 -0
- package/src/commands/router/restart.ts +36 -0
- package/src/commands/router/start.ts +60 -0
- package/src/commands/router/status.ts +119 -0
- package/src/commands/router/stop.ts +33 -0
- package/src/lib/router-manager.ts +413 -0
- package/src/lib/router-server.ts +407 -0
- package/src/types/router-config.ts +24 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import * as http from 'http';
|
|
4
|
+
import * as https from 'https';
|
|
5
|
+
import { URL } from 'url';
|
|
6
|
+
import * as fs from 'fs/promises';
|
|
7
|
+
import * as path from 'path';
|
|
8
|
+
import { RouterConfig } from '../types/router-config';
|
|
9
|
+
import { ServerConfig } from '../types/server-config';
|
|
10
|
+
import { readJson, fileExists, getConfigDir, getServersDir } from '../utils/file-utils';
|
|
11
|
+
|
|
12
|
+
/**
 * JSON payload written by the router when a request fails.
 */
interface ErrorResponse {
  /** Short error label (e.g. the HTTP reason phrase). */
  error: string;
  /** Optional human-readable explanation of what went wrong. */
  details?: string;
}
|
|
16
|
+
|
|
17
|
+
/**
 * One entry of the model list returned by `GET /v1/models`.
 */
interface ModelInfo {
  /** Model identifier reported to clients. */
  id: string;
  /** Fixed discriminator for a model entry. */
  object: 'model';
  /** Creation time as a numeric timestamp. */
  created: number;
  /** Owner label reported to clients. */
  owned_by: string;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Envelope returned by `GET /v1/models`.
 */
interface ModelsResponse {
  /** Fixed discriminator for a list payload. */
  object: 'list';
  /** The listed models. */
  data: Array<ModelInfo>;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Router HTTP server - proxies requests to backend llama.cpp servers.
 *
 * Exposes `/health`, `/v1/models`, `/v1/chat/completions` and `/v1/embeddings`.
 * The two POST endpoints pick a backend by the `model` field of the JSON body
 * and stream the backend's response back to the caller.
 */
class RouterServer {
  private config!: RouterConfig;
  private server!: http.Server;

  /**
   * Load `router.json` from the config directory, create the HTTP server and
   * register SIGTERM/SIGINT handlers that close the server before exiting.
   *
   * @throws Error when the router configuration file does not exist.
   */
  async initialize(): Promise<void> {
    // Load router config
    const configPath = path.join(getConfigDir(), 'router.json');
    if (!(await fileExists(configPath))) {
      throw new Error('Router configuration not found');
    }
    this.config = await readJson<RouterConfig>(configPath);

    // Create HTTP server. handleRequest reports its own failures; the catch
    // here only guards against a rejection escaping as an unhandled rejection.
    this.server = http.createServer((req, res) => {
      this.handleRequest(req, res).catch((error) => {
        console.error('[Router] Unhandled error in request handler:', error);
        if (!res.writableEnded) {
          res.destroy();
        }
      });
    });

    // Graceful shutdown: Node passes the signal name to the listener, so one
    // handler serves both signals.
    const shutdown = (signal: NodeJS.Signals): void => {
      console.error(`[Router] Received ${signal}, shutting down gracefully...`);
      this.server.close(() => {
        console.error('[Router] Server closed');
        process.exit(0);
      });
    };
    process.on('SIGTERM', shutdown);
    process.on('SIGINT', shutdown);
  }

  /**
   * Initialize the router and begin listening on the configured host/port.
   *
   * The returned promise resolves once the socket is bound and rejects if
   * binding fails (e.g. the port is already in use), so callers can handle
   * startup failures instead of the process dying on an uncaught 'error' event.
   */
  async start(): Promise<void> {
    await this.initialize();

    const { host, port } = this.config;
    await new Promise<void>((resolve, reject) => {
      this.server.once('error', reject);
      this.server.listen(port, host, () => {
        // Startup succeeded; the startup-only error listener is no longer needed.
        this.server.off('error', reject);
        console.error(`[Router] Listening on http://${host}:${port}`);
        console.error(`[Router] PID: ${process.pid}`);
        resolve();
      });
    });
  }

  /**
   * Main request handler: logs the request, sets CORS headers, answers
   * OPTIONS preflights and dispatches to the endpoint handlers. Any error
   * thrown by a handler is turned into a 500 response.
   */
  private async handleRequest(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    // Log request
    console.error(`[Router] ${req.method} ${req.url}`);

    // CORS headers
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');

    // Handle OPTIONS preflight
    if (req.method === 'OPTIONS') {
      res.writeHead(200);
      res.end();
      return;
    }

    // Route on the path only, so a query string does not turn a known
    // endpoint into a 404.
    const pathname = (req.url ?? '').split('?')[0];
    const route = `${req.method} ${pathname}`;

    try {
      switch (route) {
        case 'GET /health':
          await this.handleHealth(req, res);
          break;
        case 'GET /v1/models':
          await this.handleModels(req, res);
          break;
        case 'POST /v1/chat/completions':
          await this.handleChatCompletions(req, res);
          break;
        case 'POST /v1/embeddings':
          await this.handleEmbeddings(req, res);
          break;
        default:
          this.sendError(res, 404, 'Not Found', `Unknown endpoint: ${req.url}`);
      }
    } catch (error) {
      console.error('[Router] Error handling request:', error);
      const message = error instanceof Error ? error.message : String(error);
      this.sendError(res, 500, 'Internal Server Error', message);
    }
  }

  /**
   * Health check endpoint: reports process uptime and the current time.
   */
  private async handleHealth(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      status: 'healthy',
      uptime: process.uptime(),
      timestamp: new Date().toISOString(),
    }));
  }

  /**
   * List models endpoint - aggregates one entry per server whose status is
   * 'running'.
   */
  private async handleModels(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    const servers = await this.getAllServers();

    const models: ModelInfo[] = servers
      .filter(server => server.status === 'running')
      .map(server => ({
        id: server.modelName,
        object: 'model',
        // createdAt is converted from milliseconds to whole seconds.
        created: Math.floor(new Date(server.createdAt).getTime() / 1000),
        owned_by: 'llamacpp',
      }));

    const response: ModelsResponse = {
      object: 'list',
      data: models,
    };

    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(response));
  }

  /**
   * Chat completions endpoint - routes the request to the backend server
   * that serves the requested model.
   */
  private async handleChatCompletions(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    const target = await this.resolveProxyTarget(req, res);
    if (!target) return;

    // Proxy request to backend
    const backendUrl = `http://${target.server.host}:${target.server.port}/v1/chat/completions`;
    await this.proxyRequest(backendUrl, target.requestData, req, res);
  }

  /**
   * Embeddings endpoint - routes the request to the backend server that
   * serves the requested model, provided that server has embeddings enabled.
   */
  private async handleEmbeddings(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    const target = await this.resolveProxyTarget(req, res);
    if (!target) return;

    // Check if server has embeddings enabled
    if (!target.server.embeddings) {
      this.sendError(
        res,
        400,
        'Bad Request',
        `Server for model "${target.modelName}" does not have embeddings enabled`
      );
      return;
    }

    // Proxy request to backend
    const backendUrl = `http://${target.server.host}:${target.server.port}/v1/embeddings`;
    await this.proxyRequest(backendUrl, target.requestData, req, res);
  }

  /**
   * Shared front half of the proxied endpoints: read and validate the JSON
   * body, then look up a running server for its `model` field.
   *
   * @returns the parsed body, model name and server config, or `null` when an
   *   error response (400/404/503) has already been sent.
   */
  private async resolveProxyTarget(
    req: http.IncomingMessage,
    res: http.ServerResponse
  ): Promise<{ server: ServerConfig; requestData: Record<string, unknown>; modelName: string } | null> {
    // Parse request body
    const body = await this.readBody(req);
    let parsed: unknown;
    try {
      parsed = JSON.parse(body);
    } catch (error) {
      this.sendError(res, 400, 'Bad Request', 'Invalid JSON in request body');
      return null;
    }

    // `null` and other non-object JSON values are valid JSON but not a valid
    // request; reject them here instead of failing on property access.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      this.sendError(res, 400, 'Bad Request', 'Request body must be a JSON object');
      return null;
    }
    const requestData = parsed as Record<string, unknown>;

    // Extract model name
    const modelName = requestData.model;
    if (!modelName) {
      this.sendError(res, 400, 'Bad Request', 'Missing "model" field in request');
      return null;
    }
    if (typeof modelName !== 'string') {
      this.sendError(res, 400, 'Bad Request', '"model" field must be a string');
      return null;
    }

    // Find server for model
    const server = await this.findServerForModel(modelName);
    if (!server) {
      this.sendError(res, 404, 'Not Found', `No server found for model: ${modelName}`);
      return null;
    }

    if (server.status !== 'running') {
      this.sendError(res, 503, 'Service Unavailable', `Server for model "${modelName}" is not running`);
      return null;
    }

    return { server, requestData, modelName };
  }

  /**
   * Proxy a request to a backend server.
   *
   * Re-serializes `requestData` as the POST body, forwards the backend's
   * status and headers, and streams the backend's response body to `res`.
   *
   * @returns a promise that resolves when the backend response has ended and
   *   rejects on connection failure, timeout or a broken response stream.
   */
  private async proxyRequest(
    backendUrl: string,
    requestData: any,
    originalReq: http.IncomingMessage,
    res: http.ServerResponse
  ): Promise<void> {
    const url = new URL(backendUrl);
    const isHttps = url.protocol === 'https:';
    const httpModule = isHttps ? https : http;

    const requestBody = JSON.stringify(requestData);

    const options: http.RequestOptions = {
      hostname: url.hostname,
      // url.port is '' when the URL uses the protocol's default port.
      port: url.port || (isHttps ? 443 : 80),
      path: url.pathname + url.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(requestBody),
      },
      timeout: this.config.requestTimeout,
    };

    return new Promise<void>((resolve, reject) => {
      const proxyReq = httpModule.request(options, (proxyRes) => {
        // Forward status and headers
        res.writeHead(proxyRes.statusCode ?? 200, proxyRes.headers);

        // Stream response
        proxyRes.pipe(res);

        proxyRes.on('end', () => {
          resolve();
        });

        // pipe() does not propagate source errors: without this, a backend
        // that dies mid-response leaves the client hanging and this promise
        // pending forever.
        proxyRes.on('error', (error) => {
          console.error('[Router] Backend response stream failed:', error);
          res.destroy();
          reject(error);
        });
      });

      proxyReq.on('error', (error) => {
        console.error('[Router] Proxy request failed:', error);
        if (!res.headersSent) {
          this.sendError(res, 502, 'Bad Gateway', 'Failed to connect to backend server');
        }
        reject(error);
      });

      proxyReq.on('timeout', () => {
        console.error('[Router] Proxy request timed out');
        // 'timeout' only signals inactivity; the request must be torn down explicitly.
        proxyReq.destroy();
        if (!res.headersSent) {
          this.sendError(res, 504, 'Gateway Timeout', 'Backend server did not respond in time');
        }
        reject(new Error('Request timeout'));
      });

      // Send request body
      proxyReq.write(requestBody);
      proxyReq.end();
    });
  }

  /**
   * Read the full request body and decode it as a UTF-8 string.
   */
  private async readBody(req: http.IncomingMessage): Promise<string> {
    return new Promise((resolve, reject) => {
      const chunks: Buffer[] = [];
      req.on('data', (chunk: Buffer) => chunks.push(chunk));
      req.on('end', () => resolve(Buffer.concat(chunks).toString('utf-8')));
      req.on('error', reject);
    });
  }

  /**
   * Send a JSON error response. Does nothing if headers were already sent.
   */
  private sendError(res: http.ServerResponse, statusCode: number, error: string, details?: string): void {
    if (res.headersSent) return;

    const response: ErrorResponse = { error };
    if (details) response.details = details;

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(response));
  }

  /**
   * Get all server configurations by reading every `.json` file in the
   * servers directory. Files that fail to load are logged and skipped; if the
   * directory itself cannot be read, an empty list is returned.
   */
  private async getAllServers(): Promise<ServerConfig[]> {
    const serversDir = getServersDir();

    let files: string[];
    try {
      files = await fs.readdir(serversDir);
    } catch (error) {
      console.error('[Router] Failed to read servers directory:', error);
      return [];
    }

    // Files are independent, so load them in parallel; Promise.all keeps the
    // directory-listing order in the result.
    const loaded = await Promise.all(
      files
        .filter(file => file.endsWith('.json'))
        .map(async (file): Promise<ServerConfig | null> => {
          try {
            return await readJson<ServerConfig>(path.join(serversDir, file));
          } catch (error) {
            console.error(`[Router] Failed to load server config ${file}:`, error);
            return null;
          }
        })
    );

    return loaded.filter((config): config is ServerConfig => config != null);
  }

  /**
   * Find a server by model name.
   *
   * Matching is attempted in order of decreasing strictness, and the first
   * strategy that yields a server wins: exact, case-insensitive,
   * case-insensitive with `.gguf` appended, then fully normalized
   * (lowercase, no `.gguf` suffix, `_` treated as `-`).
   *
   * @returns the matching server config, or `null` when none matches.
   */
  private async findServerForModel(modelName: string): Promise<ServerConfig | null> {
    const servers = await this.getAllServers();
    // Skip configs without a usable model name instead of failing the lookup.
    const candidates = servers.filter(server => typeof server.modelName === 'string');

    // Normalize a model name for flexible matching (lowercase, no extension, normalize separators)
    const normalize = (name: string): string =>
      name
        .toLowerCase()
        .replace(/\.gguf$/i, '')
        .replace(/[_-]/g, '-'); // Normalize underscores and hyphens to hyphens

    const lowered = modelName.toLowerCase();
    const normalizedRequest = normalize(modelName);

    const strategies: Array<(candidate: string) => boolean> = [
      // Exact match
      candidate => candidate === modelName,
      // Case-insensitive match
      candidate => candidate.toLowerCase() === lowered,
    ];

    // Try adding .gguf extension if not present
    if (!modelName.endsWith('.gguf')) {
      const withExtension = `${lowered}.gguf`;
      strategies.push(candidate => candidate.toLowerCase() === withExtension);
    }

    // Normalized matching (handles case, extension, and underscore/hyphen variations)
    strategies.push(candidate => normalize(candidate) === normalizedRequest);

    for (const matches of strategies) {
      const found = candidates.find(server => matches(server.modelName));
      if (found) return found;
    }

    return null;
  }
}
|
|
390
|
+
|
|
391
|
+
/**
 * Main entry point: start the router, or log the failure and exit with
 * status 1 if startup throws.
 */
async function main(): Promise<void> {
  try {
    const router = new RouterServer();
    await router.start();
  } catch (startupError) {
    console.error('[Router] Failed to start:', startupError);
    process.exit(1);
  }
}
|
|
401
|
+
|
|
402
|
+
// Start the router only when this file is executed directly, not when it is
// loaded by another module.
if (module === require.main) {
  void main();
}

export { RouterServer };
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/** Lifecycle state recorded for the router process. */
export type RouterStatus =
  | 'running'
  | 'stopped'
  | 'crashed';
|
|
2
|
+
|
|
3
|
+
/**
 * Persisted router configuration and state.
 */
export interface RouterConfig {
  /** Fixed identifier of the router entry. */
  id: 'router';
  /** Port the router listens on. */
  port: number;
  /** Host/interface the router listens on. */
  host: string;

  // ---- State tracking ----

  /** Current lifecycle state. */
  status: RouterStatus;
  /** Process id, when one is recorded. */
  pid?: number;
  /** Creation timestamp (string form). */
  createdAt: string;
  /** Timestamp of the most recent start, when recorded. */
  lastStarted?: string;
  /** Timestamp of the most recent stop, when recorded. */
  lastStopped?: string;

  // ---- launchctl metadata ----

  /** Path of the plist file. */
  plistPath: string;
  /** Fixed service label. */
  label: 'com.llama.router';
  /** Path of the stdout log. */
  stdoutPath: string;
  /** Path of the stderr log. */
  stderrPath: string;

  // ---- Router settings ----

  /** ms between health checks (default: 5000) */
  healthCheckInterval: number;
  /** ms for backend requests (default: 120000) */
  requestTimeout: number;
}
|