@appkit/llamacpp-cli 1.7.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CHANGELOG.md +42 -0
  2. package/README.md +84 -0
  3. package/dist/cli.js +80 -0
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/config.d.ts +1 -0
  6. package/dist/commands/config.d.ts.map +1 -1
  7. package/dist/commands/config.js +167 -12
  8. package/dist/commands/config.js.map +1 -1
  9. package/dist/commands/router/config.d.ts +10 -0
  10. package/dist/commands/router/config.d.ts.map +1 -0
  11. package/dist/commands/router/config.js +95 -0
  12. package/dist/commands/router/config.js.map +1 -0
  13. package/dist/commands/router/restart.d.ts +2 -0
  14. package/dist/commands/router/restart.d.ts.map +1 -0
  15. package/dist/commands/router/restart.js +39 -0
  16. package/dist/commands/router/restart.js.map +1 -0
  17. package/dist/commands/router/start.d.ts +2 -0
  18. package/dist/commands/router/start.d.ts.map +1 -0
  19. package/dist/commands/router/start.js +60 -0
  20. package/dist/commands/router/start.js.map +1 -0
  21. package/dist/commands/router/status.d.ts +2 -0
  22. package/dist/commands/router/status.d.ts.map +1 -0
  23. package/dist/commands/router/status.js +116 -0
  24. package/dist/commands/router/status.js.map +1 -0
  25. package/dist/commands/router/stop.d.ts +2 -0
  26. package/dist/commands/router/stop.d.ts.map +1 -0
  27. package/dist/commands/router/stop.js +36 -0
  28. package/dist/commands/router/stop.js.map +1 -0
  29. package/dist/lib/router-manager.d.ts +103 -0
  30. package/dist/lib/router-manager.d.ts.map +1 -0
  31. package/dist/lib/router-manager.js +393 -0
  32. package/dist/lib/router-manager.js.map +1 -0
  33. package/dist/lib/router-server.d.ts +52 -0
  34. package/dist/lib/router-server.d.ts.map +1 -0
  35. package/dist/lib/router-server.js +373 -0
  36. package/dist/lib/router-server.js.map +1 -0
  37. package/dist/types/router-config.d.ts +18 -0
  38. package/dist/types/router-config.d.ts.map +1 -0
  39. package/dist/types/router-config.js +3 -0
  40. package/dist/types/router-config.js.map +1 -0
  41. package/package.json +1 -1
  42. package/src/cli.ts +81 -0
  43. package/src/commands/config.ts +146 -14
  44. package/src/commands/router/config.ts +109 -0
  45. package/src/commands/router/restart.ts +36 -0
  46. package/src/commands/router/start.ts +60 -0
  47. package/src/commands/router/status.ts +119 -0
  48. package/src/commands/router/stop.ts +33 -0
  49. package/src/lib/router-manager.ts +413 -0
  50. package/src/lib/router-server.ts +407 -0
  51. package/src/types/router-config.ts +24 -0
@@ -0,0 +1,407 @@
1
+ #!/usr/bin/env node
2
+
3
+ import * as http from 'http';
4
+ import * as https from 'https';
5
+ import { URL } from 'url';
6
+ import * as fs from 'fs/promises';
7
+ import * as path from 'path';
8
+ import { RouterConfig } from '../types/router-config';
9
+ import { ServerConfig } from '../types/server-config';
10
+ import { readJson, fileExists, getConfigDir, getServersDir } from '../utils/file-utils';
11
+
12
/**
 * JSON body the router writes for every failed request.
 */
interface ErrorResponse {
  /** Short error label, e.g. "Bad Request" or "Not Found". */
  error: string;

  /** Optional longer explanation; left out of the body when not supplied. */
  details?: string;
}
16
+
17
/**
 * One entry of the `/v1/models` listing.
 */
interface ModelInfo {
  /** Model identifier (the router fills this with the backend's model name). */
  id: string;

  /** Fixed discriminator for a single model entry. */
  object: 'model';

  /** Creation time as a number (the router supplies whole seconds since the Unix epoch). */
  created: number;

  /** Owner label reported for the model. */
  owned_by: string;
}
23
+
24
/**
 * Envelope returned by the `/v1/models` endpoint.
 */
interface ModelsResponse {
  /** Fixed discriminator marking the payload as a list. */
  object: 'list';

  /** The listed models, one entry per model. */
  data: Array<ModelInfo>;
}
28
+
29
/**
 * Router HTTP server - proxies requests to backend llama.cpp servers.
 *
 * Serves `/health`, `/v1/models`, `/v1/chat/completions` and `/v1/embeddings`.
 * The two POST endpoints select a backend from the request's `model` field and
 * stream that backend's response back to the caller.
 */
class RouterServer {
  private config!: RouterConfig;
  private server!: http.Server;

  /**
   * Load `router.json` from the config directory, create the HTTP server and
   * register SIGTERM/SIGINT handlers that close the server before exiting.
   *
   * @throws Error when the router configuration file does not exist.
   */
  async initialize(): Promise<void> {
    // Load router config
    const configPath = path.join(getConfigDir(), 'router.json');
    if (!(await fileExists(configPath))) {
      throw new Error('Router configuration not found');
    }
    this.config = await readJson<RouterConfig>(configPath);

    // Create HTTP server. handleRequest has its own try/catch; the extra
    // catch keeps a failure outside it from becoming an unhandled rejection.
    this.server = http.createServer((req, res) => {
      this.handleRequest(req, res).catch((error: unknown) => {
        console.error('[Router] Error handling request:', error);
      });
    });

    // Graceful shutdown
    for (const signal of ['SIGTERM', 'SIGINT'] as const) {
      process.on(signal, () => this.shutdown(signal));
    }
  }

  /**
   * Initialize the router and begin listening on the configured host/port.
   *
   * Resolves once the server is listening. Rejects if initialization fails or
   * the listen call fails (for example, the port is already in use).
   */
  async start(): Promise<void> {
    await this.initialize();

    await new Promise<void>((resolve, reject) => {
      // A listen failure is reported through the server's 'error' event.
      this.server.once('error', reject);
      this.server.listen(this.config.port, this.config.host, () => {
        this.server.off('error', reject);
        console.error(`[Router] Listening on http://${this.config.host}:${this.config.port}`);
        console.error(`[Router] PID: ${process.pid}`);
        resolve();
      });
    });
  }

  /**
   * Log the received signal, stop accepting connections and exit with code 0
   * once the server has closed.
   */
  private shutdown(signal: string): void {
    console.error(`[Router] Received ${signal}, shutting down gracefully...`);
    this.server.close(() => {
      console.error('[Router] Server closed');
      process.exit(0);
    });
  }

  /**
   * Main request handler: logs the request, applies CORS headers, answers
   * OPTIONS preflights and dispatches to the endpoint handlers.
   *
   * Unknown endpoints get a 404; any error thrown by a handler is logged and
   * answered with a 500 (if nothing has been sent yet).
   */
  private async handleRequest(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    // Log request
    console.error(`[Router] ${req.method} ${req.url}`);

    // CORS headers
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');

    // Handle OPTIONS preflight
    if (req.method === 'OPTIONS') {
      res.writeHead(200);
      res.end();
      return;
    }

    // Match on the path only, so a query string (e.g. "/health?probe=1")
    // does not turn a known endpoint into a 404.
    const rawUrl = req.url ?? '';
    const queryStart = rawUrl.indexOf('?');
    const pathname = queryStart === -1 ? rawUrl : rawUrl.slice(0, queryStart);

    try {
      if (pathname === '/health' && req.method === 'GET') {
        await this.handleHealth(req, res);
      } else if (pathname === '/v1/models' && req.method === 'GET') {
        await this.handleModels(req, res);
      } else if (pathname === '/v1/chat/completions' && req.method === 'POST') {
        await this.handleChatCompletions(req, res);
      } else if (pathname === '/v1/embeddings' && req.method === 'POST') {
        await this.handleEmbeddings(req, res);
      } else {
        this.sendError(res, 404, 'Not Found', `Unknown endpoint: ${req.url}`);
      }
    } catch (error) {
      console.error('[Router] Error handling request:', error);
      const message = error instanceof Error ? error.message : String(error);
      this.sendError(res, 500, 'Internal Server Error', message);
    }
  }

  /**
   * Health check endpoint: reports status, process uptime and current time.
   */
  private async handleHealth(_req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      status: 'healthy',
      uptime: process.uptime(),
      timestamp: new Date().toISOString(),
    }));
  }

  /**
   * List models endpoint - one entry per server config whose status is 'running'.
   */
  private async handleModels(_req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    const servers = await this.getAllServers();

    const models: ModelInfo[] = servers
      .filter(server => server.status === 'running')
      .map(server => ({
        id: server.modelName,
        object: 'model',
        // createdAt is converted to whole seconds since the Unix epoch
        created: Math.floor(new Date(server.createdAt).getTime() / 1000),
        owned_by: 'llamacpp',
      }));

    const response: ModelsResponse = {
      object: 'list',
      data: models,
    };

    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(response));
  }

  /**
   * Chat completions endpoint - route to the backend serving the requested model.
   */
  private async handleChatCompletions(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    await this.forwardToBackend(req, res, '/v1/chat/completions');
  }

  /**
   * Embeddings endpoint - route to the backend serving the requested model,
   * provided that backend has embeddings enabled.
   */
  private async handleEmbeddings(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    await this.forwardToBackend(req, res, '/v1/embeddings', true);
  }

  /**
   * Shared implementation of the model-routed POST endpoints.
   *
   * Reads and validates the JSON body, resolves the backend from its `model`
   * field and proxies the request to `endpoint` on that backend.
   *
   * Responds 400 for invalid JSON, a non-object body, a missing or non-string
   * `model`, or (when `requireEmbeddings` is set) a backend without embeddings;
   * 404 when no server matches the model; 503 when the server is not running.
   */
  private async forwardToBackend(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    endpoint: string,
    requireEmbeddings = false
  ): Promise<void> {
    // Parse request body
    const body = await this.readBody(req);
    let requestData: unknown;
    try {
      requestData = JSON.parse(body);
    } catch {
      this.sendError(res, 400, 'Bad Request', 'Invalid JSON in request body');
      return;
    }

    // JSON.parse also yields null, arrays and primitives. Reject them here so
    // the property access below cannot throw and surface as a 500.
    if (typeof requestData !== 'object' || requestData === null || Array.isArray(requestData)) {
      this.sendError(res, 400, 'Bad Request', 'Request body must be a JSON object');
      return;
    }

    // Extract model name
    const modelName: unknown = (requestData as Record<string, unknown>).model;
    if (!modelName) {
      this.sendError(res, 400, 'Bad Request', 'Missing "model" field in request');
      return;
    }
    if (typeof modelName !== 'string') {
      this.sendError(res, 400, 'Bad Request', '"model" field must be a string');
      return;
    }

    // Find server for model
    const server = await this.findServerForModel(modelName);
    if (!server) {
      this.sendError(res, 404, 'Not Found', `No server found for model: ${modelName}`);
      return;
    }

    if (server.status !== 'running') {
      this.sendError(res, 503, 'Service Unavailable', `Server for model "${modelName}" is not running`);
      return;
    }

    // Check if server has embeddings enabled
    if (requireEmbeddings && !server.embeddings) {
      this.sendError(res, 400, 'Bad Request', `Server for model "${modelName}" does not have embeddings enabled`);
      return;
    }

    // Proxy request to backend
    const backendUrl = `http://${server.host}:${server.port}${endpoint}`;
    await this.proxyRequest(backendUrl, requestData, req, res);
  }

  /**
   * Proxy a request to a backend server and stream its response to `res`.
   *
   * The returned promise settles exactly once:
   * - resolves when the backend response ends or closes, or when the client
   *   disconnects (the backend request is then aborted);
   * - rejects on a connection error (after answering 502) or on a timeout
   *   (after answering 504), when no response headers have been sent yet.
   *
   * @param backendUrl  Full URL of the backend endpoint.
   * @param requestData Parsed request body; re-serialized as the backend payload.
   * @param originalReq The incoming client request (currently unused).
   * @param res         The client response to stream into.
   */
  private async proxyRequest(
    backendUrl: string,
    requestData: unknown,
    originalReq: http.IncomingMessage,
    res: http.ServerResponse
  ): Promise<void> {
    const url = new URL(backendUrl);
    const isHttps = url.protocol === 'https:';
    const httpModule = isHttps ? https : http;

    const requestBody = JSON.stringify(requestData);

    const options: http.RequestOptions = {
      hostname: url.hostname,
      port: url.port || (isHttps ? 443 : 80),
      path: url.pathname + url.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(requestBody),
      },
      timeout: this.config.requestTimeout,
    };

    return new Promise<void>((resolve, reject) => {
      let settled = false;

      // Settle the outer promise once; later events become no-ops.
      const settle = (error?: Error): void => {
        if (settled) return;
        settled = true;
        res.off('close', onClientClose);
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      };

      const proxyReq = httpModule.request(options, (proxyRes) => {
        // Forward status and headers
        res.writeHead(proxyRes.statusCode ?? 200, proxyRes.headers);

        // Stream response
        proxyRes.pipe(res);

        proxyRes.on('error', (error) => {
          console.error('[Router] Backend response failed:', error);
        });

        proxyRes.on('end', () => {
          settle();
        });

        proxyRes.on('close', () => {
          // pipe() does not end the destination when the source is cut off
          // mid-stream; close the client connection so it does not hang.
          if (!proxyRes.complete && !res.writableEnded) {
            res.destroy();
          }
          settle();
        });
      });

      // If the client goes away before the exchange finished, stop the
      // backend request so it does not keep working for nobody.
      function onClientClose(): void {
        if (settled) return;
        console.error('[Router] Client disconnected, aborting backend request');
        settle();
        proxyReq.destroy();
      }
      res.on('close', onClientClose);

      proxyReq.on('error', (error) => {
        // Already settled: this is the hang-up caused by our own destroy().
        if (settled) return;
        console.error('[Router] Proxy request failed:', error);
        // sendError is a no-op once headers have been sent
        this.sendError(res, 502, 'Bad Gateway', 'Failed to connect to backend server');
        settle(error);
      });

      proxyReq.on('timeout', () => {
        if (settled) return;
        console.error('[Router] Proxy request timed out');
        this.sendError(res, 504, 'Gateway Timeout', 'Backend server did not respond in time');
        settle(new Error('Request timeout'));
        proxyReq.destroy();
      });

      // Send request body
      proxyReq.write(requestBody);
      proxyReq.end();
    });
  }

  /**
   * Read the full request body and decode it as UTF-8.
   * Rejects if the request stream emits an error.
   */
  private async readBody(req: http.IncomingMessage): Promise<string> {
    return new Promise((resolve, reject) => {
      const chunks: Buffer[] = [];
      req.on('data', (chunk: Buffer) => chunks.push(chunk));
      req.on('end', () => resolve(Buffer.concat(chunks).toString('utf-8')));
      req.on('error', reject);
    });
  }

  /**
   * Send a JSON error response. Does nothing if headers were already sent.
   */
  private sendError(res: http.ServerResponse, statusCode: number, error: string, details?: string): void {
    if (res.headersSent) return;

    const response: ErrorResponse = { error };
    if (details) response.details = details;

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(response));
  }

  /**
   * Get all server configurations from the `*.json` files in the servers
   * directory.
   *
   * A file that fails to load is logged and skipped; if the directory itself
   * cannot be read, the failure is logged and an empty list is returned.
   */
  private async getAllServers(): Promise<ServerConfig[]> {
    const serversDir = getServersDir();
    try {
      const files = await fs.readdir(serversDir);
      const configFiles = files.filter(f => f.endsWith('.json'));

      // The reads are independent, so run them concurrently; Promise.all
      // keeps the results in directory-listing order.
      const loaded = await Promise.all(
        configFiles.map(async (file): Promise<ServerConfig | null> => {
          try {
            return await readJson<ServerConfig>(path.join(serversDir, file));
          } catch (error) {
            console.error(`[Router] Failed to load server config ${file}:`, error);
            return null;
          }
        })
      );

      return loaded.filter((config): config is ServerConfig => config != null);
    } catch (error) {
      console.error('[Router] Failed to read servers directory:', error);
      return [];
    }
  }

  /**
   * Find a server by model name.
   *
   * Matching gets progressively looser and the first hit wins:
   * exact, case-insensitive, with `.gguf` appended, then fully normalized
   * (lowercase, `.gguf` stripped, underscores treated as hyphens).
   *
   * @returns The matching server config, or null when nothing matches.
   */
  private async findServerForModel(modelName: string): Promise<ServerConfig | null> {
    const servers = await this.getAllServers();

    // Normalize a model name for flexible matching (lowercase, no extension, normalize separators)
    const normalize = (name: string): string =>
      name
        .toLowerCase()
        .replace(/\.gguf$/i, '')
        .replace(/[_-]/g, '-'); // Normalize underscores and hyphens to hyphens

    const lowered = modelName.toLowerCase();
    const normalizedRequest = normalize(modelName);

    // Ordered from strictest to loosest
    const matchers: Array<(candidate: ServerConfig) => boolean> = [
      // Exact match
      s => s.modelName === modelName,
      // Case-insensitive match
      s => s.modelName.toLowerCase() === lowered,
    ];

    // Try adding .gguf extension if not present
    if (!modelName.endsWith('.gguf')) {
      const withExtension = `${lowered}.gguf`;
      matchers.push(s => s.modelName.toLowerCase() === withExtension);
    }

    // Normalized matching (handles case, extension, and underscore/hyphen variations)
    matchers.push(s => normalize(s.modelName) === normalizedRequest);

    for (const matches of matchers) {
      const found = servers.find(matches);
      if (found) return found;
    }

    return null;
  }
}
390
+
391
/**
 * Main entry point: create the router and start it.
 * Any startup failure is logged and the process exits with code 1.
 */
async function main(): Promise<void> {
  const failStartup = (error: unknown): never => {
    console.error('[Router] Failed to start:', error);
    process.exit(1);
  };

  try {
    const router = new RouterServer();
    await router.start();
  } catch (error) {
    failStartup(error);
  }
}
401
+
402
// Start the router only when this file is executed directly, not when it is
// imported. main() handles its own failures, so the promise is deliberately
// not awaited.
if (require.main === module) {
  void main();
}
406
+
407
+ export { RouterServer };
@@ -0,0 +1,24 @@
1
/** Lifecycle states recorded for the router process. */
export type RouterStatus =
  | 'running'
  | 'stopped'
  | 'crashed';
2
+
3
/**
 * Persisted configuration and state of the router.
 */
export interface RouterConfig {
  /** Fixed identifier; always the literal 'router'. */
  id: 'router';
  /** Port number for the router. */
  port: number;
  /** Host for the router. */
  host: string;

  // ---- State tracking ----

  /** Current lifecycle state. */
  status: RouterStatus;
  /** Process id, when one is recorded. */
  pid?: number;
  /** Creation timestamp as a string (format not specified here — confirm with the writer). */
  createdAt: string;
  /** Timestamp of the most recent start, when recorded. */
  lastStarted?: string;
  /** Timestamp of the most recent stop, when recorded. */
  lastStopped?: string;

  // ---- launchctl metadata ----

  /** Path of the plist file. */
  plistPath: string;
  /** Service label; always the literal 'com.llama.router'. */
  label: 'com.llama.router';
  /** Path of the stdout file. */
  stdoutPath: string;
  /** Path of the stderr file. */
  stderrPath: string;

  // ---- Router settings ----

  /** ms between health checks (default: 5000) */
  healthCheckInterval: number;
  /** ms for backend requests (default: 120000) */
  requestTimeout: number;
}