@appkit/llamacpp-cli 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +294 -168
- package/dist/cli.js +35 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/launch/claude.d.ts +6 -0
- package/dist/commands/launch/claude.d.ts.map +1 -0
- package/dist/commands/launch/claude.js +277 -0
- package/dist/commands/launch/claude.js.map +1 -0
- package/dist/lib/integration-checker.d.ts +26 -0
- package/dist/lib/integration-checker.d.ts.map +1 -0
- package/dist/lib/integration-checker.js +77 -0
- package/dist/lib/integration-checker.js.map +1 -0
- package/dist/lib/router-manager.d.ts +4 -0
- package/dist/lib/router-manager.d.ts.map +1 -1
- package/dist/lib/router-manager.js +10 -0
- package/dist/lib/router-manager.js.map +1 -1
- package/dist/lib/router-server.d.ts +13 -0
- package/dist/lib/router-server.d.ts.map +1 -1
- package/dist/lib/router-server.js +267 -7
- package/dist/lib/router-server.js.map +1 -1
- package/dist/types/integration-config.d.ts +28 -0
- package/dist/types/integration-config.d.ts.map +1 -0
- package/dist/types/integration-config.js +3 -0
- package/dist/types/integration-config.js.map +1 -0
- package/package.json +10 -2
- package/web/dist/assets/index-Bin89Lwr.css +1 -0
- package/web/dist/assets/index-CVmonw3T.js +17 -0
- package/web/{index.html → dist/index.html} +2 -1
- package/.versionrc.json +0 -16
- package/CHANGELOG.md +0 -213
- package/docs/images/.gitkeep +0 -1
- package/docs/images/web-ui-servers.png +0 -0
- package/src/cli.ts +0 -523
- package/src/commands/admin/config.ts +0 -121
- package/src/commands/admin/logs.ts +0 -91
- package/src/commands/admin/restart.ts +0 -26
- package/src/commands/admin/start.ts +0 -27
- package/src/commands/admin/status.ts +0 -84
- package/src/commands/admin/stop.ts +0 -16
- package/src/commands/config-global.ts +0 -38
- package/src/commands/config.ts +0 -323
- package/src/commands/create.ts +0 -183
- package/src/commands/delete.ts +0 -74
- package/src/commands/list.ts +0 -37
- package/src/commands/logs-all.ts +0 -251
- package/src/commands/logs.ts +0 -345
- package/src/commands/monitor.ts +0 -110
- package/src/commands/ps.ts +0 -84
- package/src/commands/pull.ts +0 -44
- package/src/commands/rm.ts +0 -107
- package/src/commands/router/config.ts +0 -116
- package/src/commands/router/logs.ts +0 -256
- package/src/commands/router/restart.ts +0 -36
- package/src/commands/router/start.ts +0 -60
- package/src/commands/router/status.ts +0 -119
- package/src/commands/router/stop.ts +0 -33
- package/src/commands/run.ts +0 -233
- package/src/commands/search.ts +0 -107
- package/src/commands/server-show.ts +0 -161
- package/src/commands/show.ts +0 -207
- package/src/commands/start.ts +0 -101
- package/src/commands/stop.ts +0 -39
- package/src/commands/tui.ts +0 -25
- package/src/lib/admin-manager.ts +0 -435
- package/src/lib/admin-server.ts +0 -1243
- package/src/lib/config-generator.ts +0 -130
- package/src/lib/download-job-manager.ts +0 -213
- package/src/lib/history-manager.ts +0 -172
- package/src/lib/launchctl-manager.ts +0 -225
- package/src/lib/metrics-aggregator.ts +0 -257
- package/src/lib/model-downloader.ts +0 -328
- package/src/lib/model-scanner.ts +0 -157
- package/src/lib/model-search.ts +0 -114
- package/src/lib/models-dir-setup.ts +0 -46
- package/src/lib/port-manager.ts +0 -80
- package/src/lib/router-logger.ts +0 -201
- package/src/lib/router-manager.ts +0 -414
- package/src/lib/router-server.ts +0 -538
- package/src/lib/state-manager.ts +0 -206
- package/src/lib/status-checker.ts +0 -113
- package/src/lib/system-collector.ts +0 -315
- package/src/tui/ConfigApp.ts +0 -1085
- package/src/tui/HistoricalMonitorApp.ts +0 -587
- package/src/tui/ModelsApp.ts +0 -368
- package/src/tui/MonitorApp.ts +0 -386
- package/src/tui/MultiServerMonitorApp.ts +0 -1833
- package/src/tui/RootNavigator.ts +0 -74
- package/src/tui/SearchApp.ts +0 -511
- package/src/tui/SplashScreen.ts +0 -149
- package/src/types/admin-config.ts +0 -25
- package/src/types/global-config.ts +0 -26
- package/src/types/history-types.ts +0 -39
- package/src/types/model-info.ts +0 -8
- package/src/types/monitor-types.ts +0 -162
- package/src/types/router-config.ts +0 -25
- package/src/types/server-config.ts +0 -46
- package/src/utils/downsample-utils.ts +0 -128
- package/src/utils/file-utils.ts +0 -146
- package/src/utils/format-utils.ts +0 -98
- package/src/utils/log-parser.ts +0 -284
- package/src/utils/log-utils.ts +0 -178
- package/src/utils/process-utils.ts +0 -316
- package/src/utils/prompt-utils.ts +0 -47
- package/test-load.sh +0 -100
- package/tsconfig.json +0 -20
- package/web/eslint.config.js +0 -23
- package/web/llamacpp-web-dist.tar.gz +0 -0
- package/web/package-lock.json +0 -4017
- package/web/package.json +0 -38
- package/web/postcss.config.js +0 -6
- package/web/src/App.css +0 -42
- package/web/src/App.tsx +0 -86
- package/web/src/assets/react.svg +0 -1
- package/web/src/components/ApiKeyPrompt.tsx +0 -71
- package/web/src/components/CreateServerModal.tsx +0 -372
- package/web/src/components/DownloadProgress.tsx +0 -123
- package/web/src/components/Nav.tsx +0 -89
- package/web/src/components/RouterConfigModal.tsx +0 -240
- package/web/src/components/SearchModal.tsx +0 -306
- package/web/src/components/ServerConfigModal.tsx +0 -291
- package/web/src/hooks/useApi.ts +0 -259
- package/web/src/index.css +0 -42
- package/web/src/lib/api.ts +0 -226
- package/web/src/main.tsx +0 -10
- package/web/src/pages/Dashboard.tsx +0 -103
- package/web/src/pages/Models.tsx +0 -258
- package/web/src/pages/Router.tsx +0 -270
- package/web/src/pages/RouterLogs.tsx +0 -201
- package/web/src/pages/ServerLogs.tsx +0 -553
- package/web/src/pages/Servers.tsx +0 -358
- package/web/src/types/api.ts +0 -140
- package/web/tailwind.config.js +0 -31
- package/web/tsconfig.app.json +0 -28
- package/web/tsconfig.json +0 -7
- package/web/tsconfig.node.json +0 -26
- package/web/vite.config.ts +0 -25
- /package/web/{public → dist}/vite.svg +0 -0
package/src/lib/router-server.ts
DELETED
|
@@ -1,538 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
import * as http from 'http';
|
|
4
|
-
import * as https from 'https';
|
|
5
|
-
import { URL } from 'url';
|
|
6
|
-
import * as fs from 'fs/promises';
|
|
7
|
-
import * as path from 'path';
|
|
8
|
-
import { RouterConfig } from '../types/router-config';
|
|
9
|
-
import { ServerConfig } from '../types/server-config';
|
|
10
|
-
import { readJson, fileExists, getConfigDir, getServersDir } from '../utils/file-utils';
|
|
11
|
-
import { RouterLogger, RequestTimer, RouterLogEntry } from './router-logger';
|
|
12
|
-
|
|
13
|
-
/**
 * JSON body written for every non-success response produced by the router
 * itself (as opposed to responses streamed through from a backend).
 */
interface ErrorResponse {
  /** Short error title, e.g. the HTTP reason phrase. */
  error: string;
  /** Optional longer explanation; left out of the payload when not provided. */
  details?: string;
}
|
|
17
|
-
|
|
18
|
-
/**
 * One entry of the model list returned by `GET /v1/models`.
 */
interface ModelInfo {
  /** Model identifier that clients pass back in the `model` request field. */
  id: string;
  /** Fixed discriminator for this entry type. */
  object: 'model';
  /** Creation time expressed in whole seconds since the Unix epoch. */
  created: number;
  /** Name of the owner reported for the model. */
  owned_by: string;
}
|
|
24
|
-
|
|
25
|
-
/**
 * Envelope returned by `GET /v1/models`.
 */
interface ModelsResponse {
  /** Fixed discriminator marking the payload as a list. */
  object: 'list';
  /** The model entries contained in the list. */
  data: Array<ModelInfo>;
}
|
|
29
|
-
|
|
30
|
-
/**
 * Router HTTP server - proxies requests to backend llama.cpp servers.
 *
 * Endpoints:
 * - `GET  /health`               liveness information about the router process
 * - `GET  /v1/models`            models of all server configs whose status is `running`
 * - `POST /v1/chat/completions`  forwarded to the server configured for the requested model
 * - `POST /v1/embeddings`        same, but only for servers with embeddings enabled
 *
 * Server configs are re-read from the servers directory on every request, so
 * the router sees whatever is stored there at request time.
 */
class RouterServer {
  private config!: RouterConfig;
  private server!: http.Server;
  private logger!: RouterLogger;

  /**
   * Load `router.json`, set up the request logger, create the HTTP server
   * (without listening yet) and install SIGTERM/SIGINT shutdown handlers.
   *
   * @throws Error when the router configuration file does not exist.
   */
  async initialize(): Promise<void> {
    const configPath = path.join(getConfigDir(), 'router.json');
    if (!(await fileExists(configPath))) {
      throw new Error('Router configuration not found');
    }
    this.config = await readJson<RouterConfig>(configPath);

    this.logger = new RouterLogger(this.config.verbose);
    await this.logger.rotateIfNeeded();

    // handleRequest() reports its own failures to the client, so the returned
    // promise is intentionally not awaited here.
    this.server = http.createServer((req, res) => {
      void this.handleRequest(req, res);
    });

    // Both signals trigger the same shutdown: stop accepting connections,
    // then exit once the server has closed.
    for (const signal of ['SIGTERM', 'SIGINT'] as const) {
      process.on(signal, () => {
        console.error(`[Router] Received ${signal}, shutting down gracefully...`);
        this.server.close(() => {
          console.error('[Router] Server closed');
          process.exit(0);
        });
      });
    }
  }

  /**
   * Initialize the router and begin listening on the configured host/port.
   *
   * Resolves once the server is actually listening. Rejects when the listen
   * attempt fails (for example because the port is already in use), so the
   * caller can report the problem instead of the process dying on an
   * unhandled `error` event.
   */
  async start(): Promise<void> {
    await this.initialize();

    await new Promise<void>((resolve, reject) => {
      this.server.once('error', reject);
      this.server.listen(this.config.port, this.config.host, () => {
        this.server.off('error', reject);
        console.error(`[Router] Listening on http://${this.config.host}:${this.config.port}`);
        console.error(`[Router] PID: ${process.pid}`);
        resolve();
      });
    });
  }

  /**
   * Main request handler: applies CORS headers, answers preflight requests
   * and dispatches on method + path. Any error escaping a handler is logged
   * and answered with a 500 (unless a response has already been started).
   */
  private async handleRequest(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    res.setHeader('Access-Control-Allow-Origin', '*');
    res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');

    if (req.method === 'OPTIONS') {
      res.writeHead(200);
      res.end();
      return;
    }

    try {
      // Match on the path only, so a query string does not turn a known
      // endpoint into a 404.
      const pathname = (req.url ?? '').split('?')[0];

      switch (`${req.method} ${pathname}`) {
        case 'GET /health':
          await this.handleHealth(req, res);
          break;
        case 'GET /v1/models':
          await this.handleModels(req, res);
          break;
        case 'POST /v1/chat/completions':
          await this.handleChatCompletions(req, res);
          break;
        case 'POST /v1/embeddings':
          await this.handleEmbeddings(req, res);
          break;
        default:
          this.sendError(res, 404, 'Not Found', `Unknown endpoint: ${req.url}`);
      }
    } catch (error) {
      console.error('[Router] Error handling request:', error);
      const message = error instanceof Error ? error.message : String(error);
      this.sendError(res, 500, 'Internal Server Error', message);
    }
  }

  /**
   * Health check endpoint: reports process uptime and the current time.
   */
  private async handleHealth(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({
      status: 'healthy',
      uptime: process.uptime(),
      timestamp: new Date().toISOString(),
    }));
  }

  /**
   * List models endpoint - one entry per server config whose status is `running`.
   */
  private async handleModels(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    const servers = await this.getAllServers();

    const models: ModelInfo[] = servers
      .filter(server => server.status === 'running')
      .map(server => ({
        id: server.modelName,
        object: 'model',
        created: Math.floor(new Date(server.createdAt).getTime() / 1000),
        owned_by: 'llamacpp',
      }));

    const response: ModelsResponse = { object: 'list', data: models };

    res.writeHead(200, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(response));
  }

  /**
   * Chat completions endpoint - route to backend server.
   */
  private async handleChatCompletions(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    await this.routeToBackend(req, res, '/v1/chat/completions', false);
  }

  /**
   * Embeddings endpoint - route to backend server; the target server must
   * have embeddings enabled.
   */
  private async handleEmbeddings(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
    await this.routeToBackend(req, res, '/v1/embeddings', true);
  }

  /**
   * Shared pipeline for the model-routed POST endpoints: parse and validate
   * the body, resolve the backend for the requested model, proxy the request
   * and write exactly one log entry describing the outcome.
   *
   * @param endpoint          Path used both for logging and on the backend.
   * @param requireEmbeddings Reject with 400 when the server's `embeddings` flag is not set.
   * @throws Re-throws any unexpected error after logging it.
   */
  private async routeToBackend(
    req: http.IncomingMessage,
    res: http.ServerResponse,
    endpoint: string,
    requireEmbeddings: boolean
  ): Promise<void> {
    const timer = new RequestTimer();
    let modelName = 'unknown';
    let promptPreview: string | undefined;
    let backend: string | undefined;

    // Answer the client with an error and record the same status in the log.
    const reject = async (statusCode: number, title: string, details: string): Promise<void> => {
      this.sendError(res, statusCode, title, details);
      await this.logRequest(modelName, endpoint, statusCode, timer.elapsed(), details, backend, promptPreview);
    };

    try {
      const parsed = this.parseRequestBody(await this.readBody(req));
      if (!parsed.ok) {
        await reject(400, 'Bad Request', parsed.message);
        return;
      }
      const requestData = parsed.data;
      promptPreview = this.extractPromptPreview(requestData);

      const requestedModel = requestData.model;
      if (!requestedModel) {
        await reject(400, 'Bad Request', 'Missing "model" field in request');
        return;
      }
      if (typeof requestedModel !== 'string') {
        // Model matching relies on string operations; refuse other types
        // up front instead of failing with an internal error.
        await reject(400, 'Bad Request', 'Field "model" must be a string');
        return;
      }
      modelName = requestedModel;

      const server = await this.findServerForModel(modelName);
      if (!server) {
        await reject(404, 'Not Found', `No server found for model: ${modelName}`);
        return;
      }
      backend = `${server.host}:${server.port}`;

      if (server.status !== 'running') {
        await reject(503, 'Service Unavailable', `Server for model "${modelName}" is not running`);
        return;
      }

      if (requireEmbeddings && !server.embeddings) {
        await reject(400, 'Bad Request', `Server for model "${modelName}" does not have embeddings enabled`);
        return;
      }

      // Log the status the backend actually returned, not an assumed 200.
      const statusCode = await this.proxyRequest(`http://${backend}${endpoint}`, requestData, res);
      const succeeded = statusCode >= 200 && statusCode < 300;
      await this.logRequest(
        modelName,
        endpoint,
        statusCode,
        timer.elapsed(),
        succeeded ? undefined : `Backend responded with HTTP ${statusCode}`,
        backend,
        promptPreview
      );
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      await this.logRequest(
        modelName,
        endpoint,
        RouterServer.statusOf(error),
        timer.elapsed(),
        message,
        backend,
        promptPreview
      );
      throw error;
    }
  }

  /**
   * Parse a request body and make sure it is a JSON object.
   *
   * @returns `{ ok: true, data }` for a JSON object, otherwise
   *          `{ ok: false, message }` with a client-facing explanation.
   */
  private parseRequestBody(
    body: string
  ): { ok: true; data: Record<string, unknown> } | { ok: false; message: string } {
    let parsed: unknown;
    try {
      parsed = JSON.parse(body);
    } catch {
      return { ok: false, message: 'Invalid JSON in request body' };
    }

    // `null`, arrays and primitives are valid JSON but cannot carry a "model" field.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      return { ok: false, message: 'Request body must be a JSON object' };
    }
    return { ok: true, data: parsed as Record<string, unknown> };
  }

  /**
   * Status code to log for a failed request: the `statusCode` attached to the
   * error by proxyRequest() when present, otherwise 500.
   */
  private static statusOf(error: unknown): number {
    const code = (error as { statusCode?: unknown } | null | undefined)?.statusCode;
    return typeof code === 'number' ? code : 500;
  }

  /**
   * Proxy a JSON POST to a backend server and stream the response back.
   *
   * The promise settles exactly once:
   * - resolves with the backend's HTTP status after the response has been fully relayed;
   * - rejects with an Error carrying `statusCode` 502 (connection failure or
   *   truncated response), 504 (socket timeout, `config.requestTimeout` ms)
   *   or 499 (client went away first; non-standard code, used for logging only).
   *
   * When no response has been started yet, the matching error response is
   * sent to the client; otherwise the client connection is torn down so a
   * truncated stream does not hang.
   */
  private proxyRequest(
    backendUrl: string,
    requestData: unknown,
    res: http.ServerResponse
  ): Promise<number> {
    const url = new URL(backendUrl);
    const isHttps = url.protocol === 'https:';
    const httpModule = isHttps ? https : http;

    const requestBody = JSON.stringify(requestData);

    const options: http.RequestOptions = {
      hostname: url.hostname,
      port: url.port || (isHttps ? 443 : 80),
      path: url.pathname + url.search,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(requestBody),
      },
      timeout: this.config.requestTimeout,
    };

    return new Promise<number>((resolve, reject) => {
      let settled = false;

      // Run `action` only for the first terminal event; later events from
      // either socket are ignored.
      const settle = (action: () => void): void => {
        if (settled) return;
        settled = true;
        action();
      };

      const fail = (statusCode: number, title: string, details: string, cause: Error): void =>
        settle(() => {
          proxyReq.destroy();
          if (!res.headersSent) {
            this.sendError(res, statusCode, title, details);
          } else if (!res.writableEnded) {
            // Part of the backend response is already on the wire; the only
            // honest signal left is to close the connection.
            res.destroy();
          }
          reject(Object.assign(cause, { statusCode }));
        });

      const proxyReq = httpModule.request(options, (proxyRes) => {
        const statusCode = proxyRes.statusCode ?? 200;

        // Forward status and headers, then stream the body.
        res.writeHead(statusCode, proxyRes.headers);
        proxyRes.pipe(res);

        proxyRes.on('end', () => settle(() => resolve(statusCode)));

        const onTruncated = (cause?: Error): void =>
          fail(
            502,
            'Bad Gateway',
            'Backend connection closed before the response completed',
            cause ?? new Error('Backend response aborted')
          );
        proxyRes.on('error', onTruncated);
        proxyRes.on('close', () => {
          if (!proxyRes.complete) onTruncated();
        });
      });

      proxyReq.on('error', (error) => {
        if (settled) return;
        console.error('[Router] Proxy request failed:', error);
        fail(502, 'Bad Gateway', 'Failed to connect to backend server', error);
      });

      proxyReq.on('timeout', () => {
        if (settled) return;
        console.error('[Router] Proxy request timed out');
        fail(504, 'Gateway Timeout', 'Backend server did not respond in time', new Error('Request timeout'));
      });

      // Stop the backend from generating for a client that is no longer there.
      res.once('close', () => {
        if (res.writableEnded) return; // normal completion
        settle(() => {
          proxyReq.destroy();
          reject(Object.assign(new Error('Client closed connection'), { statusCode: 499 }));
        });
      });

      proxyReq.write(requestBody);
      proxyReq.end();
    });
  }

  /**
   * Read the complete request body and decode it as UTF-8.
   */
  private async readBody(req: http.IncomingMessage): Promise<string> {
    return new Promise((resolve, reject) => {
      const chunks: Buffer[] = [];
      req.on('data', (chunk: Buffer) => chunks.push(chunk));
      req.on('end', () => resolve(Buffer.concat(chunks).toString('utf-8')));
      req.on('error', reject);
    });
  }

  /**
   * Send a JSON error response. Does nothing when a response has already
   * been started, so it is safe to call from fallback error paths.
   */
  private sendError(res: http.ServerResponse, statusCode: number, error: string, details?: string): void {
    if (res.headersSent) return;

    const response: ErrorResponse = { error };
    if (details) response.details = details;

    res.writeHead(statusCode, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify(response));
  }

  /**
   * Load every `*.json` server configuration from the servers directory.
   *
   * Files are read in parallel; the result keeps directory-listing order.
   * A file that fails to load is logged and skipped. If the directory itself
   * cannot be read, the failure is logged and an empty list is returned.
   */
  private async getAllServers(): Promise<ServerConfig[]> {
    const serversDir = getServersDir();
    try {
      const files = await fs.readdir(serversDir);
      const configFiles = files.filter(f => f.endsWith('.json'));

      const loaded = await Promise.all(
        configFiles.map(async (file): Promise<ServerConfig | undefined> => {
          try {
            return await readJson<ServerConfig>(path.join(serversDir, file));
          } catch (error) {
            console.error(`[Router] Failed to load server config ${file}:`, error);
            return undefined;
          }
        })
      );

      return loaded.filter((config): config is ServerConfig => config !== undefined);
    } catch (error) {
      console.error('[Router] Failed to read servers directory:', error);
      return [];
    }
  }

  /**
   * Helper method to log a request. `status` is derived from the status
   * code: 2xx is `success`, everything else is `error`.
   */
  private async logRequest(
    model: string,
    endpoint: string,
    statusCode: number,
    durationMs: number,
    error?: string,
    backend?: string,
    prompt?: string
  ): Promise<void> {
    const entry: RouterLogEntry = {
      timestamp: RequestTimer.now(),
      model,
      endpoint,
      method: 'POST',
      status: statusCode >= 200 && statusCode < 300 ? 'success' : 'error',
      statusCode,
      durationMs,
      error,
      backend,
      prompt,
    };

    await this.logger.logRequest(entry);
  }

  /**
   * Extract a prompt preview from request data: at most the first 50
   * characters, with newlines replaced by spaces.
   *
   * Chat requests use the content of the last `user` message; otherwise the
   * `input` field is used (its first element when it is an array). Values
   * that are not strings are previewed as JSON.
   *
   * @returns The preview, or `undefined` when nothing usable is present.
   */
  private extractPromptPreview(requestData: Record<string, unknown>): string | undefined {
    const preview = (text: string): string => text.substring(0, 50).replace(/\n/g, ' ');
    // JSON.stringify yields undefined for values such as `undefined` itself.
    const asText = (value: unknown): string | undefined =>
      typeof value === 'string' ? value : (JSON.stringify(value) as string | undefined);

    try {
      const { messages, input } = requestData;

      if (messages && Array.isArray(messages)) {
        const lastUserMessage = [...messages].reverse().find(msg => msg?.role === 'user');
        if (lastUserMessage?.content) {
          const text = asText(lastUserMessage.content);
          if (text !== undefined) return preview(text);
        }
      }

      if (input) {
        const text = asText(Array.isArray(input) ? input[0] : input);
        if (text !== undefined) return preview(text);
      }

      return undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Find a server by model name among all stored server configurations.
   */
  private async findServerForModel(modelName: string): Promise<ServerConfig | null> {
    return this.matchServer(await this.getAllServers(), modelName);
  }

  /**
   * Pick the server whose model name best matches `modelName`.
   *
   * Matching gets progressively looser and the first tier with a hit wins:
   * exact, case-insensitive, case-insensitive with `.gguf` appended to the
   * request, then fully normalized (see normalizeModelName).
   *
   * @returns The matching config, or `null` when no tier matches.
   */
  private matchServer(servers: ServerConfig[], modelName: string): ServerConfig | null {
    // A config without a usable model name can never match; skipping it keeps
    // one malformed file from breaking routing for every request.
    const candidates = servers.filter(s => typeof s.modelName === 'string');

    const lowered = modelName.toLowerCase();
    const normalized = RouterServer.normalizeModelName(modelName);

    const tiers: Array<(s: ServerConfig) => boolean> = [
      s => s.modelName === modelName,
      s => s.modelName.toLowerCase() === lowered,
    ];
    if (!modelName.endsWith('.gguf')) {
      tiers.push(s => s.modelName.toLowerCase() === `${lowered}.gguf`);
    }
    tiers.push(s => RouterServer.normalizeModelName(s.modelName) === normalized);

    for (const matches of tiers) {
      const hit = candidates.find(matches);
      if (hit) return hit;
    }
    return null;
  }

  /**
   * Normalize a model name for flexible matching: lowercase, drop a trailing
   * `.gguf`, and treat underscores and hyphens as the same separator.
   */
  private static normalizeModelName(name: string): string {
    return name
      .toLowerCase()
      .replace(/\.gguf$/i, '')
      .replace(/[_-]/g, '-');
  }
}
|
|
521
|
-
|
|
522
|
-
/**
 * Main entry point: create a router and start it. A startup failure is
 * reported on stderr and ends the process with exit code 1.
 */
async function main() {
  try {
    const router = new RouterServer();
    await router.start();
  } catch (startupError) {
    console.error('[Router] Failed to start:', startupError);
    process.exit(1);
  }
}

// Start the router only when this file is executed directly; importing it
// just exposes the class.
if (require.main === module) {
  void main();
}

export { RouterServer };
|