@appkit/llamacpp-cli 1.9.0 → 1.10.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (93)
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +171 -42
  3. package/dist/cli.js +75 -10
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/completion.d.ts +9 -0
  6. package/dist/commands/completion.d.ts.map +1 -0
  7. package/dist/commands/completion.js +83 -0
  8. package/dist/commands/completion.js.map +1 -0
  9. package/dist/commands/monitor.js +1 -1
  10. package/dist/commands/monitor.js.map +1 -1
  11. package/dist/commands/ps.d.ts +1 -3
  12. package/dist/commands/ps.d.ts.map +1 -1
  13. package/dist/commands/ps.js +36 -115
  14. package/dist/commands/ps.js.map +1 -1
  15. package/dist/commands/router/config.d.ts +1 -0
  16. package/dist/commands/router/config.d.ts.map +1 -1
  17. package/dist/commands/router/config.js +7 -2
  18. package/dist/commands/router/config.js.map +1 -1
  19. package/dist/commands/router/logs.d.ts +12 -0
  20. package/dist/commands/router/logs.d.ts.map +1 -0
  21. package/dist/commands/router/logs.js +238 -0
  22. package/dist/commands/router/logs.js.map +1 -0
  23. package/dist/commands/tui.d.ts +2 -0
  24. package/dist/commands/tui.d.ts.map +1 -0
  25. package/dist/commands/tui.js +27 -0
  26. package/dist/commands/tui.js.map +1 -0
  27. package/dist/lib/completion.d.ts +5 -0
  28. package/dist/lib/completion.d.ts.map +1 -0
  29. package/dist/lib/completion.js +195 -0
  30. package/dist/lib/completion.js.map +1 -0
  31. package/dist/lib/model-downloader.d.ts +5 -1
  32. package/dist/lib/model-downloader.d.ts.map +1 -1
  33. package/dist/lib/model-downloader.js +53 -20
  34. package/dist/lib/model-downloader.js.map +1 -1
  35. package/dist/lib/router-logger.d.ts +61 -0
  36. package/dist/lib/router-logger.d.ts.map +1 -0
  37. package/dist/lib/router-logger.js +200 -0
  38. package/dist/lib/router-logger.js.map +1 -0
  39. package/dist/lib/router-manager.d.ts.map +1 -1
  40. package/dist/lib/router-manager.js +1 -0
  41. package/dist/lib/router-manager.js.map +1 -1
  42. package/dist/lib/router-server.d.ts +9 -0
  43. package/dist/lib/router-server.d.ts.map +1 -1
  44. package/dist/lib/router-server.js +169 -57
  45. package/dist/lib/router-server.js.map +1 -1
  46. package/dist/tui/ConfigApp.d.ts +7 -0
  47. package/dist/tui/ConfigApp.d.ts.map +1 -0
  48. package/dist/tui/ConfigApp.js +1002 -0
  49. package/dist/tui/ConfigApp.js.map +1 -0
  50. package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -1
  51. package/dist/tui/HistoricalMonitorApp.js +85 -49
  52. package/dist/tui/HistoricalMonitorApp.js.map +1 -1
  53. package/dist/tui/ModelsApp.d.ts +7 -0
  54. package/dist/tui/ModelsApp.d.ts.map +1 -0
  55. package/dist/tui/ModelsApp.js +362 -0
  56. package/dist/tui/ModelsApp.js.map +1 -0
  57. package/dist/tui/MultiServerMonitorApp.d.ts +6 -1
  58. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -1
  59. package/dist/tui/MultiServerMonitorApp.js +1038 -122
  60. package/dist/tui/MultiServerMonitorApp.js.map +1 -1
  61. package/dist/tui/RootNavigator.d.ts +7 -0
  62. package/dist/tui/RootNavigator.d.ts.map +1 -0
  63. package/dist/tui/RootNavigator.js +55 -0
  64. package/dist/tui/RootNavigator.js.map +1 -0
  65. package/dist/tui/SearchApp.d.ts +6 -0
  66. package/dist/tui/SearchApp.d.ts.map +1 -0
  67. package/dist/tui/SearchApp.js +451 -0
  68. package/dist/tui/SearchApp.js.map +1 -0
  69. package/dist/tui/SplashScreen.d.ts +16 -0
  70. package/dist/tui/SplashScreen.d.ts.map +1 -0
  71. package/dist/tui/SplashScreen.js +129 -0
  72. package/dist/tui/SplashScreen.js.map +1 -0
  73. package/dist/types/router-config.d.ts +1 -0
  74. package/dist/types/router-config.d.ts.map +1 -1
  75. package/package.json +1 -1
  76. package/src/cli.ts +41 -10
  77. package/src/commands/monitor.ts +1 -1
  78. package/src/commands/ps.ts +44 -133
  79. package/src/commands/router/config.ts +9 -2
  80. package/src/commands/router/logs.ts +256 -0
  81. package/src/commands/tui.ts +25 -0
  82. package/src/lib/model-downloader.ts +57 -20
  83. package/src/lib/router-logger.ts +201 -0
  84. package/src/lib/router-manager.ts +1 -0
  85. package/src/lib/router-server.ts +193 -62
  86. package/src/tui/ConfigApp.ts +1085 -0
  87. package/src/tui/HistoricalMonitorApp.ts +88 -49
  88. package/src/tui/ModelsApp.ts +368 -0
  89. package/src/tui/MultiServerMonitorApp.ts +1163 -122
  90. package/src/tui/RootNavigator.ts +74 -0
  91. package/src/tui/SearchApp.ts +511 -0
  92. package/src/tui/SplashScreen.ts +149 -0
  93. package/src/types/router-config.ts +1 -0
@@ -13,6 +13,11 @@ export interface DownloadProgress {
13
13
  speed: string;
14
14
  }
15
15
 
16
+ export interface DownloadOptions {
17
+ silent?: boolean; // Suppress console output (for TUI)
18
+ signal?: AbortSignal; // Abort signal for cancellation
19
+ }
20
+
16
21
  export class ModelDownloader {
17
22
  private modelsDir?: string;
18
23
  private getModelsDirFn?: () => Promise<string>;
@@ -68,7 +73,8 @@ export class ModelDownloader {
68
73
  private downloadFile(
69
74
  url: string,
70
75
  destPath: string,
71
- onProgress?: (downloaded: number, total: number) => void
76
+ onProgress?: (downloaded: number, total: number) => void,
77
+ signal?: AbortSignal
72
78
  ): Promise<void> {
73
79
  return new Promise((resolve, reject) => {
74
80
  const file = fs.createWriteStream(destPath);
@@ -77,6 +83,7 @@ export class ModelDownloader {
77
83
  let lastUpdateTime = Date.now();
78
84
  let lastDownloadedBytes = 0;
79
85
  let completed = false;
86
+ let request: ReturnType<typeof https.get> | null = null;
80
87
 
81
88
  const cleanup = (sigintHandler?: () => void) => {
82
89
  if (sigintHandler) {
@@ -95,22 +102,37 @@ export class ModelDownloader {
95
102
  };
96
103
 
97
104
  const sigintHandler = () => {
98
- request.destroy();
105
+ if (request) request.destroy();
99
106
  handleError(new Error('Download interrupted by user'), sigintHandler);
100
107
  };
101
108
 
102
- const request = https.get(url, { agent: new https.Agent({ keepAlive: false }) }, (response) => {
109
+ // Handle abort signal
110
+ const abortHandler = () => {
111
+ if (request) request.destroy();
112
+ handleError(new Error('Download cancelled'), sigintHandler);
113
+ };
114
+
115
+ if (signal) {
116
+ if (signal.aborted) {
117
+ handleError(new Error('Download cancelled'), sigintHandler);
118
+ return;
119
+ }
120
+ signal.addEventListener('abort', abortHandler, { once: true });
121
+ }
122
+
123
+ request = https.get(url, { agent: new https.Agent({ keepAlive: false }) }, (response) => {
103
124
  // Handle redirects (301, 302, 307, 308)
104
125
  if (response.statusCode === 301 || response.statusCode === 302 ||
105
126
  response.statusCode === 307 || response.statusCode === 308) {
106
127
  const redirectUrl = response.headers.location;
107
128
  if (redirectUrl) {
108
129
  cleanup(sigintHandler);
130
+ if (signal) signal.removeEventListener('abort', abortHandler);
109
131
  // Wait for file to close before starting new download
110
132
  file.close(() => {
111
133
  fs.unlink(destPath, () => {
112
134
  // Start recursive download only after cleanup is complete
113
- this.downloadFile(redirectUrl, destPath, onProgress)
135
+ this.downloadFile(redirectUrl, destPath, onProgress, signal)
114
136
  .then(resolve)
115
137
  .catch(reject);
116
138
  });
@@ -154,6 +176,7 @@ export class ModelDownloader {
154
176
  // Use callback to ensure close completes before resolving
155
177
  file.close((err) => {
156
178
  cleanup(sigintHandler);
179
+ if (signal) signal.removeEventListener('abort', abortHandler);
157
180
  if (err) reject(err);
158
181
  else resolve();
159
182
  });
@@ -161,10 +184,12 @@ export class ModelDownloader {
161
184
  });
162
185
 
163
186
  request.on('error', (err) => {
187
+ if (signal) signal.removeEventListener('abort', abortHandler);
164
188
  handleError(err, sigintHandler);
165
189
  });
166
190
 
167
191
  file.on('error', (err) => {
192
+ if (signal) signal.removeEventListener('abort', abortHandler);
168
193
  handleError(err, sigintHandler);
169
194
  });
170
195
 
@@ -200,15 +225,21 @@ export class ModelDownloader {
200
225
  repoId: string,
201
226
  filename: string,
202
227
  onProgress?: (progress: DownloadProgress) => void,
203
- modelsDir?: string
228
+ modelsDir?: string,
229
+ options?: DownloadOptions
204
230
  ): Promise<string> {
231
+ const silent = options?.silent ?? false;
232
+ const signal = options?.signal;
233
+
205
234
  // Use provided models directory or get from config
206
235
  const targetDir = modelsDir || await this.getModelsDirectory();
207
236
 
208
- console.log(chalk.blue(`📥 Downloading ${filename} from Hugging Face...`));
209
- console.log(chalk.dim(`Repository: ${repoId}`));
210
- console.log(chalk.dim(`Destination: ${targetDir}`));
211
- console.log();
237
+ if (!silent) {
238
+ console.log(chalk.blue(`📥 Downloading ${filename} from Hugging Face...`));
239
+ console.log(chalk.dim(`Repository: ${repoId}`));
240
+ console.log(chalk.dim(`Destination: ${targetDir}`));
241
+ console.log();
242
+ }
212
243
 
213
244
  // Build download URL
214
245
  const url = this.buildDownloadUrl(repoId, filename);
@@ -216,8 +247,10 @@ export class ModelDownloader {
216
247
 
217
248
  // Check if file already exists
218
249
  if (fs.existsSync(destPath)) {
219
- console.log(chalk.yellow(`⚠️ File already exists: ${filename}`));
220
- console.log(chalk.dim(' Remove it first or choose a different filename'));
250
+ if (!silent) {
251
+ console.log(chalk.yellow(`⚠️ File already exists: ${filename}`));
252
+ console.log(chalk.dim(' Remove it first or choose a different filename'));
253
+ }
221
254
  throw new Error('File already exists');
222
255
  }
223
256
 
@@ -237,8 +270,10 @@ export class ModelDownloader {
237
270
  lastTime = now;
238
271
  lastDownloaded = downloaded;
239
272
 
240
- // Display progress bar
241
- this.displayProgress(downloaded, total, filename);
273
+ // Display progress bar (only if not silent)
274
+ if (!silent) {
275
+ this.displayProgress(downloaded, total, filename);
276
+ }
242
277
 
243
278
  // Call user progress callback if provided
244
279
  if (onProgress) {
@@ -250,15 +285,17 @@ export class ModelDownloader {
250
285
  speed: `${formatBytes(speed)}/s`,
251
286
  });
252
287
  }
253
- });
288
+ }, signal);
254
289
 
255
- // Clear progress line and show completion
256
- process.stdout.write('\r\x1b[K');
257
- console.log(chalk.green('✅ Download complete!'));
290
+ if (!silent) {
291
+ // Clear progress line and show completion
292
+ process.stdout.write('\r\x1b[K');
293
+ console.log(chalk.green('✅ Download complete!'));
258
294
 
259
- const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
260
- console.log(chalk.dim(` Time: ${totalTime}s`));
261
- console.log(chalk.dim(` Location: ${destPath}`));
295
+ const totalTime = ((Date.now() - startTime) / 1000).toFixed(1);
296
+ console.log(chalk.dim(` Time: ${totalTime}s`));
297
+ console.log(chalk.dim(` Location: ${destPath}`));
298
+ }
262
299
 
263
300
  return destPath;
264
301
  }
@@ -0,0 +1,201 @@
1
+ import * as fs from 'fs/promises';
2
+ import * as path from 'path';
3
+ import { getLogsDir } from '../utils/file-utils';
4
+
5
+ export interface RouterLogEntry {
6
+ timestamp: string;
7
+ model: string;
8
+ endpoint: string;
9
+ method: string;
10
+ status: 'success' | 'error';
11
+ statusCode: number;
12
+ durationMs: number;
13
+ error?: string;
14
+ backend?: string; // e.g., "localhost:9001"
15
+ prompt?: string; // First part of the prompt/message
16
+ }
17
+
18
+ export class RouterLogger {
19
+ private logFilePath: string;
20
+ private verbose: boolean;
21
+
22
+ constructor(verbose: boolean = false) {
23
+ this.verbose = verbose;
24
+ this.logFilePath = path.join(getLogsDir(), 'router.log');
25
+ }
26
+
27
+ /**
28
+ * Log a request with timing and outcome
29
+ */
30
+ async logRequest(entry: RouterLogEntry): Promise<void> {
31
+ // Human-readable format for console
32
+ const humanLog = this.formatHumanReadable(entry);
33
+
34
+ // Output request activity to stdout (separate from system messages on stderr)
35
+ console.log(humanLog);
36
+
37
+ // Verbose mode: append detailed JSON to log file
38
+ if (this.verbose) {
39
+ const jsonLog = JSON.stringify(entry) + '\n';
40
+ try {
41
+ await fs.appendFile(this.logFilePath, jsonLog, 'utf-8');
42
+ } catch (error) {
43
+ console.error('[Router Logger] Failed to write to log file:', error);
44
+ }
45
+ }
46
+ }
47
+
48
+ /**
49
+ * Format log entry for human reading (console output)
50
+ */
51
+ private formatHumanReadable(entry: RouterLogEntry): string {
52
+ const { timestamp, model, endpoint, method, status, statusCode, durationMs, error, backend, prompt } = entry;
53
+
54
+ // Color coding based on status (using ANSI codes)
55
+ const statusColor = status === 'success' ? '\x1b[32m' : '\x1b[31m'; // Green or Red
56
+ const resetColor = '\x1b[0m';
57
+
58
+ // Base log format (no [Router] prefix, no icons)
59
+ let log = `${statusColor}${statusCode}${resetColor} ${method} ${endpoint} → ${model}`;
60
+
61
+ // Add backend if available
62
+ if (backend) {
63
+ log += ` (${backend})`;
64
+ }
65
+
66
+ // Add duration
67
+ log += ` ${durationMs}ms`;
68
+
69
+ // Add prompt preview if available
70
+ if (prompt) {
71
+ log += ` | "${prompt}"`;
72
+ }
73
+
74
+ // Add error if present
75
+ if (error) {
76
+ log += ` | Error: ${error}`;
77
+ }
78
+
79
+ return log;
80
+ }
81
+
82
+ /**
83
+ * Format log entry for LLM parsing (verbose JSON format)
84
+ */
85
+ static formatForLLM(entry: RouterLogEntry): string {
86
+ return JSON.stringify(entry, null, 2);
87
+ }
88
+
89
+ /**
90
+ * Read log file and return all entries (for verbose mode)
91
+ */
92
+ async readLogs(limit?: number): Promise<RouterLogEntry[]> {
93
+ try {
94
+ const content = await fs.readFile(this.logFilePath, 'utf-8');
95
+ const lines = content.trim().split('\n').filter(line => line);
96
+
97
+ // Parse JSON entries
98
+ const entries = lines
99
+ .map(line => {
100
+ try {
101
+ return JSON.parse(line) as RouterLogEntry;
102
+ } catch {
103
+ return null;
104
+ }
105
+ })
106
+ .filter((entry): entry is RouterLogEntry => entry !== null);
107
+
108
+ // Apply limit if specified
109
+ if (limit && limit > 0) {
110
+ return entries.slice(-limit);
111
+ }
112
+
113
+ return entries;
114
+ } catch (error) {
115
+ // Log file doesn't exist or can't be read
116
+ return [];
117
+ }
118
+ }
119
+
120
+ /**
121
+ * Clear the log file
122
+ */
123
+ async clearLogs(): Promise<void> {
124
+ try {
125
+ await fs.writeFile(this.logFilePath, '', 'utf-8');
126
+ console.error('[Router Logger] Log file cleared');
127
+ } catch (error) {
128
+ console.error('[Router Logger] Failed to clear log file:', error);
129
+ }
130
+ }
131
+
132
+ /**
133
+ * Get log file size
134
+ */
135
+ async getLogFileSize(): Promise<number> {
136
+ try {
137
+ const stats = await fs.stat(this.logFilePath);
138
+ return stats.size;
139
+ } catch {
140
+ return 0;
141
+ }
142
+ }
143
+
144
+ /**
145
+ * Rotate log file if it exceeds threshold
146
+ */
147
+ async rotateIfNeeded(thresholdMB: number = 100): Promise<boolean> {
148
+ const size = await this.getLogFileSize();
149
+ const thresholdBytes = thresholdMB * 1024 * 1024;
150
+
151
+ if (size > thresholdBytes) {
152
+ try {
153
+ // Generate timestamp
154
+ const timestamp = new Date()
155
+ .toISOString()
156
+ .replace(/T/, '-')
157
+ .replace(/:/g, '-')
158
+ .replace(/\..+/, '');
159
+
160
+ const logsDir = getLogsDir();
161
+ const archivedPath = path.join(logsDir, `router.${timestamp}.log`);
162
+
163
+ // Rename current log to archived version
164
+ await fs.rename(this.logFilePath, archivedPath);
165
+
166
+ console.error(`[Router Logger] Rotated log file to ${archivedPath}`);
167
+ return true;
168
+ } catch (error) {
169
+ console.error('[Router Logger] Failed to rotate log file:', error);
170
+ return false;
171
+ }
172
+ }
173
+
174
+ return false;
175
+ }
176
+ }
177
+
178
+ /**
179
+ * Utility class for tracking request timing
180
+ */
181
+ export class RequestTimer {
182
+ private startTime: number;
183
+
184
+ constructor() {
185
+ this.startTime = Date.now();
186
+ }
187
+
188
+ /**
189
+ * Get elapsed time in milliseconds
190
+ */
191
+ elapsed(): number {
192
+ return Date.now() - this.startTime;
193
+ }
194
+
195
+ /**
196
+ * Get current ISO timestamp
197
+ */
198
+ static now(): string {
199
+ return new Date().toISOString();
200
+ }
201
+ }
@@ -56,6 +56,7 @@ export class RouterManager {
56
56
  stderrPath: path.join(this.logsDir, 'router.stderr'),
57
57
  healthCheckInterval: 5000,
58
58
  requestTimeout: 120000,
59
+ verbose: false,
59
60
  status: 'stopped',
60
61
  createdAt: new Date().toISOString(),
61
62
  };
@@ -8,6 +8,7 @@ import * as path from 'path';
8
8
  import { RouterConfig } from '../types/router-config';
9
9
  import { ServerConfig } from '../types/server-config';
10
10
  import { readJson, fileExists, getConfigDir, getServersDir } from '../utils/file-utils';
11
+ import { RouterLogger, RequestTimer, RouterLogEntry } from './router-logger';
11
12
 
12
13
  interface ErrorResponse {
13
14
  error: string;
@@ -32,6 +33,7 @@ interface ModelsResponse {
32
33
  class RouterServer {
33
34
  private config!: RouterConfig;
34
35
  private server!: http.Server;
36
+ private logger!: RouterLogger;
35
37
 
36
38
  async initialize(): Promise<void> {
37
39
  // Load router config
@@ -41,6 +43,12 @@ class RouterServer {
41
43
  }
42
44
  this.config = await readJson<RouterConfig>(configPath);
43
45
 
46
+ // Initialize logger with verbose setting
47
+ this.logger = new RouterLogger(this.config.verbose);
48
+
49
+ // Rotate log file if needed
50
+ await this.logger.rotateIfNeeded();
51
+
44
52
  // Create HTTP server
45
53
  this.server = http.createServer(async (req, res) => {
46
54
  await this.handleRequest(req, res);
@@ -77,9 +85,6 @@ class RouterServer {
77
85
  * Main request handler
78
86
  */
79
87
  private async handleRequest(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
80
- // Log request
81
- console.error(`[Router] ${req.method} ${req.url}`);
82
-
83
88
  // CORS headers
84
89
  res.setHeader('Access-Control-Allow-Origin', '*');
85
90
  res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
@@ -150,82 +155,145 @@ class RouterServer {
150
155
  * Chat completions endpoint - route to backend server
151
156
  */
152
157
  private async handleChatCompletions(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
153
- // Parse request body
154
- const body = await this.readBody(req);
155
- let requestData: any;
158
+ const timer = new RequestTimer();
159
+ let modelName = 'unknown';
160
+ let statusCode = 500;
161
+ let errorMsg: string | undefined;
162
+ let promptPreview: string | undefined;
163
+
156
164
  try {
157
- requestData = JSON.parse(body);
158
- } catch (error) {
159
- this.sendError(res, 400, 'Bad Request', 'Invalid JSON in request body');
160
- return;
161
- }
165
+ // Parse request body
166
+ const body = await this.readBody(req);
167
+ let requestData: any;
168
+ try {
169
+ requestData = JSON.parse(body);
170
+ } catch (error) {
171
+ statusCode = 400;
172
+ errorMsg = 'Invalid JSON in request body';
173
+ this.sendError(res, statusCode, 'Bad Request', errorMsg);
174
+ await this.logRequest(modelName, '/v1/chat/completions', statusCode, timer.elapsed(), errorMsg);
175
+ return;
176
+ }
162
177
 
163
- // Extract model name
164
- const modelName = requestData.model;
165
- if (!modelName) {
166
- this.sendError(res, 400, 'Bad Request', 'Missing "model" field in request');
167
- return;
168
- }
178
+ // Extract model name and prompt preview
179
+ modelName = requestData.model || 'unknown';
180
+ promptPreview = this.extractPromptPreview(requestData);
169
181
 
170
- // Find server for model
171
- const server = await this.findServerForModel(modelName);
172
- if (!server) {
173
- this.sendError(res, 404, 'Not Found', `No server found for model: ${modelName}`);
174
- return;
175
- }
182
+ if (!requestData.model) {
183
+ statusCode = 400;
184
+ errorMsg = 'Missing "model" field in request';
185
+ this.sendError(res, statusCode, 'Bad Request', errorMsg);
186
+ await this.logRequest(modelName, '/v1/chat/completions', statusCode, timer.elapsed(), errorMsg, undefined, promptPreview);
187
+ return;
188
+ }
176
189
 
177
- if (server.status !== 'running') {
178
- this.sendError(res, 503, 'Service Unavailable', `Server for model "${modelName}" is not running`);
179
- return;
180
- }
190
+ // Find server for model
191
+ const server = await this.findServerForModel(modelName);
192
+ if (!server) {
193
+ statusCode = 404;
194
+ errorMsg = `No server found for model: ${modelName}`;
195
+ this.sendError(res, statusCode, 'Not Found', errorMsg);
196
+ await this.logRequest(modelName, '/v1/chat/completions', statusCode, timer.elapsed(), errorMsg, undefined, promptPreview);
197
+ return;
198
+ }
181
199
 
182
- // Proxy request to backend
183
- const backendUrl = `http://${server.host}:${server.port}/v1/chat/completions`;
184
- await this.proxyRequest(backendUrl, requestData, req, res);
200
+ if (server.status !== 'running') {
201
+ statusCode = 503;
202
+ errorMsg = `Server for model "${modelName}" is not running`;
203
+ this.sendError(res, statusCode, 'Service Unavailable', errorMsg);
204
+ await this.logRequest(modelName, '/v1/chat/completions', statusCode, timer.elapsed(), errorMsg, `${server.host}:${server.port}`, promptPreview);
205
+ return;
206
+ }
207
+
208
+ // Proxy request to backend
209
+ const backendUrl = `http://${server.host}:${server.port}/v1/chat/completions`;
210
+ await this.proxyRequest(backendUrl, requestData, req, res);
211
+
212
+ // Log success
213
+ statusCode = 200;
214
+ await this.logRequest(modelName, '/v1/chat/completions', statusCode, timer.elapsed(), undefined, `${server.host}:${server.port}`, promptPreview);
215
+ } catch (error) {
216
+ errorMsg = (error as Error).message;
217
+ await this.logRequest(modelName, '/v1/chat/completions', statusCode, timer.elapsed(), errorMsg, undefined, promptPreview);
218
+ throw error;
219
+ }
185
220
  }
186
221
 
187
222
  /**
188
223
  * Embeddings endpoint - route to backend server
189
224
  */
190
225
  private async handleEmbeddings(req: http.IncomingMessage, res: http.ServerResponse): Promise<void> {
191
- // Parse request body
192
- const body = await this.readBody(req);
193
- let requestData: any;
226
+ const timer = new RequestTimer();
227
+ let modelName = 'unknown';
228
+ let statusCode = 500;
229
+ let errorMsg: string | undefined;
230
+ let promptPreview: string | undefined;
231
+
194
232
  try {
195
- requestData = JSON.parse(body);
196
- } catch (error) {
197
- this.sendError(res, 400, 'Bad Request', 'Invalid JSON in request body');
198
- return;
199
- }
233
+ // Parse request body
234
+ const body = await this.readBody(req);
235
+ let requestData: any;
236
+ try {
237
+ requestData = JSON.parse(body);
238
+ } catch (error) {
239
+ statusCode = 400;
240
+ errorMsg = 'Invalid JSON in request body';
241
+ this.sendError(res, statusCode, 'Bad Request', errorMsg);
242
+ await this.logRequest(modelName, '/v1/embeddings', statusCode, timer.elapsed(), errorMsg);
243
+ return;
244
+ }
200
245
 
201
- // Extract model name
202
- const modelName = requestData.model;
203
- if (!modelName) {
204
- this.sendError(res, 400, 'Bad Request', 'Missing "model" field in request');
205
- return;
206
- }
246
+ // Extract model name and prompt preview
247
+ modelName = requestData.model || 'unknown';
248
+ promptPreview = this.extractPromptPreview(requestData);
207
249
 
208
- // Find server for model
209
- const server = await this.findServerForModel(modelName);
210
- if (!server) {
211
- this.sendError(res, 404, 'Not Found', `No server found for model: ${modelName}`);
212
- return;
213
- }
250
+ if (!requestData.model) {
251
+ statusCode = 400;
252
+ errorMsg = 'Missing "model" field in request';
253
+ this.sendError(res, statusCode, 'Bad Request', errorMsg);
254
+ await this.logRequest(modelName, '/v1/embeddings', statusCode, timer.elapsed(), errorMsg, undefined, promptPreview);
255
+ return;
256
+ }
214
257
 
215
- if (server.status !== 'running') {
216
- this.sendError(res, 503, 'Service Unavailable', `Server for model "${modelName}" is not running`);
217
- return;
218
- }
258
+ // Find server for model
259
+ const server = await this.findServerForModel(modelName);
260
+ if (!server) {
261
+ statusCode = 404;
262
+ errorMsg = `No server found for model: ${modelName}`;
263
+ this.sendError(res, statusCode, 'Not Found', errorMsg);
264
+ await this.logRequest(modelName, '/v1/embeddings', statusCode, timer.elapsed(), errorMsg, undefined, promptPreview);
265
+ return;
266
+ }
219
267
 
220
- // Check if server has embeddings enabled
221
- if (!server.embeddings) {
222
- this.sendError(res, 400, 'Bad Request', `Server for model "${modelName}" does not have embeddings enabled`);
223
- return;
224
- }
268
+ if (server.status !== 'running') {
269
+ statusCode = 503;
270
+ errorMsg = `Server for model "${modelName}" is not running`;
271
+ this.sendError(res, statusCode, 'Service Unavailable', errorMsg);
272
+ await this.logRequest(modelName, '/v1/embeddings', statusCode, timer.elapsed(), errorMsg, `${server.host}:${server.port}`, promptPreview);
273
+ return;
274
+ }
275
+
276
+ // Check if server has embeddings enabled
277
+ if (!server.embeddings) {
278
+ statusCode = 400;
279
+ errorMsg = `Server for model "${modelName}" does not have embeddings enabled`;
280
+ this.sendError(res, statusCode, 'Bad Request', errorMsg);
281
+ await this.logRequest(modelName, '/v1/embeddings', statusCode, timer.elapsed(), errorMsg, `${server.host}:${server.port}`, promptPreview);
282
+ return;
283
+ }
225
284
 
226
- // Proxy request to backend
227
- const backendUrl = `http://${server.host}:${server.port}/v1/embeddings`;
228
- await this.proxyRequest(backendUrl, requestData, req, res);
285
+ // Proxy request to backend
286
+ const backendUrl = `http://${server.host}:${server.port}/v1/embeddings`;
287
+ await this.proxyRequest(backendUrl, requestData, req, res);
288
+
289
+ // Log success
290
+ statusCode = 200;
291
+ await this.logRequest(modelName, '/v1/embeddings', statusCode, timer.elapsed(), undefined, `${server.host}:${server.port}`, promptPreview);
292
+ } catch (error) {
293
+ errorMsg = (error as Error).message;
294
+ await this.logRequest(modelName, '/v1/embeddings', statusCode, timer.elapsed(), errorMsg, undefined, promptPreview);
295
+ throw error;
296
+ }
229
297
  }
230
298
 
231
299
  /**
@@ -343,6 +411,69 @@ class RouterServer {
343
411
  }
344
412
  }
345
413
 
414
+ /**
415
+ * Helper method to log a request
416
+ */
417
+ private async logRequest(
418
+ model: string,
419
+ endpoint: string,
420
+ statusCode: number,
421
+ durationMs: number,
422
+ error?: string,
423
+ backend?: string,
424
+ prompt?: string
425
+ ): Promise<void> {
426
+ const entry: RouterLogEntry = {
427
+ timestamp: RequestTimer.now(),
428
+ model,
429
+ endpoint,
430
+ method: 'POST',
431
+ status: statusCode >= 200 && statusCode < 300 ? 'success' : 'error',
432
+ statusCode,
433
+ durationMs,
434
+ error,
435
+ backend,
436
+ prompt,
437
+ };
438
+
439
+ await this.logger.logRequest(entry);
440
+ }
441
+
442
+ /**
443
+ * Extract prompt preview from request data (first 50 chars)
444
+ */
445
+ private extractPromptPreview(requestData: any): string | undefined {
446
+ try {
447
+ // For chat completions, get the last user message
448
+ if (requestData.messages && Array.isArray(requestData.messages)) {
449
+ const lastUserMessage = [...requestData.messages]
450
+ .reverse()
451
+ .find((msg: any) => msg.role === 'user');
452
+
453
+ if (lastUserMessage?.content) {
454
+ const content = typeof lastUserMessage.content === 'string'
455
+ ? lastUserMessage.content
456
+ : JSON.stringify(lastUserMessage.content);
457
+ return content.substring(0, 50).replace(/\n/g, ' ');
458
+ }
459
+ }
460
+
461
+ // For embeddings, get the input text
462
+ if (requestData.input) {
463
+ const input = typeof requestData.input === 'string'
464
+ ? requestData.input
465
+ : Array.isArray(requestData.input)
466
+ ? requestData.input[0]
467
+ : JSON.stringify(requestData.input);
468
+ return input.substring(0, 50).replace(/\n/g, ' ');
469
+ }
470
+
471
+ return undefined;
472
+ } catch {
473
+ return undefined;
474
+ }
475
+ }
476
+
346
477
  /**
347
478
  * Find a server by model name
348
479
  */