@appkit/llamacpp-cli 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +87 -1
  3. package/dist/cli.js +14 -0
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/monitor.d.ts +2 -0
  6. package/dist/commands/monitor.d.ts.map +1 -0
  7. package/dist/commands/monitor.js +76 -0
  8. package/dist/commands/monitor.js.map +1 -0
  9. package/dist/lib/metrics-aggregator.d.ts +39 -0
  10. package/dist/lib/metrics-aggregator.d.ts.map +1 -0
  11. package/dist/lib/metrics-aggregator.js +200 -0
  12. package/dist/lib/metrics-aggregator.js.map +1 -0
  13. package/dist/lib/system-collector.d.ts +75 -0
  14. package/dist/lib/system-collector.d.ts.map +1 -0
  15. package/dist/lib/system-collector.js +310 -0
  16. package/dist/lib/system-collector.js.map +1 -0
  17. package/dist/tui/MonitorApp.d.ts +4 -0
  18. package/dist/tui/MonitorApp.d.ts.map +1 -0
  19. package/dist/tui/MonitorApp.js +293 -0
  20. package/dist/tui/MonitorApp.js.map +1 -0
  21. package/dist/tui/MultiServerMonitorApp.d.ts +4 -0
  22. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -0
  23. package/dist/tui/MultiServerMonitorApp.js +496 -0
  24. package/dist/tui/MultiServerMonitorApp.js.map +1 -0
  25. package/dist/tui/components/ErrorState.d.ts +8 -0
  26. package/dist/tui/components/ErrorState.d.ts.map +1 -0
  27. package/dist/tui/components/ErrorState.js +22 -0
  28. package/dist/tui/components/ErrorState.js.map +1 -0
  29. package/dist/tui/components/LoadingState.d.ts +8 -0
  30. package/dist/tui/components/LoadingState.d.ts.map +1 -0
  31. package/dist/tui/components/LoadingState.js +21 -0
  32. package/dist/tui/components/LoadingState.js.map +1 -0
  33. package/dist/types/monitor-types.d.ts +122 -0
  34. package/dist/types/monitor-types.d.ts.map +1 -0
  35. package/dist/types/monitor-types.js +3 -0
  36. package/dist/types/monitor-types.js.map +1 -0
  37. package/dist/utils/process-utils.d.ts +16 -1
  38. package/dist/utils/process-utils.d.ts.map +1 -1
  39. package/dist/utils/process-utils.js +144 -27
  40. package/dist/utils/process-utils.js.map +1 -1
  41. package/package.json +4 -2
  42. package/src/cli.ts +14 -0
  43. package/src/commands/monitor.ts +90 -0
  44. package/src/lib/metrics-aggregator.ts +244 -0
  45. package/src/lib/system-collector.ts +312 -0
  46. package/src/tui/MonitorApp.ts +361 -0
  47. package/src/tui/MultiServerMonitorApp.ts +547 -0
  48. package/src/types/monitor-types.ts +161 -0
  49. package/src/utils/process-utils.ts +160 -26
@@ -0,0 +1,547 @@
1
+ import blessed from 'blessed';
2
+ import { ServerConfig } from '../types/server-config.js';
3
+ import { MetricsAggregator } from '../lib/metrics-aggregator.js';
4
+ import { SystemCollector } from '../lib/system-collector.js';
5
+ import { MonitorData, SystemMetrics } from '../types/monitor-types.js';
6
+
7
+ type ViewMode = 'list' | 'detail';
8
+
9
/**
 * Per-server entry held in the monitor's data map.
 */
interface ServerMonitorData {
  /** Configuration of the server this entry belongs to. */
  server: ServerConfig;
  /** Most recently built monitor data, or null when none is held. */
  data: MonitorData | null;
  /** Message of the last collection error, or null when there is none. */
  error: string | null;
}
14
+
15
+ export async function createMultiServerMonitorUI(
16
+ screen: blessed.Widgets.Screen,
17
+ servers: ServerConfig[]
18
+ ): Promise<void> {
19
// Polling / view state shared by the nested helpers of this function
let updateInterval = 2000;
let intervalId: NodeJS.Timeout | null = null;
let viewMode: ViewMode = 'list';
let selectedServerIndex = 0;
let isLoading = false;
let lastSystemMetrics: SystemMetrics | null = null;

// Spinner animation state
const spinnerFrames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
let spinnerFrameIndex = 0;
let spinnerIntervalId: NodeJS.Timeout | null = null;

const systemCollector = new SystemCollector();
const aggregators = new Map<string, MetricsAggregator>();
const serverDataMap = new Map<string, ServerMonitorData>();

// One aggregator and one (initially empty) data entry per server
servers.forEach((server) => {
  aggregators.set(server.id, new MetricsAggregator(server));
  serverDataMap.set(server.id, { server, data: null, error: null });
});

// Single scrollable content box filling the whole screen
const contentBox = blessed.box({
  top: 0,
  left: 0,
  width: '100%',
  height: '100%',
  tags: true,
  scrollable: true,
  alwaysScroll: true,
  keys: true,
  vi: true,
  mouse: true,
  scrollbar: {
    ch: '█',
    style: { fg: 'blue' },
  },
});
screen.append(contentBox);
65
+
66
/**
 * Build a fixed-width text progress bar such as `[███░░░]`.
 *
 * @param percentage - Fill level in percent. Values below 0 (and NaN) render as
 *   empty, values above 100 render as full, so the bar never exceeds `width`.
 * @param width - Number of cells between the brackets (default 30).
 * @returns The bar: `width` cells surrounded by `[` and `]`.
 */
function createProgressBar(percentage: number, width: number = 30): string {
  // Clamp first: an unclamped value above 100 would overflow the bar, and an
  // infinite value would make String.prototype.repeat throw a RangeError.
  const clamped = Number.isNaN(percentage) ? 0 : Math.min(100, Math.max(0, percentage));
  const cells = Number.isFinite(width) ? Math.max(0, Math.trunc(width)) : 0;
  const filled = Math.round((clamped / 100) * cells);
  const empty = cells - filled;
  return '[' + '█'.repeat(filled) + '░'.repeat(empty) + ']';
}
72
+
73
/**
 * Render the "System Resources" section shared by the list and detail views.
 *
 * @param systemMetrics - Metrics to display, or null to show a placeholder line.
 * @returns Blessed-tagged text for the section.
 */
function renderSystemResources(systemMetrics: SystemMetrics | null): string {
  const columns = (screen.width as number) || 80;
  const parts: string[] = [
    '{bold}System Resources{/bold}\n',
    '─'.repeat(columns - 2) + '\n',
  ];

  if (!systemMetrics) {
    parts.push('{gray-fg}Collecting system metrics...{/gray-fg}\n');
    return parts.join('');
  }

  const { gpuUsage, cpuUsage, aneUsage, temperature, memoryUsed, memoryTotal, warnings } =
    systemMetrics;

  if (gpuUsage !== undefined) {
    let gpuLine = `GPU: {cyan-fg}${createProgressBar(gpuUsage)}{/cyan-fg} ${Math.round(gpuUsage)}%`;
    // Temperature is only appended to the GPU line
    if (temperature !== undefined) {
      gpuLine += ` - ${Math.round(temperature)}°C`;
    }
    parts.push(gpuLine + '\n');
  }

  if (cpuUsage !== undefined) {
    parts.push(`CPU: {cyan-fg}${createProgressBar(cpuUsage)}{/cyan-fg} ${Math.round(cpuUsage)}%\n`);
  }

  // ANE usage is shown only when it exceeds 1%
  if (aneUsage !== undefined && aneUsage > 1) {
    parts.push(`ANE: {cyan-fg}${createProgressBar(aneUsage)}{/cyan-fg} ${Math.round(aneUsage)}%\n`);
  }

  if (memoryTotal > 0) {
    const bytesPerGB = 1024 ** 3;
    const usedGB = memoryUsed / bytesPerGB;
    const totalGB = memoryTotal / bytesPerGB;
    const usedPercent = (memoryUsed / memoryTotal) * 100;
    parts.push(`Memory: {cyan-fg}${createProgressBar(usedPercent)}{/cyan-fg} ${Math.round(usedPercent)}% `);
    parts.push(`(${usedGB.toFixed(1)} / ${totalGB.toFixed(1)} GB)\n`);
  }

  if (warnings && warnings.length > 0) {
    parts.push(`\n{yellow-fg}⚠ ${warnings.join(', ')}{/yellow-fg}\n`);
  }

  return parts.join('');
}
122
+
123
/**
 * Enter the loading state: start the spinner animation and repaint the
 * current view immediately. Does nothing when already loading.
 */
function showLoading(): void {
  if (isLoading) {
    return; // Already loading
  }

  // Repaint whichever view is active, using the last stored system metrics
  const repaint = (): void => {
    const rendered =
      viewMode === 'list'
        ? renderListView(lastSystemMetrics)
        : renderDetailView(lastSystemMetrics);
    contentBox.setContent(rendered);
    screen.render();
  };

  isLoading = true;
  spinnerFrameIndex = 0;

  // Advance the spinner one frame every 80ms and redraw
  spinnerIntervalId = setInterval(() => {
    spinnerFrameIndex = (spinnerFrameIndex + 1) % spinnerFrames.length;
    repaint();
  }, 80);

  // Immediate first render
  repaint();
}
155
+
156
/**
 * Leave the loading state and stop the spinner timer, if one is running.
 */
function hideLoading(): void {
  isLoading = false;
  if (spinnerIntervalId === null) {
    return;
  }
  clearInterval(spinnerIntervalId);
  spinnerIntervalId = null;
}
164
+
165
/**
 * Render the list view: header, key hints, system resources and one table
 * row per configured server.
 *
 * @param systemMetrics - Shared system metrics, or null when none are available yet.
 * @returns Blessed-tagged text for the content box.
 */
function renderListView(systemMetrics: SystemMetrics | null): string {
  const termWidth = (screen.width as number) || 80;
  const divider = '─'.repeat(termWidth - 2);
  let content = '';

  // Header
  content += '{bold}{blue-fg}═══ llama.cpp Multi-Server Monitor ═══{/blue-fg}{/bold}\n';

  // Status line with optional spinner
  const statusPlainText = 'Press 1-9 for details | [F] Filter | [Q] Quit';
  const spinnerChar = isLoading ? spinnerFrames[spinnerFrameIndex] : '';
  const spinnerText = spinnerChar ? ` {cyan-fg}${spinnerChar}{/cyan-fg}` : '';

  content += `{gray-fg}${statusPlainText}${spinnerText}{/gray-fg}\n\n`;

  // System resources
  content += renderSystemResources(systemMetrics);
  content += '\n';

  // Server list header
  const runningCount = servers.filter(s => s.status === 'running').length;
  const stoppedCount = servers.length - runningCount;
  content += `{bold}Servers (${runningCount} running, ${stoppedCount} stopped){/bold}\n`;
  content += '{gray-fg}Press number for details{/gray-fg}\n';
  content += divider + '\n';

  // Table header
  content += '{bold}# │ Server ID │ Port │ Status │ Slots │ tok/s │ Memory{/bold}\n';
  content += divider + '\n';

  // Server rows
  servers.forEach((server, index) => {
    const serverData = serverDataMap.get(server.id);
    const num = index + 1;

    // Server ID (pad, then truncate to 16 characters)
    const serverId = server.id.padEnd(16).substring(0, 16);

    // Port
    const port = server.port.toString().padStart(4);

    // Status
    let status = '';
    if (serverData?.data) {
      if (serverData.data.server.healthy) {
        status = '{green-fg}● RUN{/green-fg} ';
      } else {
        status = '{red-fg}● ERR{/red-fg} ';
      }
    } else if (server.status === 'running' && serverData?.error) {
      // Collection failed for a server that should be running: report the
      // failure instead of leaving the row in the pending "..." state forever.
      status = '{red-fg}● ERR{/red-fg} ';
    } else if (server.status === 'running') {
      status = '{yellow-fg}● ...{/yellow-fg} ';
    } else {
      status = '{gray-fg}○ STOP{/gray-fg}';
    }

    // Slots
    let slots = '- ';
    if (serverData?.data?.server) {
      const active = serverData.data.server.activeSlots;
      const total = serverData.data.server.totalSlots;
      slots = `${active}/${total}`.padStart(5);
    }

    // tok/s
    let tokensPerSec = '- ';
    if (serverData?.data?.server.avgGenerateSpeed !== undefined &&
        serverData.data.server.avgGenerateSpeed > 0) {
      tokensPerSec = Math.round(serverData.data.server.avgGenerateSpeed).toString().padStart(6);
    }

    // Memory (process memory carried on the server metrics)
    let memory = '- ';
    if (serverData?.data?.server.processMemory) {
      const bytes = serverData.data.server.processMemory;
      // Format as GB/MB depending on size
      if (bytes >= 1024 * 1024 * 1024) {
        const gb = (bytes / (1024 * 1024 * 1024)).toFixed(1);
        memory = `${gb} GB`.padStart(7);
      } else {
        const mb = Math.round(bytes / (1024 * 1024));
        memory = `${mb} MB`.padStart(7);
      }
    }

    content += `${num} │ ${serverId} │ ${port} │ ${status} │ ${slots} │ ${tokensPerSec} │ ${memory}\n`;
  });

  // Footer
  content += '\n' + divider + '\n';
  content += `{gray-fg}Updated: ${new Date().toLocaleTimeString()} | `;
  content += `Interval: ${updateInterval}ms | [R]efresh [+/-]Speed{/gray-fg}`;

  return content;
}
260
+
261
/**
 * Render the detail view for the currently selected server.
 *
 * @param systemMetrics - Shared system metrics, or null when none are available yet.
 * @returns Blessed-tagged text for the content box. When no data is held for
 *   the server, the view ends after the system section with either the
 *   recorded collection error or a loading notice.
 */
function renderDetailView(systemMetrics: SystemMetrics | null): string {
  const server = servers[selectedServerIndex];
  const serverData = serverDataMap.get(server.id);
  const termWidth = (screen.width as number) || 80;
  const divider = '─'.repeat(termWidth - 2);
  let content = '';

  // Header
  content += `{bold}{blue-fg}═══ Server #${selectedServerIndex + 1}: ${server.id} (${server.port}) ═══{/blue-fg}{/bold}\n`;

  // Status line with optional spinner
  const statusPlainText = '[ESC] Back to list | [Q] Quit';
  const spinnerChar = isLoading ? spinnerFrames[spinnerFrameIndex] : '';
  const spinnerText = spinnerChar ? ` {cyan-fg}${spinnerChar}{/cyan-fg}` : '';

  content += `{gray-fg}${statusPlainText}${spinnerText}{/gray-fg}\n\n`;

  // System resources
  content += renderSystemResources(systemMetrics);
  content += '\n';

  if (!serverData?.data) {
    if (serverData?.error) {
      // The last collection failed: show why instead of an endless loading notice
      content += `{red-fg}Failed to load server data: ${serverData.error}{/red-fg}\n`;
    } else {
      content += '{yellow-fg}Loading server data...{/yellow-fg}\n';
    }
    return content;
  }

  const data = serverData.data;

  // Server Information
  content += '{bold}Server Information{/bold}\n';
  content += divider + '\n';

  const statusIcon = data.server.healthy ? '{green-fg}●{/green-fg}' : '{red-fg}●{/red-fg}';
  const statusText = data.server.healthy ? 'RUNNING' : 'UNHEALTHY';
  content += `Status: ${statusIcon} ${statusText}`;

  if (data.server.uptime) {
    content += ` Uptime: ${data.server.uptime}`;
  }
  content += '\n';

  content += `Model: ${server.modelName}`;
  if (data.server.contextSize) {
    content += ` Context: ${data.server.contextSize} tokens`;
  }
  content += '\n';

  // Handle null host (legacy configs) by defaulting to 127.0.0.1
  const displayHost = server.host || '127.0.0.1';
  content += `Endpoint: http://${displayHost}:${server.port}`;

  // Add process memory (if available) on the same line as the endpoint
  if (data.server.processMemory) {
    const bytes = data.server.processMemory;
    let memStr: string;
    if (bytes >= 1024 * 1024 * 1024) {
      memStr = `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
    } else {
      memStr = `${Math.round(bytes / (1024 * 1024))} MB`;
    }
    content += ` Memory: ${memStr}\n`;
  } else {
    content += '\n';
  }

  content += `Slots: ${data.server.activeSlots} active / ${data.server.totalSlots} total\n`;
  content += '\n';

  // Request Metrics
  if (data.server.totalSlots > 0) {
    content += '{bold}Request Metrics{/bold}\n';
    content += divider + '\n';
    content += `Active: ${data.server.activeSlots} / ${data.server.totalSlots}\n`;
    content += `Idle: ${data.server.idleSlots} / ${data.server.totalSlots}\n`;

    if (data.server.avgPromptSpeed !== undefined && data.server.avgPromptSpeed > 0) {
      content += `Prompt: ${Math.round(data.server.avgPromptSpeed)} tokens/sec\n`;
    }

    if (data.server.avgGenerateSpeed !== undefined && data.server.avgGenerateSpeed > 0) {
      content += `Generate: ${Math.round(data.server.avgGenerateSpeed)} tokens/sec\n`;
    }

    content += '\n';
  }

  // Active Slots Detail
  if (data.server.slots.length > 0) {
    const activeSlots = data.server.slots.filter(s => s.state === 'processing');

    if (activeSlots.length > 0) {
      content += '{bold}Active Slots{/bold}\n';
      content += divider + '\n';

      activeSlots.forEach((slot) => {
        content += `Slot #${slot.id}: {yellow-fg}PROCESSING{/yellow-fg}`;

        if (slot.timings?.predicted_per_second) {
          content += ` - ${Math.round(slot.timings.predicted_per_second)} tok/s`;
        }

        if (slot.n_decoded !== undefined) {
          content += ` - ${slot.n_decoded}`;
          if (slot.n_ctx) {
            content += ` / ${slot.n_ctx}`;
          }
          content += ' tokens';
        }

        content += '\n';
      });

      content += '\n';
    }
  }

  // Footer
  content += divider + '\n';
  content += `{gray-fg}Updated: ${data.lastUpdated.toLocaleTimeString()} | `;
  content += `Interval: ${updateInterval}ms | [R]efresh [+/-]Speed{/gray-fg}`;

  return content;
}
385
+
386
/**
 * Collect metrics for every server plus the shared system metrics, then
 * repaint the active view.
 *
 * System metrics are collected once per cycle and process memory is fetched
 * for all known PIDs in one batch; both are shared across servers. A failure
 * for a single server is recorded in `serverDataMap`; any other failure
 * replaces the view with an error message. In both outcomes the loading
 * spinner is stopped.
 */
async function fetchData() {
  try {
    // Collect system metrics ONCE for all servers (not per-server)
    // This prevents spawning multiple macmon processes
    const systemMetricsPromise = Promise.resolve(systemCollector.collectSystemMetrics());
    // Mark the promise as handled right away: if one of the awaits below throws
    // first, a later rejection here must not become an unhandled rejection.
    // It is still awaited further down, where a rejection reaches the catch block.
    systemMetricsPromise.catch(() => undefined);

    // Batch collect process memory for ALL servers in one call
    const { getBatchProcessMemory } = await import('../utils/process-utils.js');
    const pids = servers.filter(s => s.pid).map(s => s.pid!);
    const memoryMapPromise = pids.length > 0
      ? getBatchProcessMemory(pids)
      : Promise.resolve(new Map<number, number | null>());

    // Wait for memory batch to complete
    const memoryMap = await memoryMapPromise;

    // Collect server metrics only (NOT system metrics) for each server
    const promises = servers.map(async (server) => {
      const aggregator = aggregators.get(server.id)!;
      try {
        // Use collectServerMetrics instead of collectMonitorData
        // to avoid spawning macmon per server
        // Pass pre-fetched memory to avoid spawning top per server
        const serverMetrics = await aggregator.collectServerMetrics(
          server,
          server.pid ? memoryMap.get(server.pid) ?? null : null
        );

        // Build MonitorData manually with shared system metrics
        const data: MonitorData = {
          server: serverMetrics,
          system: undefined, // Will be set after system metrics resolve
          lastUpdated: new Date(),
          updateInterval,
          consecutiveFailures: 0,
        };

        serverDataMap.set(server.id, {
          server,
          data,
          error: null,
        });
      } catch (err) {
        serverDataMap.set(server.id, {
          server,
          data: null,
          error: err instanceof Error ? err.message : 'Unknown error',
        });
      }
    });

    // Wait for both system metrics and server metrics to complete
    const systemMetrics = await systemMetricsPromise;
    await Promise.all(promises);

    // Store system metrics for loading state
    lastSystemMetrics = systemMetrics;

    // Update all server data with shared system metrics
    for (const serverData of serverDataMap.values()) {
      if (serverData.data) {
        serverData.data.system = systemMetrics;
      }
    }

    // Leave the loading state BEFORE the final render; rendering first would
    // bake the current spinner frame into the content until the next refresh.
    hideLoading();

    // Render once with complete data
    let content = '';
    if (viewMode === 'list') {
      content = renderListView(systemMetrics);
    } else {
      content = renderDetailView(systemMetrics);
    }

    contentBox.setContent(content);
    screen.render();

  } catch (err) {
    // Stop the spinner timer as well as the flag; otherwise its next tick
    // would repaint the view over the error message below.
    hideLoading();

    const errorMsg = err instanceof Error ? err.message : 'Unknown error';
    contentBox.setContent(
      '{bold}{red-fg}Error{/red-fg}{/bold}\n\n' +
      `{red-fg}${errorMsg}{/red-fg}\n\n` +
      '{gray-fg}Press [R] to retry or [Q] to quit{/gray-fg}'
    );
    screen.render();
  }
}
480
+
481
/**
 * (Re)start polling: cancel any existing timer, fetch once immediately, then
 * schedule fetches every `updateInterval` milliseconds.
 */
function startPolling() {
  if (intervalId) {
    clearInterval(intervalId);
  }
  // Fire-and-forget: fetchData handles its own errors
  void fetchData();
  intervalId = setInterval(fetchData, updateInterval);
}
487
+
488
// Keyboard shortcuts - List view: a digit opens the detail view of that server
screen.key(['1', '2', '3', '4', '5', '6', '7', '8', '9'], (ch) => {
  const index = parseInt(ch, 10) - 1;
  if (index >= 0 && index < servers.length) {
    showLoading();
    selectedServerIndex = index;
    viewMode = 'detail';
    fetchData();
  }
});

// Keyboard shortcuts - Detail view: ESC returns to the list
screen.key(['escape'], () => {
  if (viewMode === 'detail') {
    showLoading();
    viewMode = 'list';
    fetchData();
  }
});

// Keyboard shortcuts - Common
screen.key(['r', 'R'], () => {
  showLoading();
  fetchData();
});

// "+" polls faster (shorter interval, minimum 500ms)
screen.key(['+', '='], () => {
  updateInterval = Math.max(500, updateInterval - 500);
  startPolling();
});

// "-" polls slower (longer interval, maximum 10000ms)
screen.key(['-', '_'], () => {
  updateInterval = Math.min(10000, updateInterval + 500);
  startPolling();
});

screen.key(['q', 'Q', 'C-c'], () => {
  showLoading();
  if (intervalId) clearInterval(intervalId);
  if (spinnerIntervalId) clearInterval(spinnerIntervalId);
  // Small delay to show the loading state before exit
  setTimeout(() => {
    screen.destroy();
    process.exit(0);
  }, 100);
});

// Initial display
contentBox.setContent('{cyan-fg}⏳ Connecting to servers...{/cyan-fg}');
screen.render();

startPolling();

// Cleanup
screen.on('destroy', () => {
  if (intervalId) clearInterval(intervalId);
  // Stop the spinner timer too, so it cannot repaint a destroyed screen
  hideLoading();
  // Note: macmon child processes will automatically die when parent exits
  // since they're spawned with detached: false
});
547
+ }
@@ -0,0 +1,161 @@
1
+ import { ServerConfig } from './server-config.js';
2
+
3
+ // llama.cpp API response types
4
+
5
/** Health-check response body (one of the llama.cpp API response types). */
export interface HealthResponse {
  /** Status value returned in the response. */
  status: string;
  /** Error text, when the response carries one. */
  error?: string;
}
9
+
10
/** Server properties response body (one of the llama.cpp API response types). */
export interface PropsResponse {
  /** Default generation settings included in the response. */
  default_generation_settings: {
    n_ctx: number;
    n_predict: number;
    model: string;
    seed: number;
    temperature: number;
    top_k: number;
    top_p: number;
    min_p: number;
    n_keep: number;
    stream: boolean;
  };
  /** Total slot count included in the response. */
  total_slots: number;
  /** Model-loaded flag included in the response. */
  model_loaded: boolean;
  /** Model path included in the response. */
  model_path: string;
  /** Optional model alias. */
  model_alias?: string;
}
28
+
29
/** One slot entry from a slots response (llama.cpp API response type). */
export interface SlotInfo {
  id: number;
  /** Slot state; either idle or processing a request. */
  state: 'idle' | 'processing';
  task_id?: number;
  prompt?: string;
  n_prompt_tokens?: number;
  n_decoded?: number;
  n_ctx: number;
  truncated?: boolean;
  stopped_eos?: boolean;
  stopped_word?: boolean;
  stopped_limit?: boolean;
  stopping_word?: string;
  tokens_predicted?: number;
  tokens_evaluated?: number;
  /** Generation settings attached to the slot, when present. */
  generation_settings?: {
    n_ctx: number;
    n_predict: number;
    seed: number;
    temperature: number;
    top_k: number;
    top_p: number;
  };
  prompt_tokens_processed?: number;
  /** Time in ms */
  t_prompt_processing?: number;
  /** Time in ms */
  t_token_generation?: number;
  /** Timing figures attached to the slot, when present. */
  timings?: {
    prompt_n: number;
    prompt_ms: number;
    prompt_per_token_ms: number;
    prompt_per_second: number;
    predicted_n: number;
    predicted_ms: number;
    predicted_per_token_ms: number;
    predicted_per_second: number;
  };
}
66
+
67
/** Slots response body: the list of slot entries. */
export interface SlotsResponse {
  /** Slot entries contained in the response. */
  slots: SlotInfo[];
}
70
+
71
+ // System metrics types
72
+
73
/** System-level metrics: GPU/CPU/ANE usage, memory figures and metadata. */
export interface SystemMetrics {
  // GPU/CPU/ANE (from macmon if available)
  /** Percentage (0-100) */
  gpuUsage?: number;
  /** Percentage (0-100) */
  cpuUsage?: number;
  /** Number of cores */
  cpuCores?: number;
  /** Apple Neural Engine percentage (0-100) */
  aneUsage?: number;
  /** GPU temperature in Celsius */
  temperature?: number;

  // Memory (from vm_stat or macmon)
  /** Bytes */
  memoryUsed: number;
  /** Bytes */
  memoryTotal: number;
  /** Bytes */
  swapUsed?: number;
  /** Bytes (specific to llama-server process) */
  processMemory?: number;

  // Metadata
  timestamp: number;
  /** Which collector produced the values. */
  source: 'macmon' | 'vm_stat' | 'none';
  /** e.g., "macmon not available, showing memory only" */
  warnings?: string[];
}
92
+
93
+ // Aggregated metrics for TUI display
94
+
95
/** Aggregated per-server metrics for TUI display. */
export interface ServerMetrics {
  // Server identification
  server: ServerConfig;

  // Health status
  healthy: boolean;
  /** Human-readable (e.g., "2h 34m 12s") */
  uptime?: string;
  error?: string;

  // Model information
  modelLoaded: boolean;
  modelName: string;
  contextSize: number;
  totalSlots: number;

  // Request metrics
  activeSlots: number;
  idleSlots: number;
  slots: SlotInfo[];

  // Performance metrics (derived from slots)
  /** Tokens per second */
  avgPromptSpeed?: number;
  /** Tokens per second */
  avgGenerateSpeed?: number;
  /** Estimated from slot activity */
  requestsPerMinute?: number;
  /** Milliseconds */
  avgLatency?: number;

  // Cache metrics (if available from /metrics endpoint)
  /** Percentage */
  cacheHitRate?: number;

  // Process metrics
  /** Bytes (actual RSS from top command) */
  processMemory?: number;

  // Timestamp
  timestamp: number;
  /** True if data is from last successful fetch */
  stale: boolean;
}
131
+
132
/** Combined snapshot of one server's metrics and, optionally, system metrics. */
export interface MonitorData {
  /** Metrics of the monitored server. */
  server: ServerMetrics;
  /** System metrics, when attached. */
  system?: SystemMetrics;
  /** When this snapshot was built. */
  lastUpdated: Date;
  /** Milliseconds */
  updateInterval: number;
  /** Count of consecutive failures. */
  consecutiveFailures: number;
}
139
+
140
+ // Error and loading states
141
+
142
/** Describes an error condition to present to the user. */
export interface ErrorState {
  /** Error message. */
  error: string;
  /** Whether a retry may be attempted. */
  canRetry: boolean;
  /** Optional list of suggestions. */
  suggestions?: string[];
}
147
+
148
/** Describes a loading condition to present to the user. */
export interface LoadingState {
  /** Loading message. */
  message: string;
  /** 0-100 if determinate */
  progress?: number;
}
152
+
153
+ // Collection result (for graceful degradation)
154
+
155
/**
 * Collection result (for graceful degradation).
 *
 * @typeParam T - Type of the collected payload.
 */
export interface CollectionResult<T> {
  /** Whether the collection succeeded. */
  success: boolean;
  /** Collected payload, or null when there is none. */
  data: T | null;
  /** Error message, when one is reported. */
  error?: string;
  /** Warnings, when any are reported. */
  warnings?: string[];
  /** Staleness flag, when reported. */
  stale?: boolean;
}