@appkit/llamacpp-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CHANGELOG.md +21 -0
  2. package/MONITORING-ACCURACY-FIX.md +199 -0
  3. package/PER-PROCESS-METRICS.md +190 -0
  4. package/README.md +136 -1
  5. package/dist/cli.js +21 -4
  6. package/dist/cli.js.map +1 -1
  7. package/dist/commands/create.d.ts.map +1 -1
  8. package/dist/commands/create.js +12 -3
  9. package/dist/commands/create.js.map +1 -1
  10. package/dist/commands/monitor.d.ts +2 -0
  11. package/dist/commands/monitor.d.ts.map +1 -0
  12. package/dist/commands/monitor.js +126 -0
  13. package/dist/commands/monitor.js.map +1 -0
  14. package/dist/commands/ps.d.ts +3 -1
  15. package/dist/commands/ps.d.ts.map +1 -1
  16. package/dist/commands/ps.js +75 -5
  17. package/dist/commands/ps.js.map +1 -1
  18. package/dist/commands/server-show.d.ts.map +1 -1
  19. package/dist/commands/server-show.js +10 -3
  20. package/dist/commands/server-show.js.map +1 -1
  21. package/dist/commands/start.d.ts.map +1 -1
  22. package/dist/commands/start.js +14 -2
  23. package/dist/commands/start.js.map +1 -1
  24. package/dist/lib/history-manager.d.ts +46 -0
  25. package/dist/lib/history-manager.d.ts.map +1 -0
  26. package/dist/lib/history-manager.js +157 -0
  27. package/dist/lib/history-manager.js.map +1 -0
  28. package/dist/lib/metrics-aggregator.d.ts +40 -0
  29. package/dist/lib/metrics-aggregator.d.ts.map +1 -0
  30. package/dist/lib/metrics-aggregator.js +211 -0
  31. package/dist/lib/metrics-aggregator.js.map +1 -0
  32. package/dist/lib/system-collector.d.ts +80 -0
  33. package/dist/lib/system-collector.d.ts.map +1 -0
  34. package/dist/lib/system-collector.js +311 -0
  35. package/dist/lib/system-collector.js.map +1 -0
  36. package/dist/tui/HistoricalMonitorApp.d.ts +5 -0
  37. package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -0
  38. package/dist/tui/HistoricalMonitorApp.js +490 -0
  39. package/dist/tui/HistoricalMonitorApp.js.map +1 -0
  40. package/dist/tui/MonitorApp.d.ts +4 -0
  41. package/dist/tui/MonitorApp.d.ts.map +1 -0
  42. package/dist/tui/MonitorApp.js +315 -0
  43. package/dist/tui/MonitorApp.js.map +1 -0
  44. package/dist/tui/MultiServerMonitorApp.d.ts +4 -0
  45. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -0
  46. package/dist/tui/MultiServerMonitorApp.js +712 -0
  47. package/dist/tui/MultiServerMonitorApp.js.map +1 -0
  48. package/dist/types/history-types.d.ts +30 -0
  49. package/dist/types/history-types.d.ts.map +1 -0
  50. package/dist/types/history-types.js +11 -0
  51. package/dist/types/history-types.js.map +1 -0
  52. package/dist/types/monitor-types.d.ts +123 -0
  53. package/dist/types/monitor-types.d.ts.map +1 -0
  54. package/dist/types/monitor-types.js +3 -0
  55. package/dist/types/monitor-types.js.map +1 -0
  56. package/dist/types/server-config.d.ts +1 -0
  57. package/dist/types/server-config.d.ts.map +1 -1
  58. package/dist/types/server-config.js.map +1 -1
  59. package/dist/utils/downsample-utils.d.ts +35 -0
  60. package/dist/utils/downsample-utils.d.ts.map +1 -0
  61. package/dist/utils/downsample-utils.js +107 -0
  62. package/dist/utils/downsample-utils.js.map +1 -0
  63. package/dist/utils/file-utils.d.ts +6 -0
  64. package/dist/utils/file-utils.d.ts.map +1 -1
  65. package/dist/utils/file-utils.js +38 -0
  66. package/dist/utils/file-utils.js.map +1 -1
  67. package/dist/utils/process-utils.d.ts +35 -2
  68. package/dist/utils/process-utils.d.ts.map +1 -1
  69. package/dist/utils/process-utils.js +220 -25
  70. package/dist/utils/process-utils.js.map +1 -1
  71. package/docs/images/.gitkeep +1 -0
  72. package/package.json +5 -1
  73. package/src/cli.ts +21 -4
  74. package/src/commands/create.ts +14 -4
  75. package/src/commands/monitor.ts +110 -0
  76. package/src/commands/ps.ts +88 -5
  77. package/src/commands/server-show.ts +10 -3
  78. package/src/commands/start.ts +15 -2
  79. package/src/lib/history-manager.ts +172 -0
  80. package/src/lib/metrics-aggregator.ts +257 -0
  81. package/src/lib/system-collector.ts +315 -0
  82. package/src/tui/HistoricalMonitorApp.ts +548 -0
  83. package/src/tui/MonitorApp.ts +386 -0
  84. package/src/tui/MultiServerMonitorApp.ts +792 -0
  85. package/src/types/history-types.ts +39 -0
  86. package/src/types/monitor-types.ts +162 -0
  87. package/src/types/server-config.ts +1 -0
  88. package/src/utils/downsample-utils.ts +128 -0
  89. package/src/utils/file-utils.ts +40 -0
  90. package/src/utils/process-utils.ts +243 -25
  91. package/test-load.sh +100 -0
@@ -0,0 +1,792 @@
1
+ import blessed from 'blessed';
2
+ import { ServerConfig } from '../types/server-config.js';
3
+ import { MetricsAggregator } from '../lib/metrics-aggregator.js';
4
+ import { SystemCollector } from '../lib/system-collector.js';
5
+ import { MonitorData, SystemMetrics } from '../types/monitor-types.js';
6
+ import { HistoryManager } from '../lib/history-manager.js';
7
+ import { createHistoricalUI, createMultiServerHistoricalUI } from './HistoricalMonitorApp.js';
8
+
9
/**
 * Which screen the live monitor is showing: the table of all servers
 * (`'list'`) or the page for a single selected server (`'detail'`).
 */
type ViewMode = 'list' | 'detail';
10
+
11
/**
 * Latest poll result kept for one configured server.
 */
interface ServerMonitorData {
  /** The server configuration this entry belongs to. */
  server: ServerConfig;
  /**
   * Metrics from the most recent successful poll. `null` before the first
   * poll completes, for servers that are not running, and after a failed poll.
   */
  data: MonitorData | null;
  /** Message of the error thrown by the most recent failed poll, otherwise `null`. */
  error: string | null;
}
16
+
17
+ export async function createMultiServerMonitorUI(
18
+ screen: blessed.Widgets.Screen,
19
+ servers: ServerConfig[],
20
+ _fromPs: boolean = false,
21
+ directJumpIndex?: number
22
+ ): Promise<void> {
23
// ── Polling state ─────────────────────────────────────────────────────────
// Milliseconds between polls; handed to setInterval and copied into MonitorData.
let updateInterval = 2000;
let intervalId: NodeJS.Timeout | null = null;

// ── View state ────────────────────────────────────────────────────────────
// A direct jump is honoured only when it addresses an existing server.
// Anything else (negative, fractional, past the end, or an empty server list)
// falls back to the list view instead of opening a detail page for
// `servers[idx] === undefined`.
const directJumpTarget =
  directJumpIndex !== undefined &&
  Number.isInteger(directJumpIndex) &&
  directJumpIndex >= 0 &&
  directJumpIndex < servers.length
    ? directJumpIndex
    : undefined;

let viewMode: ViewMode = directJumpTarget !== undefined ? 'detail' : 'list';
let selectedServerIndex = directJumpTarget ?? 0; // server shown in the detail view
let selectedRowIndex = directJumpTarget ?? 0; // row highlighted in the list view
let isLoading = false;
let lastSystemMetrics: SystemMetrics | null = null; // last successfully collected system metrics
let cameFromDirectJump = directJumpTarget !== undefined; // entered via `ps <id>`
let inHistoricalView = false; // true while a historical screen owns the display

// ── Spinner animation ─────────────────────────────────────────────────────
const spinnerFrames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
let spinnerFrameIndex = 0;
let spinnerIntervalId: NodeJS.Timeout | null = null;

// ── Collectors and per-server bookkeeping (keyed by server id) ────────────
const systemCollector = new SystemCollector();
const aggregators = new Map<string, MetricsAggregator>();
const historyManagers = new Map<string, HistoryManager>();
const serverDataMap = new Map<string, ServerMonitorData>();

for (const server of servers) {
  aggregators.set(server.id, new MetricsAggregator(server));
  historyManagers.set(server.id, new HistoryManager(server.id));
  // No data and no error until the first poll finishes.
  serverDataMap.set(server.id, { server, data: null, error: null });
}

// ── Single full-screen scrollable box that every view renders into ────────
const contentBox = blessed.box({
  top: 0,
  left: 0,
  width: '100%',
  height: '100%',
  tags: true,
  scrollable: true,
  alwaysScroll: true,
  keys: true,
  vi: true,
  mouse: true,
  scrollbar: {
    ch: '█',
    style: {
      fg: 'blue',
    },
  },
});
screen.append(contentBox);
74
+
75
+ // Helper to create progress bar
76
/**
 * Build a fixed-width text progress bar such as `[█████░░░░░]`.
 *
 * @param percentage Fill level in percent. Values below 0 or above 100 are
 *   clamped, and NaN is treated as 0, so the bar never grows past `width`.
 * @param width Number of cells between the brackets (default 30).
 * @returns The bar: `[`, `width` filled/empty cells, `]`.
 */
function createProgressBar(percentage: number, width: number = 30): string {
  const cells = Math.max(0, Math.floor(width));
  // Callers pass raw readings (e.g. per-process CPU, which can exceed 100%),
  // so normalise here rather than trusting every call site to clamp.
  const pct = Number.isNaN(percentage) ? 0 : Math.min(100, Math.max(0, percentage));
  const filled = Math.round((pct / 100) * cells);
  return '[' + '█'.repeat(filled) + '░'.repeat(cells - filled) + ']';
}
81
+
82
+ // Render system resources section (system-wide for list view)
83
/**
 * Render the machine-wide "System Resources" section shown in the list view.
 *
 * @param systemMetrics Latest system metrics, or `null` while none have been
 *   collected yet (a placeholder line is rendered instead).
 * @returns Tagged text for the section; every line ends with a newline.
 */
function renderSystemResources(systemMetrics: SystemMetrics | null): string {
  const columns = (screen.width as number) || 80;
  const lines: string[] = ['{bold}System Resources{/bold}', '─'.repeat(columns - 2)];

  if (!systemMetrics) {
    lines.push('{gray-fg}Collecting system metrics...{/gray-fg}');
    return lines.join('\n') + '\n';
  }

  const { gpuUsage, temperature, cpuUsage, aneUsage, memoryUsed, memoryTotal, warnings } = systemMetrics;

  // "<label>: <bar> <rounded>%" — shared by every gauge in this section.
  const gauge = (label: string, pct: number): string =>
    `${label}: {cyan-fg}${createProgressBar(pct)}{/cyan-fg} ${Math.round(pct)}%`;

  if (gpuUsage !== undefined) {
    // Temperature is only ever shown as a suffix of the GPU line.
    const temperatureSuffix = temperature !== undefined ? ` - ${Math.round(temperature)}°C` : '';
    lines.push(gauge('GPU', gpuUsage) + temperatureSuffix);
  }

  if (cpuUsage !== undefined) {
    lines.push(gauge('CPU', cpuUsage));
  }

  // The ANE line is hidden unless usage is above 1%.
  if (aneUsage !== undefined && aneUsage > 1) {
    lines.push(gauge('ANE', aneUsage));
  }

  if (memoryTotal > 0) {
    const bytesPerGB = 1024 ** 3;
    const usedGB = memoryUsed / bytesPerGB;
    const totalGB = memoryTotal / bytesPerGB;
    const usedPct = (memoryUsed / memoryTotal) * 100;
    lines.push(`${gauge('Memory', usedPct)} (${usedGB.toFixed(1)} / ${totalGB.toFixed(1)} GB)`);
  }

  if (warnings && warnings.length > 0) {
    // Blank line, then all warnings on one yellow line.
    lines.push('', `{yellow-fg}⚠ ${warnings.join(', ')}{/yellow-fg}`);
  }

  return lines.join('\n') + '\n';
}
131
+
132
+ // Render aggregate model resources (all running servers in list view)
133
/**
 * Render the "Model Resources" section of the list view: process CPU and
 * process memory summed over every running server with fresh (non-stale) data.
 *
 * A server is counted when it reports at least one of the two readings.
 * Counting only servers with a CPU reading would sum memory from servers that
 * are left out of the count, and could report "No running servers" while
 * memory data exists.
 *
 * @returns Tagged text for the section; every line ends with a newline.
 */
function renderAggregateModelResources(): string {
  const termWidth = (screen.width as number) || 80;
  let content = '{bold}Model Resources{/bold}\n';
  content += '─'.repeat(termWidth - 2) + '\n';

  let totalCpu = 0;
  let totalMemoryBytes = 0;
  let serverCount = 0;

  for (const { server, data } of serverDataMap.values()) {
    const metrics = data?.server;
    // Skip stopped servers and servers without fresh metrics.
    if (server.status !== 'running' || !metrics || metrics.stale) continue;

    const cpu = metrics.processCpuUsage;
    const memory = metrics.processMemory;
    if (cpu === undefined && memory === undefined) continue;

    serverCount++;
    totalCpu += cpu ?? 0;
    totalMemoryBytes += memory ?? 0;
  }

  if (serverCount === 0) {
    content += '{gray-fg}No running servers{/gray-fg}\n';
    return content;
  }

  const countLabel = `${serverCount} ${serverCount === 1 ? 'server' : 'servers'}`;

  // CPU: sum of per-process percentages. The bar is capped at 100 but the
  // number shows the real total.
  const cpuBar = createProgressBar(Math.min(totalCpu, 100));
  content += `CPU: {cyan-fg}${cpuBar}{/cyan-fg} ${Math.round(totalCpu)}%`;
  content += ` {gray-fg}(${countLabel}){/gray-fg}\n`;

  // Memory: the bar is scaled against an assumed ceiling of ~8 GB per server.
  const totalMemoryGB = totalMemoryBytes / (1024 ** 3);
  const estimatedMaxGB = serverCount * 8;
  const memoryPercentage = Math.min((totalMemoryGB / estimatedMaxGB) * 100, 100);
  const memoryBar = createProgressBar(memoryPercentage);
  content += `Memory: {cyan-fg}${memoryBar}{/cyan-fg} ${totalMemoryGB.toFixed(2)} GB`;
  content += ` {gray-fg}(${countLabel}){/gray-fg}\n`;

  return content;
}
179
+
180
+ // Render model resources section (per-process for detail view)
181
/**
 * Render the "Model Resources" section of the detail view for one server.
 *
 * GPU usage and temperature come from the shared system metrics
 * (`data.system`); CPU and memory are the server process's own readings.
 *
 * @param data Monitor data for the selected server.
 * @returns Tagged text for the section; every line ends with a newline.
 */
function renderModelResources(data: MonitorData): string {
  const termWidth = (screen.width as number) || 80;
  let content = '{bold}Model Resources{/bold}\n';
  content += '─'.repeat(termWidth - 2) + '\n';

  const system = data.system;
  const { processCpuUsage, processMemory } = data.server;

  // GPU: system-wide reading, labelled as such.
  if (system && system.gpuUsage !== undefined) {
    const bar = createProgressBar(system.gpuUsage);
    content += `GPU: {cyan-fg}${bar}{/cyan-fg} ${Math.round(system.gpuUsage)}% {gray-fg}(system){/gray-fg}`;

    if (system.temperature !== undefined) {
      content += ` - ${Math.round(system.temperature)}°C`;
    }

    content += '\n';
  }

  // CPU: per-process. The bar is capped at 100, matching the aggregate view,
  // so a reading above 100% cannot stretch the bar. The number is uncapped.
  if (processCpuUsage !== undefined) {
    const bar = createProgressBar(Math.min(processCpuUsage, 100));
    content += `CPU: {cyan-fg}${bar}{/cyan-fg} ${Math.round(processCpuUsage)}%\n`;
  }

  // Memory: per-process, bar scaled against an assumed 8 GB ceiling.
  if (processMemory !== undefined) {
    const memoryGB = processMemory / (1024 ** 3);
    const estimatedMax = 8;
    const memoryPercentage = Math.min((memoryGB / estimatedMax) * 100, 100);
    const bar = createProgressBar(memoryPercentage);
    content += `Memory: {cyan-fg}${bar}{/cyan-fg} ${memoryGB.toFixed(2)} GB\n`;
  }

  if (system && system.warnings && system.warnings.length > 0) {
    content += `\n{yellow-fg}⚠ ${system.warnings.join(', ')}{/yellow-fg}\n`;
  }

  return content;
}
222
+
223
+ // Show loading spinner
224
/**
 * Enter the loading state: repaint the current view immediately and then
 * every 80 ms until `hideLoading()` is called. Does nothing if a loading
 * state is already active.
 */
function showLoading(): void {
  if (isLoading) return;

  // Paint whichever view is current at the moment of the call, using the last
  // known system metrics.
  const repaintCurrentView = (): void => {
    const body =
      viewMode === 'list'
        ? renderListView(lastSystemMetrics)
        : renderDetailView(lastSystemMetrics);
    contentBox.setContent(body);
    screen.render();
  };

  isLoading = true;
  spinnerFrameIndex = 0;

  // Advance the spinner frame and repaint on every tick.
  spinnerIntervalId = setInterval(() => {
    spinnerFrameIndex = (spinnerFrameIndex + 1) % spinnerFrames.length;
    repaintCurrentView();
  }, 80);

  // First paint happens right away rather than after the first tick.
  repaintCurrentView();
}
255
+
256
+ // Hide loading spinner
257
/**
 * Leave the loading state: clear the flag and stop the spinner timer if one
 * is running. Safe to call when no loading state is active.
 */
function hideLoading(): void {
  isLoading = false;

  if (!spinnerIntervalId) return;

  clearInterval(spinnerIntervalId);
  spinnerIntervalId = null;
}
264
+
265
+ // Render list view
266
/**
 * Render the list view: header, system resources, aggregate model resources,
 * and one table row per configured server, with the highlighted row inverted.
 *
 * @param systemMetrics Latest system metrics, or `null` if none are available yet.
 * @returns Tagged text for the whole screen.
 */
function renderListView(systemMetrics: SystemMetrics | null): string {
  const termWidth = (screen.width as number) || 80;
  const divider = '─'.repeat(termWidth - 2);
  let content = '';

  // Header
  content += '{bold}{blue-fg}═══ llama.cpp{/blue-fg}{/bold}\n\n';

  // System-wide resources, then CPU + memory summed over running servers
  content += renderSystemResources(systemMetrics);
  content += '\n';
  content += renderAggregateModelResources();
  content += '\n';

  // Server list header
  const runningCount = servers.filter(s => s.status === 'running').length;
  const stoppedCount = servers.length - runningCount;
  content += `{bold}Servers (${runningCount} running, ${stoppedCount} stopped){/bold}\n`;
  content += '{gray-fg}Use arrow keys to navigate, Enter to view details{/gray-fg}\n';
  content += divider + '\n';

  // The Server ID column takes whatever width the fixed columns leave over,
  // kept between 20 and 60 characters. 48 = 46 for the fixed columns and
  // separators plus a 2-character safety margin.
  const fixedColumnsWidth = 48;
  const minServerIdWidth = 20;
  const maxServerIdWidth = 60;
  const serverIdWidth = Math.max(
    minServerIdWidth,
    Math.min(maxServerIdWidth, termWidth - fixedColumnsWidth)
  );

  // Table header
  const serverIdHeader = 'Server ID'.padEnd(serverIdWidth);
  content += `{bold} │ ${serverIdHeader}│ Port │ Status │ Slots │ tok/s │ Memory{/bold}\n`;
  content += divider + '\n';

  // Server rows
  servers.forEach((server, index) => {
    const serverData = serverDataMap.get(server.id);
    const metrics = serverData?.data?.server;
    const isSelected = index === selectedRowIndex;

    const indicator = isSelected ? '►' : ' ';

    // Pad, then cut, so the cell is always exactly serverIdWidth wide.
    const serverId = server.id.padEnd(serverIdWidth).substring(0, serverIdWidth);
    const port = server.port.toString().padStart(4);

    // Status comes in two forms: colour-tagged for normal rows, and plain for
    // the highlighted row, which applies its own colours.
    let status: string;
    let statusPlain: string;
    if (server.status !== 'running') {
      // Stopped according to the configuration
      status = '{gray-fg}○ OFF{/gray-fg} ';
      statusPlain = '○ OFF ';
    } else if (metrics) {
      if (metrics.healthy) {
        status = '{green-fg}● RUN{/green-fg} ';
        statusPlain = '● RUN ';
      } else {
        status = '{red-fg}● ERR{/red-fg} ';
        statusPlain = '● ERR ';
      }
    } else if (serverData?.error) {
      // The last poll for this server threw. Show ERR instead of leaving the
      // row on the "still loading" placeholder indefinitely.
      status = '{red-fg}● ERR{/red-fg} ';
      statusPlain = '● ERR ';
    } else {
      // Running, but no poll result yet
      status = '{yellow-fg}● ...{/yellow-fg} ';
      statusPlain = '● ... ';
    }

    // Slots: active/total
    let slots = '- ';
    if (metrics) {
      slots = `${metrics.activeSlots}/${metrics.totalSlots}`.padStart(5);
    }

    // tok/s: only shown when a positive generation speed is known
    let tokensPerSec = '- ';
    if (metrics?.avgGenerateSpeed !== undefined && metrics.avgGenerateSpeed > 0) {
      tokensPerSec = Math.round(metrics.avgGenerateSpeed).toString().padStart(6);
    }

    // Memory: process memory, in GB from 1 GiB upwards, otherwise MB
    let memory = '- ';
    if (metrics?.processMemory) {
      const bytes = metrics.processMemory;
      if (bytes >= 1024 * 1024 * 1024) {
        const gb = (bytes / (1024 * 1024 * 1024)).toFixed(1);
        memory = `${gb} GB`.padStart(7);
      } else {
        const mb = Math.round(bytes / (1024 * 1024));
        memory = `${mb} MB`.padStart(7);
      }
    }

    const cells = `${indicator} │ ${serverId} │ ${port} │ ${isSelected ? statusPlain : status}│ ${slots} │ ${tokensPerSec} │ ${memory}`;

    // Highlighted row: colour 15 (bright white) on a cyan background
    const rowContent = isSelected ? `{cyan-bg}{15-fg}${cells}{/15-fg}{/cyan-bg}` : cells;

    content += rowContent + '\n';
  });

  // Footer
  content += '\n' + divider + '\n';
  content += `{gray-fg}Updated: ${new Date().toLocaleTimeString()} | [H]istory [Q]uit{/gray-fg}`;

  return content;
}
393
+
394
+ // Render detail view for selected server
395
/**
 * Render the detail view for the server at `selectedServerIndex`.
 *
 * Four outcomes:
 *  - no server at that index: a short notice;
 *  - server not running: its stored configuration and CLI hints, no metrics;
 *  - running, but no data: the error from the last failed poll, or a loading
 *    notice if no poll has failed;
 *  - running with data: resources, server info, request metrics, active slots.
 *
 * @param systemMetrics Accepted for symmetry with `renderListView`; this
 *   function reads system readings from the server's own `data.system`.
 * @returns Tagged text for the whole screen.
 */
function renderDetailView(systemMetrics: SystemMetrics | null): string {
  const server = servers[selectedServerIndex];
  if (!server) {
    // Empty server list or a stale/out-of-range index: render a notice rather
    // than throwing on `server.id`.
    return '{yellow-fg}No server selected{/yellow-fg}\n';
  }

  const serverData = serverDataMap.get(server.id);
  const termWidth = (screen.width as number) || 80;
  const divider = '─'.repeat(termWidth - 2);
  // A missing host (legacy configs) is displayed as 127.0.0.1.
  const displayHost = server.host || '127.0.0.1';
  let content = '';

  // Header
  content += `{bold}{blue-fg}═══ ${server.id} (${server.port}){/blue-fg}{/bold}\n\n`;

  // ── Stopped server: configuration only ──────────────────────────────────
  if (server.status !== 'running') {
    content += '{bold}Server Information{/bold}\n';
    content += divider + '\n';
    content += `Status: {gray-fg}○ STOPPED{/gray-fg}\n`;
    content += `Model: ${server.modelName}\n`;
    content += `Endpoint: http://${displayHost}:${server.port}\n`;
    content += '\n';

    content += '{bold}Configuration{/bold}\n';
    content += divider + '\n';
    content += `Threads: ${server.threads}\n`;
    content += `Context: ${server.ctxSize} tokens\n`;
    content += `GPU Layers: ${server.gpuLayers}\n`;
    if (server.verbose) {
      content += `Verbose: Enabled\n`;
    }
    if (server.customFlags && server.customFlags.length > 0) {
      content += `Flags: ${server.customFlags.join(', ')}\n`;
    }
    content += '\n';

    if (server.lastStarted) {
      content += '{bold}Last Activity{/bold}\n';
      content += divider + '\n';
      content += `Started: ${new Date(server.lastStarted).toLocaleString()}\n`;
      if (server.lastStopped) {
        content += `Stopped: ${new Date(server.lastStopped).toLocaleString()}\n`;
      }
      content += '\n';
    }

    content += '{bold}Quick Actions{/bold}\n';
    content += divider + '\n';
    content += `{dim}Start server: llamacpp server start ${server.port}{/dim}\n`;
    content += `{dim}Update config: llamacpp server config ${server.port} [options]{/dim}\n`;
    content += `{dim}View logs: llamacpp server logs ${server.port}{/dim}\n`;

    return content;
  }

  // ── Running, but nothing to show yet ────────────────────────────────────
  if (!serverData?.data) {
    if (serverData?.error) {
      // The last poll failed. Show why instead of claiming to still be loading.
      content += `{red-fg}Failed to collect server metrics: ${serverData.error}{/red-fg}\n`;
    } else {
      content += '{yellow-fg}Loading server data...{/yellow-fg}\n';
    }
    return content;
  }

  const data = serverData.data;
  const metrics = data.server;

  // ── Model resources (per-process) ───────────────────────────────────────
  content += renderModelResources(data);
  content += '\n';

  // ── Server information ──────────────────────────────────────────────────
  content += '{bold}Server Information{/bold}\n';
  content += divider + '\n';

  const statusIcon = metrics.healthy ? '{green-fg}●{/green-fg}' : '{red-fg}●{/red-fg}';
  const statusText = metrics.healthy ? 'RUNNING' : 'UNHEALTHY';
  content += `Status: ${statusIcon} ${statusText}`;
  if (metrics.uptime) {
    content += ` Uptime: ${metrics.uptime}`;
  }
  content += '\n';

  content += `Model: ${server.modelName}`;
  if (metrics.contextSize) {
    content += ` Context: ${metrics.contextSize} tokens`;
  }
  content += '\n';

  content += `Endpoint: http://${displayHost}:${server.port}\n`;
  content += `Slots: ${metrics.activeSlots} active / ${metrics.totalSlots} total\n`;
  content += '\n';

  // ── Request metrics (only when the server reports slots) ────────────────
  if (metrics.totalSlots > 0) {
    content += '{bold}Request Metrics{/bold}\n';
    content += divider + '\n';
    content += `Active: ${metrics.activeSlots} / ${metrics.totalSlots}\n`;
    content += `Idle: ${metrics.idleSlots} / ${metrics.totalSlots}\n`;

    if (metrics.avgPromptSpeed !== undefined && metrics.avgPromptSpeed > 0) {
      content += `Prompt: ${Math.round(metrics.avgPromptSpeed)} tokens/sec\n`;
    }

    if (metrics.avgGenerateSpeed !== undefined && metrics.avgGenerateSpeed > 0) {
      content += `Generate: ${Math.round(metrics.avgGenerateSpeed)} tokens/sec\n`;
    }

    content += '\n';
  }

  // ── Slots currently processing a request ────────────────────────────────
  const activeSlots = metrics.slots.filter(s => s.state === 'processing');
  if (activeSlots.length > 0) {
    content += '{bold}Active Slots{/bold}\n';
    content += divider + '\n';

    for (const slot of activeSlots) {
      content += `Slot #${slot.id}: {yellow-fg}PROCESSING{/yellow-fg}`;

      if (slot.timings?.predicted_per_second) {
        content += ` - ${Math.round(slot.timings.predicted_per_second)} tok/s`;
      }

      if (slot.n_decoded !== undefined) {
        content += ` - ${slot.n_decoded}`;
        if (slot.n_ctx) {
          content += ` / ${slot.n_ctx}`;
        }
        content += ' tokens';
      }

      content += '\n';
    }

    content += '\n';
  }

  // Footer
  content += divider + '\n';
  content += `{gray-fg}Updated: ${data.lastUpdated.toLocaleTimeString()} | [H]istory [ESC] Back [Q]uit{/gray-fg}`;

  return content;
}
539
+
540
+ // Fetch and update display
541
/**
 * Run one poll cycle and repaint the current view.
 *
 * Steps: start system-metrics collection once for all servers, batch-read
 * process memory and CPU for every server with a pid, collect per-server
 * metrics for running servers, attach the shared system metrics, append
 * history snapshots for healthy non-stale servers, render, and leave the
 * loading state.
 *
 * Never rejects: any failure is rendered into the content box instead.
 */
async function fetchData(): Promise<void> {
  try {
    // System metrics are collected once and shared by all servers.
    const systemMetricsPromise = systemCollector.collectSystemMetrics();
    // This promise is awaited only further down. If an earlier await throws,
    // a later rejection here would otherwise surface as an unhandled
    // rejection. Marking it handled does not change what the await below sees.
    systemMetricsPromise.catch(() => undefined);

    // One batched memory read and one batched CPU read for all servers.
    const { getBatchProcessMemory, getBatchProcessCpu } = await import('../utils/process-utils.js');
    const pids = servers.filter(s => s.pid).map(s => s.pid!);
    const memoryMapPromise = pids.length > 0
      ? getBatchProcessMemory(pids)
      : Promise.resolve(new Map<number, number | null>());
    const cpuMapPromise = pids.length > 0
      ? getBatchProcessCpu(pids)
      : Promise.resolve(new Map<number, number | null>());

    const [memoryMap, cpuMap] = await Promise.all([memoryMapPromise, cpuMapPromise]);

    // Per-server metrics, running servers only. Each task records its own
    // failure in serverDataMap, so none of these promises rejects.
    const promises = servers
      .filter(server => server.status === 'running')
      .map(async (server) => {
        const aggregator = aggregators.get(server.id)!;
        try {
          // Pre-fetched memory/CPU are passed in; null when unavailable.
          const serverMetrics = await aggregator.collectServerMetrics(
            server,
            server.pid ? memoryMap.get(server.pid) ?? null : null,
            server.pid ? cpuMap.get(server.pid) ?? null : null
          );

          const data: MonitorData = {
            server: serverMetrics,
            system: undefined, // filled in below once system metrics resolve
            lastUpdated: new Date(),
            updateInterval,
            consecutiveFailures: 0,
          };

          serverDataMap.set(server.id, {
            server,
            data,
            error: null,
          });
        } catch (err) {
          serverDataMap.set(server.id, {
            server,
            data: null,
            error: err instanceof Error ? err.message : 'Unknown error',
          });
        }
      });

    // Stopped servers carry neither data nor error.
    servers
      .filter(server => server.status !== 'running')
      .forEach(server => {
        serverDataMap.set(server.id, {
          server,
          data: null,
          error: null,
        });
      });

    const systemMetrics = await systemMetricsPromise;
    await Promise.all(promises);

    // Remembered so the loading/navigation repaints can reuse it.
    lastSystemMetrics = systemMetrics;

    for (const serverData of serverDataMap.values()) {
      if (serverData.data) {
        serverData.data.system = systemMetrics;
      }
    }

    // History is best-effort: written only for healthy, non-stale servers, and
    // a failed write is logged without interrupting monitoring.
    for (const [serverId, serverData] of serverDataMap) {
      if (serverData.data && !serverData.data.server.stale && serverData.data.server.healthy) {
        const manager = historyManagers.get(serverId);
        manager?.appendSnapshot(serverData.data.server, serverData.data.system)
          .catch(err => {
            console.error(`Failed to save history for ${serverId}:`, err);
          });
      }
    }

    // Single render with complete data.
    const content = viewMode === 'list'
      ? renderListView(systemMetrics)
      : renderDetailView(systemMetrics);

    contentBox.setContent(content);
    screen.render();

    hideLoading();

  } catch (err) {
    // Stop the spinner timer BEFORE showing the error. Resetting only the
    // isLoading flag leaves the 80 ms repaint running, which overwrites this
    // message almost immediately and is leaked by the next showLoading().
    hideLoading();

    const errorMsg = err instanceof Error ? err.message : 'Unknown error';
    // NOTE(review): this text offers an [R] retry key, but no 'r' binding is
    // visible in this file — confirm one is registered elsewhere.
    contentBox.setContent(
      '{bold}{red-fg}Error{/red-fg}{/bold}\n\n' +
      `{red-fg}${errorMsg}{/red-fg}\n\n` +
      '{gray-fg}Press [R] to retry or [Q] to quit{/gray-fg}'
    );
    screen.render();
  }
}
664
+
665
+ // Polling
666
/**
 * (Re)start polling: cancel any existing poll timer, poll once immediately,
 * then poll every `updateInterval` milliseconds.
 */
function startPolling() {
  if (intervalId) {
    clearInterval(intervalId);
  }

  // Fire-and-forget: fetchData reports its own failures on screen.
  void fetchData();
  intervalId = setInterval(fetchData, updateInterval);
}
671
+
672
+ // Keyboard shortcuts - List view navigation with arrow keys
673
/**
 * Move the list-view highlight by `delta` rows, clamped to the table, and
 * repaint immediately.
 *
 * Ignored outside the list view, while a historical screen is open (these
 * bindings live on the shared screen and would otherwise change the hidden
 * selection), and when there are no servers (which would otherwise leave the
 * index at -1).
 */
function moveListSelection(delta: number): void {
  if (inHistoricalView || viewMode !== 'list' || servers.length === 0) return;

  const lastRow = servers.length - 1;
  selectedRowIndex = Math.min(lastRow, Math.max(0, selectedRowIndex + delta));

  // Repaint right away instead of waiting for the next poll.
  contentBox.setContent(renderListView(lastSystemMetrics));
  screen.render();
}

screen.key(['up', 'k'], () => moveListSelection(-1));

screen.key(['down', 'j'], () => moveListSelection(1));
692
+
693
+ // Enter key to view details for selected server
694
// Enter: open the detail view for the highlighted row.
screen.key(['enter'], () => {
  // Ignore while a historical screen is open. Otherwise viewMode would flip to
  // 'detail' behind it and the live view would be out of sync on return.
  if (inHistoricalView || viewMode !== 'list') return;
  // Nothing to open when the highlight does not point at a server.
  if (!servers[selectedRowIndex]) return;

  // showLoading() runs first so its immediate paint still shows the list;
  // the detail view appears once the fresh poll below completes.
  showLoading();
  selectedServerIndex = selectedRowIndex;
  viewMode = 'detail';
  fetchData();
});
702
+
703
+ // Keyboard shortcuts - Detail view
704
// ESC: detail view → back to the list; list view → exit the program.
screen.key(['escape'], () => {
  // A historical screen handles ESC itself while it is open.
  if (inHistoricalView) return;

  switch (viewMode) {
    case 'detail':
      showLoading();
      viewMode = 'list';
      cameFromDirectJump = false; // back in the list, so no longer a direct jump
      fetchData();
      break;

    case 'list':
      showLoading();
      if (intervalId) clearInterval(intervalId);
      if (spinnerIntervalId) clearInterval(spinnerIntervalId);
      // Short delay before tearing down the screen and exiting.
      setTimeout(() => {
        screen.destroy();
        process.exit(0);
      }, 100);
      break;
  }
});
724
+
725
+ // Keyboard shortcuts - Common
726
+
727
// H: open the historical view (multi-server from the list, single-server from
// the detail view). Polling keeps running in the background meanwhile.
screen.key(['h', 'H'], async () => {
  // Already showing history
  if (inHistoricalView) return;

  // Stop the spinner through hideLoading() so the isLoading flag and the timer
  // handle are reset together, not just the timer.
  hideLoading();

  // Hand the screen over to the historical view.
  screen.remove(contentBox);
  inHistoricalView = true;

  // The view to return to is the one history was opened from.
  const openedFrom = viewMode;

  // Re-attach the live content box and repaint. Runs at most once per visit,
  // whether triggered by the historical view's callback or the error path.
  const restoreLiveView = (): void => {
    if (!inHistoricalView) return;
    inHistoricalView = false;
    screen.append(contentBox);
    const content = openedFrom === 'list'
      ? renderListView(lastSystemMetrics)
      : renderDetailView(lastSystemMetrics);
    contentBox.setContent(content);
    screen.render();
  };

  try {
    if (openedFrom === 'list') {
      // NOTE(review): this passes selectedServerIndex (last server opened in
      // detail), not selectedRowIndex (currently highlighted row) — confirm
      // which one the historical view expects.
      await createMultiServerHistoricalUI(screen, servers, selectedServerIndex, restoreLiveView);
    } else {
      await createHistoricalUI(screen, servers[selectedServerIndex], restoreLiveView);
    }
  } catch (err) {
    // The key binding ignores this handler's promise, so a failure here would
    // otherwise be an unhandled rejection with the content box detached and
    // ESC disabled. Restore the live view and show the reason; the next poll
    // repaints over it.
    restoreLiveView();
    const errorMsg = err instanceof Error ? err.message : 'Unknown error';
    contentBox.setContent(
      '{bold}{red-fg}Error{/red-fg}{/bold}\n\n' +
      `{red-fg}Could not open history: ${errorMsg}{/red-fg}`
    );
    screen.render();
  }
});
768
+
769
// Q / Ctrl-C: stop all timers and exit.
screen.key(['q', 'Q', 'C-c'], () => {
  showLoading();

  for (const timer of [intervalId, spinnerIntervalId]) {
    if (timer) clearInterval(timer);
  }

  // Give the loading paint a moment to appear before tearing down.
  const exitDelayMs = 100;
  setTimeout(() => {
    screen.destroy();
    process.exit(0);
  }, exitDelayMs);
});
779
+
780
+ // Initial display
781
// Placeholder shown until the first poll renders real content.
contentBox.setContent('{cyan-fg}⏳ Connecting to servers...{/cyan-fg}');
screen.render();

startPolling();

// Cleanup: when the screen is destroyed, stop BOTH timers. Clearing only the
// poll timer leaves an active spinner timer repainting a destroyed screen.
screen.on('destroy', () => {
  if (intervalId) clearInterval(intervalId);
  hideLoading();
  // Note: macmon child processes will automatically die when parent exits
  // since they're spawned with detached: false
});
792
+ }