@appkit/llamacpp-cli 1.4.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/MONITORING-ACCURACY-FIX.md +199 -0
  3. package/PER-PROCESS-METRICS.md +190 -0
  4. package/README.md +136 -1
  5. package/dist/cli.js +21 -4
  6. package/dist/cli.js.map +1 -1
  7. package/dist/commands/create.d.ts.map +1 -1
  8. package/dist/commands/create.js +12 -3
  9. package/dist/commands/create.js.map +1 -1
  10. package/dist/commands/monitor.d.ts +2 -0
  11. package/dist/commands/monitor.d.ts.map +1 -0
  12. package/dist/commands/monitor.js +126 -0
  13. package/dist/commands/monitor.js.map +1 -0
  14. package/dist/commands/ps.d.ts +3 -1
  15. package/dist/commands/ps.d.ts.map +1 -1
  16. package/dist/commands/ps.js +75 -5
  17. package/dist/commands/ps.js.map +1 -1
  18. package/dist/commands/server-show.d.ts.map +1 -1
  19. package/dist/commands/server-show.js +10 -3
  20. package/dist/commands/server-show.js.map +1 -1
  21. package/dist/commands/start.d.ts.map +1 -1
  22. package/dist/commands/start.js +14 -2
  23. package/dist/commands/start.js.map +1 -1
  24. package/dist/lib/history-manager.d.ts +46 -0
  25. package/dist/lib/history-manager.d.ts.map +1 -0
  26. package/dist/lib/history-manager.js +157 -0
  27. package/dist/lib/history-manager.js.map +1 -0
  28. package/dist/lib/metrics-aggregator.d.ts +40 -0
  29. package/dist/lib/metrics-aggregator.d.ts.map +1 -0
  30. package/dist/lib/metrics-aggregator.js +211 -0
  31. package/dist/lib/metrics-aggregator.js.map +1 -0
  32. package/dist/lib/system-collector.d.ts +80 -0
  33. package/dist/lib/system-collector.d.ts.map +1 -0
  34. package/dist/lib/system-collector.js +311 -0
  35. package/dist/lib/system-collector.js.map +1 -0
  36. package/dist/tui/HistoricalMonitorApp.d.ts +5 -0
  37. package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -0
  38. package/dist/tui/HistoricalMonitorApp.js +490 -0
  39. package/dist/tui/HistoricalMonitorApp.js.map +1 -0
  40. package/dist/tui/MonitorApp.d.ts +4 -0
  41. package/dist/tui/MonitorApp.d.ts.map +1 -0
  42. package/dist/tui/MonitorApp.js +315 -0
  43. package/dist/tui/MonitorApp.js.map +1 -0
  44. package/dist/tui/MultiServerMonitorApp.d.ts +4 -0
  45. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -0
  46. package/dist/tui/MultiServerMonitorApp.js +712 -0
  47. package/dist/tui/MultiServerMonitorApp.js.map +1 -0
  48. package/dist/types/history-types.d.ts +30 -0
  49. package/dist/types/history-types.d.ts.map +1 -0
  50. package/dist/types/history-types.js +11 -0
  51. package/dist/types/history-types.js.map +1 -0
  52. package/dist/types/monitor-types.d.ts +123 -0
  53. package/dist/types/monitor-types.d.ts.map +1 -0
  54. package/dist/types/monitor-types.js +3 -0
  55. package/dist/types/monitor-types.js.map +1 -0
  56. package/dist/types/server-config.d.ts +1 -0
  57. package/dist/types/server-config.d.ts.map +1 -1
  58. package/dist/types/server-config.js.map +1 -1
  59. package/dist/utils/downsample-utils.d.ts +35 -0
  60. package/dist/utils/downsample-utils.d.ts.map +1 -0
  61. package/dist/utils/downsample-utils.js +107 -0
  62. package/dist/utils/downsample-utils.js.map +1 -0
  63. package/dist/utils/file-utils.d.ts +6 -0
  64. package/dist/utils/file-utils.d.ts.map +1 -1
  65. package/dist/utils/file-utils.js +38 -0
  66. package/dist/utils/file-utils.js.map +1 -1
  67. package/dist/utils/process-utils.d.ts +35 -2
  68. package/dist/utils/process-utils.d.ts.map +1 -1
  69. package/dist/utils/process-utils.js +220 -25
  70. package/dist/utils/process-utils.js.map +1 -1
  71. package/docs/images/.gitkeep +1 -0
  72. package/package.json +5 -1
  73. package/src/cli.ts +21 -4
  74. package/src/commands/create.ts +14 -4
  75. package/src/commands/monitor.ts +110 -0
  76. package/src/commands/ps.ts +88 -5
  77. package/src/commands/server-show.ts +10 -3
  78. package/src/commands/start.ts +15 -2
  79. package/src/lib/history-manager.ts +172 -0
  80. package/src/lib/metrics-aggregator.ts +257 -0
  81. package/src/lib/system-collector.ts +315 -0
  82. package/src/tui/HistoricalMonitorApp.ts +548 -0
  83. package/src/tui/MonitorApp.ts +386 -0
  84. package/src/tui/MultiServerMonitorApp.ts +792 -0
  85. package/src/types/history-types.ts +39 -0
  86. package/src/types/monitor-types.ts +162 -0
  87. package/src/types/server-config.ts +1 -0
  88. package/src/utils/downsample-utils.ts +128 -0
  89. package/src/utils/file-utils.ts +40 -0
  90. package/src/utils/process-utils.ts +243 -25
  91. package/test-load.sh +100 -0
@@ -0,0 +1,386 @@
1
+ import blessed from 'blessed';
2
+ import { ServerConfig } from '../types/server-config.js';
3
+ import { MetricsAggregator } from '../lib/metrics-aggregator.js';
4
+ import { MonitorData } from '../types/monitor-types.js';
5
+ import { HistoryManager } from '../lib/history-manager.js';
6
+ import { createHistoricalUI } from './HistoricalMonitorApp.js';
7
+
8
+ export async function createMonitorUI(
9
+ screen: blessed.Widgets.Screen,
10
+ server: ServerConfig
11
+ ): Promise<void> {
12
// ---- Polling state shared by the closures defined further down ----

// Number of consecutive failed polls after which cached data is shown as stale.
const STALE_THRESHOLD = 5;
// Delay between polls in milliseconds (also printed in the footer).
let updateInterval = 2000;
// Handle of the running poll timer; null while no timer has been started.
let intervalId: NodeJS.Timeout | null = null;
// Failed polls since the last successful one.
let consecutiveFailures = 0;
// Most recent successfully collected sample, kept for the stale view.
let lastGoodData: MonitorData | null = null;

// Collaborators: one collects the monitor data, the other records snapshots.
const metricsAggregator = new MetricsAggregator(server);
const historyManager = new HistoryManager(server.id);

// ---- Widgets ----

// Blue block-character scrollbar for the content box.
const scrollbarOptions = {
  ch: '█',
  style: {
    fg: 'blue',
  },
};

// A single full-screen box holds the whole dashboard; it scrolls with
// arrow keys, vi keys and the mouse.
const contentBox = blessed.box({
  top: 0,
  left: 0,
  width: '100%',
  height: '100%',
  tags: true,
  scrollable: true,
  alwaysScroll: true,
  keys: true,
  vi: true,
  mouse: true,
  scrollbar: scrollbarOptions,
});
screen.append(contentBox);
40
+
41
/**
 * Render a fixed-width text progress bar such as `[███░░░]`.
 *
 * The bar always contains exactly `width` cells between the brackets.
 * Out-of-range input is clamped so the layout cannot be pushed out of
 * alignment: values above 100 fill the bar completely, values below 0
 * and non-finite values (NaN, ±Infinity) render an empty bar.
 *
 * @param percentage - Fill level in percent; expected range is 0–100.
 * @param width - Number of cells between the brackets (default 30).
 * @returns The bar, `width + 2` characters long for a non-negative integer width.
 */
function createProgressBar(percentage: number, width: number = 30): string {
  // Negative or fractional widths would otherwise leak into repeat().
  const cells = Math.max(0, Math.floor(width));
  // Clamp first: the original only clamped the *empty* part, so any reading
  // above 100 % produced more than `width` filled cells.
  const clamped = Number.isFinite(percentage)
    ? Math.min(100, Math.max(0, percentage))
    : 0;
  const filled = Math.round((clamped / 100) * cells);
  return '[' + '█'.repeat(filled) + '░'.repeat(cells - filled) + ']';
}
47
+
48
/**
 * Build the complete dashboard text (blessed tag markup) for one sample.
 *
 * The same layout serves the live view and the stale view. Passing
 * `staleFailures` switches to the stale variant: a "connection lost" banner
 * under the header, a yellow STALE status, "(stale)" markers on the metric
 * section titles, and a footer showing the sample time and failure count.
 *
 * @param data - Sample to display.
 * @param staleFailures - Number of consecutive failed polls; omit for the live view.
 * @returns Text ready to be passed to `contentBox.setContent`.
 */
function renderDashboard(data: MonitorData, staleFailures?: number): string {
  const isStale = staleFailures !== undefined;
  const staleTag = isStale ? ' {yellow-fg}(stale){/yellow-fg}' : '';
  const stats = data.server;
  const system = data.system;

  const termWidth = (screen.width as number) || 80;
  // Leave two columns for padding. Clamp at zero: repeat() throws a
  // RangeError on a negative count, which the caller would otherwise
  // misreport as a connection failure.
  const divider = '─'.repeat(Math.max(0, termWidth - 2));

  // Bar scale for process memory, in GB (rough guess at a typical model size).
  const MEMORY_BAR_MAX_GB = 8;

  let content = '';

  // Header (the stale view inserts a warning banner before the blank line)
  content += `{bold}{blue-fg}═══ ${server.modelName} (${server.port}){/blue-fg}{/bold}\n`;
  content += isStale
    ? '{bold}{yellow-fg}⚠ CONNECTION LOST - SHOWING STALE DATA{/yellow-fg}{/bold}\n\n'
    : '\n';

  // Server Info
  content += '{bold}Server Information{/bold}\n';
  content += divider + '\n';

  let statusIcon: string;
  let statusText: string;
  if (isStale) {
    statusIcon = '{yellow-fg}●{/yellow-fg}';
    statusText = 'STALE';
  } else if (stats.healthy) {
    statusIcon = '{green-fg}●{/green-fg}';
    statusText = 'RUNNING';
  } else {
    statusIcon = '{red-fg}●{/red-fg}';
    statusText = 'UNHEALTHY';
  }
  content += `Status: ${statusIcon} ${statusText}\n`;

  if (stats.uptime) {
    content += `Uptime: ${stats.uptime}\n`;
  }

  content += `Model: ${server.modelName}\n`;
  // Handle null host (legacy configs) by defaulting to 127.0.0.1
  const displayHost = server.host || '127.0.0.1';
  content += `Endpoint: http://${displayHost}:${server.port}\n`;
  content += `Slots: ${stats.activeSlots} active / ${stats.totalSlots} total\n`;
  content += '\n';

  // Request Metrics
  if (stats.totalSlots > 0) {
    content += `{bold}Request Metrics{/bold}${staleTag}\n`;
    content += divider + '\n';
    content += `Active: ${stats.activeSlots} / ${stats.totalSlots}\n`;
    content += `Idle: ${stats.idleSlots} / ${stats.totalSlots}\n`;

    if (stats.avgPromptSpeed !== undefined && stats.avgPromptSpeed > 0) {
      content += `Prompt: ${Math.round(stats.avgPromptSpeed)} tokens/sec\n`;
    }

    if (stats.avgGenerateSpeed !== undefined && stats.avgGenerateSpeed > 0) {
      content += `Generate: ${Math.round(stats.avgGenerateSpeed)} tokens/sec\n`;
    }

    content += '\n';
  }

  // Active Slots Detail (section is omitted when nothing is processing)
  const activeSlots = stats.slots.filter(s => s.state === 'processing');
  if (activeSlots.length > 0) {
    content += `{bold}Active Slots{/bold}${staleTag}\n`;
    content += divider + '\n';

    for (const slot of activeSlots) {
      content += `Slot #${slot.id}: {yellow-fg}PROCESSING{/yellow-fg}`;

      if (slot.timings?.predicted_per_second) {
        content += ` - ${Math.round(slot.timings.predicted_per_second)} tok/s`;
      }

      if (slot.n_decoded !== undefined) {
        content += ` - ${slot.n_decoded} tokens`;
      }

      content += '\n';
    }

    content += '\n';
  }

  // Model Resources (per-process metrics)
  content += `{bold}Model Resources{/bold}${staleTag}\n`;
  content += divider + '\n';

  // GPU: system-wide figure, labelled as such in the output
  if (system && system.gpuUsage !== undefined) {
    const bar = createProgressBar(system.gpuUsage);
    content += `GPU: {cyan-fg}${bar}{/cyan-fg} ${Math.round(system.gpuUsage)}% {gray-fg}(system){/gray-fg}`;

    if (system.temperature !== undefined) {
      content += ` - ${Math.round(system.temperature)}°C`;
    }

    content += '\n';
  }

  // CPU: per-process
  if (stats.processCpuUsage !== undefined) {
    const bar = createProgressBar(stats.processCpuUsage);
    content += `CPU: {cyan-fg}${bar}{/cyan-fg} ${Math.round(stats.processCpuUsage)}%\n`;
  }

  // Memory: per-process, bytes converted to GB; the bar saturates at MEMORY_BAR_MAX_GB
  if (stats.processMemory !== undefined) {
    const memoryGB = stats.processMemory / (1024 ** 3);
    const memoryPercentage = Math.min((memoryGB / MEMORY_BAR_MAX_GB) * 100, 100);
    const bar = createProgressBar(memoryPercentage);
    content += `Memory: {cyan-fg}${bar}{/cyan-fg} ${memoryGB.toFixed(2)} GB\n`;
  }

  if (system && system.warnings && system.warnings.length > 0) {
    content += `\n{yellow-fg}⚠ ${system.warnings.join(', ')}{/yellow-fg}\n`;
  }

  content += '\n';

  // Footer
  content += divider + '\n';
  if (isStale) {
    content += `{yellow-fg}Last good data: ${data.lastUpdated.toLocaleTimeString()}{/yellow-fg}\n`;
    content += `{yellow-fg}Connection failures: ${staleFailures}{/yellow-fg}\n`;
    content += `{gray-fg}Interval: ${updateInterval}ms | [H]istory [R]efresh [+/-]Speed [Q]uit{/gray-fg}`;
  } else {
    content += `{gray-fg}Updated: ${data.lastUpdated.toLocaleTimeString()} | `;
    content += `Interval: ${updateInterval}ms | `;
    content += `[H]istory [R]efresh [+/-]Speed [Q]uit{/gray-fg}`;
  }

  return content;
}

/**
 * Poll the server once and redraw the content box.
 *
 * On success the failure counter is reset, the sample is cached, a history
 * snapshot is appended (only for healthy, non-stale servers) and the live
 * dashboard is drawn. On failure the counter is incremented; once it reaches
 * STALE_THRESHOLD and a cached sample exists, that sample is redrawn in the
 * stale layout. Otherwise a connection-error screen is shown.
 *
 * Any error raised in the success path, including one thrown while
 * rendering, is handled as a failed poll.
 */
async function fetchData(): Promise<void> {
  try {
    const data = await metricsAggregator.collectMonitorData(server, updateInterval);

    // Reset failure count on success
    consecutiveFailures = 0;
    lastGoodData = data;

    // Only save history for servers that are healthy and not stale
    if (!data.server.stale && data.server.healthy) {
      historyManager.appendSnapshot(data.server, data.system).catch(() => {
        // Best effort: a failed history write must not interrupt monitoring
      });
    }

    contentBox.setContent(renderDashboard(data));
    screen.render();
  } catch (err) {
    consecutiveFailures++;
    const cached = lastGoodData;

    if (cached !== null && consecutiveFailures >= STALE_THRESHOLD) {
      // Enough failures in a row: fall back to the last good sample
      contentBox.setContent(renderDashboard(cached, consecutiveFailures));
    } else {
      // Either nothing is cached yet or we are still within the retry budget
      const errorMsg = err instanceof Error ? err.message : 'Unknown error';
      const retryMsg = consecutiveFailures < STALE_THRESHOLD
        ? `Retrying... (${consecutiveFailures}/${STALE_THRESHOLD})`
        : 'Connection lost';

      contentBox.setContent(
        '{bold}{red-fg}Connection Error{/red-fg}{/bold}\n\n' +
        `{red-fg}${errorMsg}{/red-fg}\n\n` +
        `{yellow-fg}${retryMsg}{/yellow-fg}\n\n` +
        '{gray-fg}Press [R] to retry or [Q] to quit{/gray-fg}'
      );
    }
    screen.render();
  }
}
322
+
323
/**
 * (Re)start the polling loop using the current `updateInterval`.
 *
 * Cancels any timer that is already running, triggers one fetch right away
 * and then schedules `fetchData` to repeat every `updateInterval` ms.
 */
function startPolling() {
  if (intervalId !== null) {
    clearInterval(intervalId);
  }

  // Fire-and-forget: the first refresh should not wait a full interval.
  void fetchData();

  intervalId = setInterval(fetchData, updateInterval);
}
329
+
330
// Keyboard shortcuts: manual refresh and polling speed

// [R] fetches once immediately without touching the timer.
const refreshNow = (): void => {
  void fetchData();
};

// [+] / [=] shortens the interval by 500 ms, never going below 500 ms.
const pollFaster = (): void => {
  updateInterval = Math.max(500, updateInterval - 500);
  startPolling();
};

// [-] / [_] lengthens the interval by 500 ms, never going above 10 000 ms.
const pollSlower = (): void => {
  updateInterval = Math.min(10000, updateInterval + 500);
  startPolling();
};

screen.key(['r', 'R'], refreshNow);
screen.key(['+', '='], pollFaster);
screen.key(['-', '_'], pollSlower);
344
+
345
// True while the historical view owns the screen; guards against opening a
// second copy when [H] is pressed again.
let inHistoricalView = false;

// [H] swaps the live dashboard for the historical view. Polling is left
// running in the background so history keeps being recorded.
screen.key(['h', 'H'], async () => {
  // Prevent entering historical view if already there
  if (inHistoricalView) return;
  inHistoricalView = true;

  // Detach the live dashboard
  screen.remove(contentBox);

  // Put the live dashboard back. Safe to call more than once: only the
  // first call after entering the historical view has any effect.
  const restoreLiveView = (): void => {
    if (!inHistoricalView) return;
    inHistoricalView = false;
    screen.append(contentBox);
  };

  try {
    await createHistoricalUI(screen, server, restoreLiveView);
  } catch (err) {
    // The key listener's promise is not awaited by anyone, so a rejection
    // here would go unhandled and leave the screen without the dashboard
    // and the [H] key permanently disabled. Restore the live view instead.
    // NOTE(review): assumes createHistoricalUI leaves no widgets of its own
    // attached when it fails — confirm against its implementation.
    restoreLiveView();
    const reason = err instanceof Error ? err.message : 'Unknown error';
    // Shown only until the next poll redraws the dashboard.
    contentBox.setContent(`{red-fg}Failed to open history view: ${reason}{/red-fg}`);
    screen.render();
  }
});
367
+
368
// [Q] / Ctrl-C: stop the poll timer, destroy the screen and exit with status 0.
const quit = (): void => {
  if (intervalId !== null) {
    clearInterval(intervalId);
  }
  screen.destroy();
  process.exit(0);
};

screen.key(['q', 'Q', 'C-c'], quit);
373
+
374
// Placeholder shown until the first poll has drawn something
const connectingMessage = '{cyan-fg}⏳ Connecting to server...{/cyan-fg}';
contentBox.setContent(connectingMessage);
screen.render();

// Kick off the polling loop (fetches once immediately, then on a timer)
startPolling();

// Stop the poll timer whenever the screen is destroyed.
// NOTE(review): the original comment states that macmon child processes are
// spawned with detached: false and therefore exit with the parent; that is
// not visible from this file — confirm in the collector that spawns them.
screen.on('destroy', () => {
  if (intervalId !== null) {
    clearInterval(intervalId);
  }
});
386
+ }