@appkit/llamacpp-cli 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +87 -1
  3. package/dist/cli.js +14 -0
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/monitor.d.ts +2 -0
  6. package/dist/commands/monitor.d.ts.map +1 -0
  7. package/dist/commands/monitor.js +76 -0
  8. package/dist/commands/monitor.js.map +1 -0
  9. package/dist/lib/metrics-aggregator.d.ts +39 -0
  10. package/dist/lib/metrics-aggregator.d.ts.map +1 -0
  11. package/dist/lib/metrics-aggregator.js +200 -0
  12. package/dist/lib/metrics-aggregator.js.map +1 -0
  13. package/dist/lib/system-collector.d.ts +75 -0
  14. package/dist/lib/system-collector.d.ts.map +1 -0
  15. package/dist/lib/system-collector.js +310 -0
  16. package/dist/lib/system-collector.js.map +1 -0
  17. package/dist/tui/MonitorApp.d.ts +4 -0
  18. package/dist/tui/MonitorApp.d.ts.map +1 -0
  19. package/dist/tui/MonitorApp.js +293 -0
  20. package/dist/tui/MonitorApp.js.map +1 -0
  21. package/dist/tui/MultiServerMonitorApp.d.ts +4 -0
  22. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -0
  23. package/dist/tui/MultiServerMonitorApp.js +496 -0
  24. package/dist/tui/MultiServerMonitorApp.js.map +1 -0
  25. package/dist/tui/components/ErrorState.d.ts +8 -0
  26. package/dist/tui/components/ErrorState.d.ts.map +1 -0
  27. package/dist/tui/components/ErrorState.js +22 -0
  28. package/dist/tui/components/ErrorState.js.map +1 -0
  29. package/dist/tui/components/LoadingState.d.ts +8 -0
  30. package/dist/tui/components/LoadingState.d.ts.map +1 -0
  31. package/dist/tui/components/LoadingState.js +21 -0
  32. package/dist/tui/components/LoadingState.js.map +1 -0
  33. package/dist/types/monitor-types.d.ts +122 -0
  34. package/dist/types/monitor-types.d.ts.map +1 -0
  35. package/dist/types/monitor-types.js +3 -0
  36. package/dist/types/monitor-types.js.map +1 -0
  37. package/dist/utils/process-utils.d.ts +16 -1
  38. package/dist/utils/process-utils.d.ts.map +1 -1
  39. package/dist/utils/process-utils.js +144 -27
  40. package/dist/utils/process-utils.js.map +1 -1
  41. package/package.json +4 -2
  42. package/src/cli.ts +14 -0
  43. package/src/commands/monitor.ts +90 -0
  44. package/src/lib/metrics-aggregator.ts +244 -0
  45. package/src/lib/system-collector.ts +312 -0
  46. package/src/tui/MonitorApp.ts +361 -0
  47. package/src/tui/MultiServerMonitorApp.ts +547 -0
  48. package/src/types/monitor-types.ts +161 -0
  49. package/src/utils/process-utils.ts +160 -26
@@ -1,4 +1,4 @@
1
- import { exec } from 'child_process';
1
+ import { exec, spawn } from 'child_process';
2
2
  import { promisify } from 'util';
3
3
 
4
4
  export const execAsync = promisify(exec);
@@ -60,39 +60,173 @@ export async function isPortInUse(port: number): Promise<boolean> {
60
60
  }
61
61
 
62
62
  /**
63
- * Get memory usage for a process in bytes
64
- * Uses 'top' on macOS which includes GPU/Metal memory (more accurate for llama-server)
65
- * Returns null if process not found or error occurs
63
+ * Spawn a streaming command, read one line, and kill it
64
+ * Useful for commands like 'macmon pipe' that stream indefinitely
65
+ * Ensures the process is killed to prevent leaks
66
66
  */
67
- export async function getProcessMemory(pid: number): Promise<number | null> {
67
+ export async function spawnAndReadOneLine(
68
+ command: string,
69
+ args: string[],
70
+ timeoutMs: number = 2000
71
+ ): Promise<string | null> {
72
+ return new Promise((resolve) => {
73
+ const child = spawn(command, args, {
74
+ stdio: ['ignore', 'pipe', 'ignore'],
75
+ detached: false, // Keep in same process group for easier cleanup
76
+ });
77
+
78
+ let resolved = false;
79
+ let output = '';
80
+
81
+ const cleanup = () => {
82
+ try {
83
+ // Try SIGKILL immediately (SIGTERM may not work for macmon)
84
+ child.kill('SIGKILL');
85
+ } catch {
86
+ // Process might already be dead
87
+ }
88
+ };
89
+
90
+ // Set timeout to kill process if it doesn't produce output
91
+ const timeout = setTimeout(() => {
92
+ if (!resolved) {
93
+ resolved = true;
94
+ cleanup();
95
+ resolve(null);
96
+ }
97
+ }, timeoutMs);
98
+
99
+ // Read stdout line by line
100
+ child.stdout?.on('data', (data) => {
101
+ if (resolved) return;
102
+
103
+ output += data.toString();
104
+
105
+ // Check if we have a complete line
106
+ const newlineIndex = output.indexOf('\n');
107
+ if (newlineIndex !== -1) {
108
+ const line = output.substring(0, newlineIndex).trim();
109
+
110
+ if (line.length > 0) {
111
+ resolved = true;
112
+ clearTimeout(timeout);
113
+ cleanup();
114
+ resolve(line);
115
+ }
116
+ }
117
+ });
118
+
119
+ // Handle process errors
120
+ child.on('error', () => {
121
+ if (!resolved) {
122
+ resolved = true;
123
+ clearTimeout(timeout);
124
+ resolve(null);
125
+ }
126
+ });
127
+
128
+ // Handle process exit
129
+ child.on('exit', () => {
130
+ if (!resolved) {
131
+ resolved = true;
132
+ clearTimeout(timeout);
133
+
134
+ // Return partial output if we have any
135
+ const line = output.trim();
136
+ resolve(line.length > 0 ? line : null);
137
+ }
138
+ });
139
+ });
140
+ }
141
+
142
+ // Process memory cache to prevent spawning too many 'top' processes
143
+ // Cache per PID with 3-second TTL
144
+ const processMemoryCache = new Map<number, { value: number | null; timestamp: number }>();
145
+ const PROCESS_MEMORY_CACHE_TTL = 3000; // 3 seconds
146
+
147
+ /**
148
+ * Batch get memory usage for multiple processes in one top call
149
+ * Much more efficient than calling getProcessMemory() multiple times
150
+ * Returns Map<pid, bytes> for all requested PIDs
151
+ */
152
+ export async function getBatchProcessMemory(pids: number[]): Promise<Map<number, number | null>> {
153
+ const result = new Map<number, number | null>();
154
+ const now = Date.now();
155
+
156
+ // Check cache and collect PIDs that need fetching
157
+ const pidsToFetch: number[] = [];
158
+ for (const pid of pids) {
159
+ const cached = processMemoryCache.get(pid);
160
+ if (cached && (now - cached.timestamp) < PROCESS_MEMORY_CACHE_TTL) {
161
+ result.set(pid, cached.value);
162
+ } else {
163
+ pidsToFetch.push(pid);
164
+ }
165
+ }
166
+
167
+ // If all PIDs were cached, return early
168
+ if (pidsToFetch.length === 0) {
169
+ return result;
170
+ }
171
+
68
172
  try {
69
- // Use top with -l 1 (one sample) to get memory stats
70
- // MEM column shows resident memory including GPU memory on macOS
71
- const output = await execCommand(`top -l 1 -pid ${pid} -stats mem`);
173
+ // Build top command with all PIDs: top -l 1 -pid X -pid Y -pid Z -stats pid,mem
174
+ const pidArgs = pidsToFetch.map(pid => `-pid ${pid}`).join(' ');
175
+ const output = await execCommand(`top -l 1 ${pidArgs} -stats pid,mem 2>/dev/null`);
72
176
 
73
- // Get the last non-empty line which contains the memory value
74
- const lines = output.split('\n').filter((line) => line.trim().length > 0);
75
- if (lines.length === 0) return null;
177
+ // Parse output: each line is "PID MEM" (e.g., "1438 299M")
178
+ const lines = output.split('\n');
179
+ for (const line of lines) {
180
+ const match = line.trim().match(/^(\d+)\s+([\d.]+)([KMGT])\s*$/);
181
+ if (!match) continue;
76
182
 
77
- const memStr = lines[lines.length - 1].trim();
183
+ const pid = parseInt(match[1], 10);
184
+ const value = parseFloat(match[2]);
185
+ const unit = match[3];
78
186
 
79
- // Parse memory string (e.g., "10.5G", "512M", "1024K", "10G")
80
- const match = memStr.match(/^([\d.]+)([KMGT])$/);
81
- if (!match) return null;
187
+ // Convert to bytes
188
+ const multipliers: { [key: string]: number } = {
189
+ K: 1024,
190
+ M: 1024 * 1024,
191
+ G: 1024 * 1024 * 1024,
192
+ T: 1024 * 1024 * 1024 * 1024,
193
+ };
82
194
 
83
- const value = parseFloat(match[1]);
84
- const unit = match[2];
195
+ const bytes = Math.round(value * multipliers[unit]);
85
196
 
86
- // Convert to bytes
87
- const multipliers: { [key: string]: number } = {
88
- K: 1024,
89
- M: 1024 * 1024,
90
- G: 1024 * 1024 * 1024,
91
- T: 1024 * 1024 * 1024 * 1024,
92
- };
197
+ // Cache and store result
198
+ processMemoryCache.set(pid, { value: bytes, timestamp: now });
199
+ result.set(pid, bytes);
200
+ }
93
201
 
94
- return Math.round(value * multipliers[unit]);
202
+ // For any PIDs that weren't in the output, cache null
203
+ for (const pid of pidsToFetch) {
204
+ if (!result.has(pid)) {
205
+ processMemoryCache.set(pid, { value: null, timestamp: now });
206
+ result.set(pid, null);
207
+ }
208
+ }
209
+
210
+ return result;
95
211
  } catch {
96
- return null;
212
+ // On error, cache null for all requested PIDs
213
+ for (const pid of pidsToFetch) {
214
+ processMemoryCache.set(pid, { value: null, timestamp: now });
215
+ result.set(pid, null);
216
+ }
217
+ return result;
97
218
  }
98
219
  }
220
+
221
+ /**
222
+ * Get memory usage for a single process in bytes
223
+ * Uses 'top' on macOS which includes GPU/Metal memory (more accurate for llama-server)
224
+ * Returns null if process not found or error occurs
225
+ * Caches results for 3 seconds to prevent spawning too many top processes
226
+ *
227
+ * Note: For multiple PIDs, use getBatchProcessMemory() instead - much more efficient
228
+ */
229
+ export async function getProcessMemory(pid: number): Promise<number | null> {
230
+ const result = await getBatchProcessMemory([pid]);
231
+ return result.get(pid) ?? null;
232
+ }