@appkit/llamacpp-cli 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CHANGELOG.md +14 -0
  2. package/README.md +52 -14
  3. package/dist/cli.js +8 -2
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/list.js +1 -1
  6. package/dist/commands/list.js.map +1 -1
  7. package/dist/commands/logs.d.ts +4 -0
  8. package/dist/commands/logs.d.ts.map +1 -1
  9. package/dist/commands/logs.js +157 -24
  10. package/dist/commands/logs.js.map +1 -1
  11. package/dist/commands/ps.d.ts.map +1 -1
  12. package/dist/commands/ps.js +11 -1
  13. package/dist/commands/ps.js.map +1 -1
  14. package/dist/commands/pull.js +1 -1
  15. package/dist/commands/pull.js.map +1 -1
  16. package/dist/commands/run.js +1 -1
  17. package/dist/commands/run.js.map +1 -1
  18. package/dist/commands/start.d.ts +2 -0
  19. package/dist/commands/start.d.ts.map +1 -1
  20. package/dist/commands/start.js +7 -3
  21. package/dist/commands/start.js.map +1 -1
  22. package/dist/lib/config-generator.d.ts +2 -0
  23. package/dist/lib/config-generator.d.ts.map +1 -1
  24. package/dist/lib/config-generator.js +6 -0
  25. package/dist/lib/config-generator.js.map +1 -1
  26. package/dist/lib/launchctl-manager.d.ts.map +1 -1
  27. package/dist/lib/launchctl-manager.js +22 -13
  28. package/dist/lib/launchctl-manager.js.map +1 -1
  29. package/dist/types/server-config.d.ts +2 -0
  30. package/dist/types/server-config.d.ts.map +1 -1
  31. package/dist/types/server-config.js.map +1 -1
  32. package/dist/utils/log-parser.d.ts +37 -0
  33. package/dist/utils/log-parser.d.ts.map +1 -0
  34. package/dist/utils/log-parser.js +164 -0
  35. package/dist/utils/log-parser.js.map +1 -0
  36. package/dist/utils/process-utils.d.ts +6 -0
  37. package/dist/utils/process-utils.d.ts.map +1 -1
  38. package/dist/utils/process-utils.js +35 -0
  39. package/dist/utils/process-utils.js.map +1 -1
  40. package/package.json +1 -1
  41. package/src/cli.ts +8 -2
  42. package/src/commands/list.ts +1 -1
  43. package/src/commands/logs.ts +133 -26
  44. package/src/commands/ps.ts +13 -2
  45. package/src/commands/pull.ts +1 -1
  46. package/src/commands/run.ts +1 -1
  47. package/src/commands/start.ts +9 -3
  48. package/src/lib/config-generator.ts +8 -0
  49. package/src/lib/launchctl-manager.ts +22 -13
  50. package/src/types/server-config.ts +2 -0
  51. package/src/utils/log-parser.ts +184 -0
  52. package/src/utils/process-utils.ts +38 -0
@@ -1,13 +1,20 @@
1
1
  import chalk from 'chalk';
2
2
  import { spawn } from 'child_process';
3
+ import * as readline from 'readline';
4
+ import * as fs from 'fs';
3
5
  import { stateManager } from '../lib/state-manager';
4
6
  import { fileExists } from '../utils/file-utils';
5
7
  import { execCommand } from '../utils/process-utils';
8
+ import { logParser } from '../utils/log-parser';
6
9
 
7
10
  interface LogsOptions {
8
11
  follow?: boolean;
9
12
  lines?: number;
10
13
  errors?: boolean;
14
+ verbose?: boolean;
15
+ http?: boolean;
16
+ stdout?: boolean;
17
+ filter?: string;
11
18
  }
12
19
 
13
20
  export async function logsCommand(identifier: string, options: LogsOptions): Promise<void> {
@@ -17,9 +24,9 @@ export async function logsCommand(identifier: string, options: LogsOptions): Pro
17
24
  throw new Error(`Server not found: ${identifier}\n\nUse: llamacpp ps`);
18
25
  }
19
26
 
20
- // Determine log file
21
- const logPath = options.errors ? server.stderrPath : server.stdoutPath;
22
- const logType = options.errors ? 'errors' : 'logs';
27
+ // Determine log file (default to stderr where verbose logs go)
28
+ const logPath = options.stdout ? server.stdoutPath : server.stderrPath;
29
+ const logType = options.stdout ? 'stdout' : 'stderr';
23
30
 
24
31
  // Check if log file exists
25
32
  if (!(await fileExists(logPath))) {
@@ -28,34 +35,134 @@ export async function logsCommand(identifier: string, options: LogsOptions): Pro
28
35
  return;
29
36
  }
30
37
 
31
- console.log(chalk.blue(`📋 ${options.errors ? 'Errors' : 'Logs'} for ${server.modelName}`));
38
+ // Determine filter pattern and mode
39
+ let filterPattern: string | null = null;
40
+ let filterDesc = '';
41
+ let useCompactMode = false;
42
+
43
+ if (options.verbose) {
44
+ // Show everything (no filter)
45
+ filterDesc = ' (all messages)';
46
+ } else if (options.errors) {
47
+ // Show only errors
48
+ filterPattern = 'error|Error|ERROR|failed|Failed|FAILED';
49
+ filterDesc = ' (errors only)';
50
+ } else if (options.http) {
51
+ // Full HTTP JSON logs
52
+ filterPattern = 'log_server_r';
53
+ filterDesc = ' (HTTP JSON)';
54
+ } else if (options.filter) {
55
+ // Custom filter
56
+ filterPattern = options.filter;
57
+ filterDesc = ` (filter: ${options.filter})`;
58
+ } else {
59
+ // Default: Compact one-liner format
60
+ filterPattern = 'log_server_r';
61
+ filterDesc = ' (compact)';
62
+ useCompactMode = true;
63
+ }
64
+
65
+ console.log(chalk.blue(`📋 Logs for ${server.modelName} (${logType}${filterDesc})`));
32
66
  console.log(chalk.dim(` ${logPath}\n`));
33
67
 
34
68
  if (options.follow) {
35
- // Follow logs in real-time
36
- const tail = spawn('tail', ['-f', logPath], {
37
- stdio: 'inherit',
38
- });
39
-
40
- // Handle Ctrl+C gracefully
41
- process.on('SIGINT', () => {
42
- tail.kill();
43
- console.log();
44
- process.exit(0);
45
- });
46
-
47
- // Wait for tail to exit
48
- tail.on('exit', () => {
49
- process.exit(0);
50
- });
69
+ // Follow logs in real-time with optional filtering
70
+ if (useCompactMode) {
71
+ // Compact mode with follow: parse lines in real-time
72
+ const tailProcess = spawn('tail', ['-f', logPath]);
73
+ const rl = readline.createInterface({
74
+ input: tailProcess.stdout,
75
+ crlfDelay: Infinity,
76
+ });
77
+
78
+ rl.on('line', (line) => {
79
+ if (line.includes('log_server_r')) {
80
+ logParser.processLine(line, (compactLine) => {
81
+ console.log(compactLine);
82
+ });
83
+ }
84
+ });
85
+
86
+ // Handle Ctrl+C gracefully
87
+ process.on('SIGINT', () => {
88
+ tailProcess.kill();
89
+ rl.close();
90
+ console.log();
91
+ process.exit(0);
92
+ });
93
+
94
+ tailProcess.on('exit', () => {
95
+ process.exit(0);
96
+ });
97
+ } else if (filterPattern) {
98
+ // Use tail piped to grep for filtering
99
+ const grepProcess = spawn('sh', ['-c', `tail -f "${logPath}" | grep --line-buffered -E "${filterPattern}"`], {
100
+ stdio: 'inherit',
101
+ });
102
+
103
+ // Handle Ctrl+C gracefully
104
+ process.on('SIGINT', () => {
105
+ grepProcess.kill();
106
+ console.log();
107
+ process.exit(0);
108
+ });
109
+
110
+ grepProcess.on('exit', () => {
111
+ process.exit(0);
112
+ });
113
+ } else {
114
+ // No filter, just tail
115
+ const tail = spawn('tail', ['-f', logPath], {
116
+ stdio: 'inherit',
117
+ });
118
+
119
+ process.on('SIGINT', () => {
120
+ tail.kill();
121
+ console.log();
122
+ process.exit(0);
123
+ });
124
+
125
+ tail.on('exit', () => {
126
+ process.exit(0);
127
+ });
128
+ }
51
129
  } else {
52
- // Show last N lines
130
+ // Show last N lines with optional filtering
53
131
  const lines = options.lines || 50;
54
- try {
55
- const output = await execCommand(`tail -n ${lines} "${logPath}"`);
56
- console.log(output);
57
- } catch (error) {
58
- throw new Error(`Failed to read logs: ${(error as Error).message}`);
132
+
133
+ if (useCompactMode) {
134
+ // Compact mode: read file and parse
135
+ try {
136
+ const command = `tail -n ${lines * 3} "${logPath}" | grep -E "log_server_r"`;
137
+ const output = await execCommand(command);
138
+ const logLines = output.split('\n').filter((l) => l.trim());
139
+
140
+ for (const line of logLines) {
141
+ logParser.processLine(line, (compactLine) => {
142
+ console.log(compactLine);
143
+ });
144
+ }
145
+ } catch (error) {
146
+ throw new Error(`Failed to read logs: ${(error as Error).message}`);
147
+ }
148
+ } else {
149
+ // Regular filtering
150
+ try {
151
+ let command: string;
152
+
153
+ if (filterPattern) {
154
+ // Use tail piped to grep
155
+ command = `tail -n ${lines} "${logPath}" | grep -E "${filterPattern}"`;
156
+ } else {
157
+ // No filter
158
+ command = `tail -n ${lines} "${logPath}"`;
159
+ }
160
+
161
+ const output = await execCommand(command);
162
+ console.log(output);
163
+ } catch (error) {
164
+ throw new Error(`Failed to read logs: ${(error as Error).message}`);
165
+ }
59
166
  }
60
167
  }
61
168
  }
@@ -2,7 +2,8 @@ import chalk from 'chalk';
2
2
  import Table from 'cli-table3';
3
3
  import { stateManager } from '../lib/state-manager';
4
4
  import { statusChecker } from '../lib/status-checker';
5
- import { formatUptime } from '../utils/format-utils';
5
+ import { formatUptime, formatBytes } from '../utils/format-utils';
6
+ import { getProcessMemory } from '../utils/process-utils';
6
7
 
7
8
  export async function psCommand(): Promise<void> {
8
9
  const servers = await stateManager.getAllServers();
@@ -18,7 +19,7 @@ export async function psCommand(): Promise<void> {
18
19
  const updated = await statusChecker.updateAllServerStatuses();
19
20
 
20
21
  const table = new Table({
21
- head: ['SERVER ID', 'MODEL', 'PORT', 'STATUS', 'PID', 'UPTIME'],
22
+ head: ['SERVER ID', 'MODEL', 'PORT', 'STATUS', 'PID', 'MEMORY', 'UPTIME'],
22
23
  });
23
24
 
24
25
  let runningCount = 0;
@@ -51,12 +52,22 @@ export async function psCommand(): Promise<void> {
51
52
  ? formatUptime(server.lastStarted)
52
53
  : '-';
53
54
 
55
+ // Get memory usage for running servers
56
+ let memoryText = '-';
57
+ if (server.status === 'running' && server.pid) {
58
+ const memoryBytes = await getProcessMemory(server.pid);
59
+ if (memoryBytes !== null) {
60
+ memoryText = formatBytes(memoryBytes);
61
+ }
62
+ }
63
+
54
64
  table.push([
55
65
  server.id,
56
66
  server.modelName,
57
67
  server.port.toString(),
58
68
  statusColor(statusText),
59
69
  server.pid?.toString() || '-',
70
+ memoryText,
60
71
  uptime,
61
72
  ]);
62
73
  }
@@ -30,7 +30,7 @@ export async function pullCommand(identifier: string, options: PullOptions): Pro
30
30
  const modelPath = await modelDownloader.downloadModel(parsed.repo, filename);
31
31
 
32
32
  console.log();
33
- console.log(chalk.dim(`Start server: llamacpp start ${filename}`));
33
+ console.log(chalk.dim(`Start server: llamacpp server start ${filename}`));
34
34
  } catch (error) {
35
35
  if ((error as Error).message.includes('interrupted')) {
36
36
  console.log(chalk.dim('\nDownload was interrupted. Run the same command again to retry.'));
@@ -49,7 +49,7 @@ export async function runCommand(modelIdentifier: string): Promise<void> {
49
49
  // 2. Verify server is running
50
50
  const status = await statusChecker.checkServer(server);
51
51
  if (!status.isRunning) {
52
- throw new Error(`Server exists but is not running. Start it with: llamacpp start ${server.id}`);
52
+ throw new Error(`Server exists but is not running. Start it with: llamacpp server start ${server.id}`);
53
53
  }
54
54
 
55
55
  // 3. Start REPL
@@ -15,6 +15,8 @@ interface StartOptions {
15
15
  threads?: number;
16
16
  ctxSize?: number;
17
17
  gpuLayers?: number;
18
+ logVerbosity?: number;
19
+ logTimestamps?: boolean;
18
20
  }
19
21
 
20
22
  export async function startCommand(model: string, options: StartOptions): Promise<void> {
@@ -67,6 +69,8 @@ export async function startCommand(model: string, options: StartOptions): Promis
67
69
  threads: options.threads,
68
70
  ctxSize: options.ctxSize,
69
71
  gpuLayers: options.gpuLayers,
72
+ logVerbosity: options.logVerbosity,
73
+ logTimestamps: options.logTimestamps,
70
74
  };
71
75
 
72
76
  const config = await configGenerator.generateConfig(
@@ -84,6 +88,8 @@ export async function startCommand(model: string, options: StartOptions): Promis
84
88
  console.log(chalk.dim(`Threads: ${config.threads}`));
85
89
  console.log(chalk.dim(`Context Size: ${config.ctxSize}`));
86
90
  console.log(chalk.dim(`GPU Layers: ${config.gpuLayers}`));
91
+ console.log(chalk.dim(`Log Verbosity: ${config.logVerbosity !== undefined ? config.logVerbosity : 'all'}`));
92
+ console.log(chalk.dim(`Log Timestamps: ${config.logTimestamps ? 'enabled' : 'disabled'}`));
87
93
  console.log();
88
94
 
89
95
  // 7. Ensure log directory exists
@@ -121,7 +127,7 @@ export async function startCommand(model: string, options: StartOptions): Promis
121
127
  await launchctlManager.stopService(config.label);
122
128
  await launchctlManager.unloadService(config.plistPath);
123
129
  await launchctlManager.deletePlist(config.plistPath);
124
- throw new Error('Server failed to start. Check logs with: llamacpp logs --errors');
130
+ throw new Error('Server failed to start. Check logs with: llamacpp server logs --errors');
125
131
  }
126
132
 
127
133
  // 12. Update config with running status
@@ -135,6 +141,6 @@ export async function startCommand(model: string, options: StartOptions): Promis
135
141
  console.log(chalk.green('✅ Server started successfully!'));
136
142
  console.log();
137
143
  console.log(chalk.dim(`Connect: http://localhost:${config.port}`));
138
- console.log(chalk.dim(`View logs: llamacpp logs ${config.id}`));
139
- console.log(chalk.dim(`Stop: llamacpp stop ${config.id}`));
144
+ console.log(chalk.dim(`View logs: llamacpp server logs ${config.id}`));
145
+ console.log(chalk.dim(`Stop: llamacpp server stop ${config.id}`));
140
146
  }
@@ -11,6 +11,8 @@ export interface ServerOptions {
11
11
  gpuLayers?: number;
12
12
  embeddings?: boolean;
13
13
  jinja?: boolean;
14
+ logVerbosity?: number;
15
+ logTimestamps?: boolean;
14
16
  }
15
17
 
16
18
  export interface SmartDefaults {
@@ -67,6 +69,8 @@ export class ConfigGenerator {
67
69
  const gpuLayers = options?.gpuLayers ?? smartDefaults.gpuLayers;
68
70
  const embeddings = options?.embeddings ?? true;
69
71
  const jinja = options?.jinja ?? true;
72
+ const logVerbosity = options?.logVerbosity; // Default to undefined (log everything), filter at CLI level
73
+ const logTimestamps = options?.logTimestamps ?? true; // Enable timestamps by default
70
74
 
71
75
  // Generate server ID
72
76
  const id = sanitizeModelName(modelName);
@@ -88,6 +92,8 @@ export class ConfigGenerator {
88
92
  gpuLayers,
89
93
  embeddings,
90
94
  jinja,
95
+ logVerbosity,
96
+ logTimestamps,
91
97
  status: 'stopped',
92
98
  createdAt: new Date().toISOString(),
93
99
  plistPath,
@@ -111,6 +117,8 @@ export class ConfigGenerator {
111
117
  gpuLayers: options?.gpuLayers ?? globalConfig.defaults.gpuLayers,
112
118
  embeddings: options?.embeddings ?? true,
113
119
  jinja: options?.jinja ?? true,
120
+ logVerbosity: options?.logVerbosity, // undefined = log everything
121
+ logTimestamps: options?.logTimestamps ?? true,
114
122
  };
115
123
  }
116
124
  }
@@ -16,6 +16,27 @@ export class LaunchctlManager {
16
16
  * Generate plist XML content for a server
17
17
  */
18
18
  generatePlist(config: ServerConfig): string {
19
+ // Build program arguments array
20
+ const args = [
21
+ '/opt/homebrew/bin/llama-server',
22
+ '--model', config.modelPath,
23
+ '--port', config.port.toString(),
24
+ '--threads', config.threads.toString(),
25
+ '--ctx-size', config.ctxSize.toString(),
26
+ '--gpu-layers', config.gpuLayers.toString(),
27
+ ];
28
+
29
+ // Add flags
30
+ if (config.embeddings) args.push('--embeddings');
31
+ if (config.jinja) args.push('--jinja');
32
+ if (config.logVerbosity !== undefined) {
33
+ args.push('--log-verbosity', config.logVerbosity.toString());
34
+ }
35
+ if (config.logTimestamps) args.push('--log-timestamps');
36
+
37
+ // Generate XML array elements
38
+ const argsXml = args.map(arg => ` <string>${arg}</string>`).join('\n');
39
+
19
40
  return `<?xml version="1.0" encoding="UTF-8"?>
20
41
  <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
21
42
  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
@@ -26,19 +47,7 @@ export class LaunchctlManager {
26
47
 
27
48
  <key>ProgramArguments</key>
28
49
  <array>
29
- <string>/opt/homebrew/bin/llama-server</string>
30
- <string>--model</string>
31
- <string>${config.modelPath}</string>
32
- <string>--port</string>
33
- <string>${config.port}</string>
34
- <string>--threads</string>
35
- <string>${config.threads}</string>
36
- <string>--ctx-size</string>
37
- <string>${config.ctxSize}</string>
38
- <string>--gpu-layers</string>
39
- <string>${config.gpuLayers}</string>
40
- <string>--embeddings</string>
41
- <string>--jinja</string>
50
+ ${argsXml}
42
51
  </array>
43
52
 
44
53
  <key>RunAtLoad</key>
@@ -12,6 +12,8 @@ export interface ServerConfig {
12
12
  gpuLayers: number;
13
13
  embeddings: boolean; // Always true
14
14
  jinja: boolean; // Always true
15
+ logVerbosity?: number; // Log verbosity level (0=errors only, 1=warnings, 2=info/HTTP, 9=debug, undefined=all)
16
+ logTimestamps: boolean; // Add timestamps to log messages
15
17
 
16
18
  // State tracking
17
19
  status: ServerStatus;
@@ -0,0 +1,184 @@
1
/**
 * Parse and consolidate verbose llama-server logs into compact single-line format
 */

/** One consolidated HTTP exchange, rendered as a single space-separated line. */
interface CompactLogEntry {
  timestamp: string;
  method: string;
  endpoint: string;
  ip: string;
  status: number;
  userMessage: string;
  tokensIn: number;
  tokensOut: number;
  responseTimeMs: number;
}

/** A parsed JSON object whose fields have not been validated yet. */
type JsonObject = Record<string, unknown>;

/**
 * Stateful line-by-line parser.
 *
 * Feed it `log_server_r` lines in order via {@link LogParser.processLine}. It
 * buffers everything from a `request: POST` header line up to the matching
 * `response:` line and then emits one compact summary line for that exchange.
 */
export class LogParser {
  private buffer: string[] = [];
  private isBuffering = false;

  /**
   * Process one log line and emit a compact line once an exchange is complete.
   *
   * @param line - A single raw log line.
   * @param callback - Invoked with the compact line when a buffered
   *   request/response group could be consolidated. Not invoked for lines that
   *   only extend the buffer or for groups that fail to parse.
   */
  processLine(line: string, callback: (compactLine: string) => void): void {
    // A POST header line always starts a fresh group, discarding any
    // incomplete group that was still being buffered.
    if (line.includes('log_server_r: request: POST')) {
      this.isBuffering = true;
      this.buffer = [line];
      return;
    }

    // Lines outside of a group are ignored.
    if (!this.isBuffering) return;

    this.buffer.push(line);

    // The response line closes the group.
    if (line.includes('log_server_r: response:')) {
      const compactLine = this.consolidateRequest(this.buffer);
      // Reset state before calling out so a throwing callback cannot leave the
      // parser stuck in buffering mode.
      this.buffer = [];
      this.isBuffering = false;
      if (compactLine) {
        callback(compactLine);
      }
    }
  }

  /**
   * Consolidate buffered request/response lines into a single line.
   *
   * @param lines - Header line first, followed by the lines collected up to and
   *   including the response line.
   * @returns The compact line, or `null` when the header does not match, the
   *   request/response JSON is missing, or it cannot be parsed.
   */
  private consolidateRequest(lines: string[]): string | null {
    try {
      // Header line: optional timestamp plus "request: METHOD /path ip status".
      const firstLine = lines[0];
      if (firstLine === undefined) return null;

      const timestamp = this.extractTimestamp(firstLine);
      const requestMatch = firstLine.match(/request: (POST|GET|PUT|DELETE) (\/[^\s]+) ([^\s]+) (\d+)/);
      if (!requestMatch) return null;

      const [, method = '', endpoint = '', ip = '', status = ''] = requestMatch;

      // Request body: the first "request:" line that carries a JSON object.
      const requestLine = lines.find((l) => l.includes('log_server_r: request:') && l.includes('{'));
      if (!requestLine) return null;

      const requestJson = this.extractJson(requestLine);
      if (!requestJson) return null;

      const userMessage = this.extractUserMessage(requestJson);

      // Response body.
      const responseLine = lines.find((l) => l.includes('log_server_r: response:'));
      if (!responseLine) return null;

      const responseJson = this.extractJson(responseLine);
      if (!responseJson) return null;

      const usage = this.asObject(responseJson.usage);
      const tokensIn = this.toNumber(usage?.prompt_tokens);
      const tokensOut = this.toNumber(usage?.completion_tokens);

      const responseTimeMs = this.extractResponseTime(responseJson);

      return this.formatCompactLine({
        timestamp,
        method,
        endpoint,
        ip,
        status: parseInt(status, 10),
        userMessage,
        tokensIn,
        tokensOut,
        responseTimeMs,
      });
    } catch {
      // Deliberate best-effort: a malformed group is skipped rather than
      // aborting the whole log view.
      return null;
    }
  }

  /**
   * Extract the timestamp from a log line.
   *
   * @returns The bracketed `YYYY-MM-DD HH:MM:SS` value when present; otherwise
   *   the current time in the same layout (derived from `toISOString`, i.e. UTC).
   */
  private extractTimestamp(line: string): string {
    const stamp = line.match(/\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\]/)?.[1];
    if (stamp !== undefined) {
      return stamp;
    }
    return new Date().toISOString().substring(0, 19).replace('T', ' ');
  }

  /**
   * Extract the JSON object that starts at the first `{` of a log line.
   *
   * @returns The parsed object, or `null` when there is no `{` or the remainder
   *   of the line is not valid JSON.
   */
  private extractJson(line: string): JsonObject | null {
    const jsonStart = line.indexOf('{');
    if (jsonStart === -1) return null;

    try {
      const parsed: unknown = JSON.parse(line.substring(jsonStart));
      return this.asObject(parsed);
    } catch {
      return null;
    }
  }

  /**
   * Extract the first user message from the request JSON, collapsed to one line
   * and truncated to at most 50 characters (47 plus `...`).
   *
   * Accepts both plain-string content and array content made of parts; for
   * arrays, string parts and parts with a string `text` field are joined with
   * spaces and every other part is ignored. Returns `''` when there is no
   * usable user message.
   */
  private extractUserMessage(requestJson: JsonObject): string {
    const messages: unknown = requestJson.messages;
    if (!Array.isArray(messages)) return '';

    let rawContent: unknown;
    for (const candidate of messages as unknown[]) {
      const message = this.asObject(candidate);
      if (message && message.role === 'user') {
        rawContent = message.content;
        break;
      }
    }

    const content = this.contentToText(rawContent).replace(/\s+/g, ' ').trim();
    return content.length > 50 ? content.substring(0, 47) + '...' : content;
  }

  /** Flatten message content (string or array of parts) to plain text. */
  private contentToText(content: unknown): string {
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) return '';

    const pieces: string[] = [];
    for (const part of content as unknown[]) {
      if (typeof part === 'string') {
        pieces.push(part);
        continue;
      }
      const text = this.asObject(part)?.text;
      if (typeof text === 'string') pieces.push(text);
    }
    return pieces.join(' ');
  }

  /**
   * Extract the response time in milliseconds from the response JSON.
   *
   * Prefers `__verbose.timings`, falls back to top-level `timings`, and sums
   * `prompt_ms` and `predicted_ms`. Returns 0 when no timings are present.
   */
  private extractResponseTime(responseJson: JsonObject): number {
    const verbose = this.asObject(responseJson.__verbose);
    const timings = this.asObject(verbose?.timings) ?? this.asObject(responseJson.timings);
    if (!timings) return 0;

    return Math.round(this.toNumber(timings.prompt_ms) + this.toNumber(timings.predicted_ms));
  }

  /** Render an entry as space-separated fields with the user message quoted. */
  private formatCompactLine(entry: CompactLogEntry): string {
    return [
      entry.timestamp,
      entry.method,
      entry.endpoint,
      entry.ip,
      entry.status,
      `"${entry.userMessage}"`,
      entry.tokensIn,
      entry.tokensOut,
      entry.responseTimeMs,
    ].join(' ');
  }

  /** Narrow an unknown value to a plain (non-array, non-null) object. */
  private asObject(value: unknown): JsonObject | null {
    return typeof value === 'object' && value !== null && !Array.isArray(value)
      ? (value as JsonObject)
      : null;
  }

  /** Return the value when it is a finite number, otherwise 0. */
  private toNumber(value: unknown): number {
    return typeof value === 'number' && Number.isFinite(value) ? value : 0;
  }
}

// Export singleton instance
export const logParser = new LogParser();
@@ -58,3 +58,41 @@ export async function isPortInUse(port: number): Promise<boolean> {
58
58
  return false;
59
59
  }
60
60
  }
61
+
62
/**
 * Get memory usage for a process in bytes.
 *
 * Runs `top -l 1 -pid <pid> -stats mem` (one sample, MEM column only) and
 * parses the last non-empty output line, which is expected to hold a value
 * such as "10.5G", "512M", "1024K" or "10G".
 *
 * @param pid - Process id to inspect.
 * @returns The memory value converted to bytes (1024-based units), or `null`
 *   when the pid is not a positive integer, the command fails, or the output
 *   does not end in a recognisable memory value.
 */
export async function getProcessMemory(pid: number): Promise<number | null> {
  // The pid is interpolated into a shell command; refuse anything that is not
  // a plain positive integer (NaN, fractions, negatives).
  if (!Number.isInteger(pid) || pid <= 0) return null;

  try {
    const output = await execCommand(`top -l 1 -pid ${pid} -stats mem`);

    // The value is on the last non-empty line (below the column header).
    const lines = output.split('\n').filter((line) => line.trim().length > 0);
    const lastLine = lines[lines.length - 1];
    if (lastLine === undefined) return null;

    // Require a well-formed decimal so inputs like "1.2.3G" or ".G" yield null
    // instead of NaN. A trailing "+"/"-" is tolerated on the assumption that
    // top may append a change marker to the value — TODO confirm for `-l 1`.
    const match = lastLine.trim().match(/^(\d+(?:\.\d+)?)([BKMGT])[+-]?$/);
    if (!match) return null;

    const value = Number(match[1]);
    const unit = match[2];
    if (unit === undefined || !Number.isFinite(value)) return null;

    // Convert to bytes
    const multipliers: Record<string, number> = {
      B: 1,
      K: 1024,
      M: 1024 ** 2,
      G: 1024 ** 3,
      T: 1024 ** 4,
    };

    const multiplier = multipliers[unit];
    return multiplier === undefined ? null : Math.round(value * multiplier);
  } catch {
    // Best-effort: callers treat null as "memory unknown".
    return null;
  }
}