@appkit/llamacpp-cli 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/README.md +52 -14
- package/dist/cli.js +8 -2
- package/dist/cli.js.map +1 -1
- package/dist/commands/logs.d.ts +4 -0
- package/dist/commands/logs.d.ts.map +1 -1
- package/dist/commands/logs.js +157 -24
- package/dist/commands/logs.js.map +1 -1
- package/dist/commands/ps.d.ts.map +1 -1
- package/dist/commands/ps.js +11 -1
- package/dist/commands/ps.js.map +1 -1
- package/dist/commands/start.d.ts +2 -0
- package/dist/commands/start.d.ts.map +1 -1
- package/dist/commands/start.js +4 -0
- package/dist/commands/start.js.map +1 -1
- package/dist/lib/config-generator.d.ts +2 -0
- package/dist/lib/config-generator.d.ts.map +1 -1
- package/dist/lib/config-generator.js +6 -0
- package/dist/lib/config-generator.js.map +1 -1
- package/dist/lib/launchctl-manager.d.ts.map +1 -1
- package/dist/lib/launchctl-manager.js +22 -13
- package/dist/lib/launchctl-manager.js.map +1 -1
- package/dist/types/server-config.d.ts +2 -0
- package/dist/types/server-config.d.ts.map +1 -1
- package/dist/types/server-config.js.map +1 -1
- package/dist/utils/log-parser.d.ts +37 -0
- package/dist/utils/log-parser.d.ts.map +1 -0
- package/dist/utils/log-parser.js +164 -0
- package/dist/utils/log-parser.js.map +1 -0
- package/dist/utils/process-utils.d.ts +6 -0
- package/dist/utils/process-utils.d.ts.map +1 -1
- package/dist/utils/process-utils.js +35 -0
- package/dist/utils/process-utils.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +8 -2
- package/src/commands/logs.ts +133 -26
- package/src/commands/ps.ts +13 -2
- package/src/commands/start.ts +6 -0
- package/src/lib/config-generator.ts +8 -0
- package/src/lib/launchctl-manager.ts +22 -13
- package/src/types/server-config.ts +2 -0
- package/src/utils/log-parser.ts +184 -0
- package/src/utils/process-utils.ts +38 -0
package/src/commands/start.ts
CHANGED
|
@@ -15,6 +15,8 @@ interface StartOptions {
|
|
|
15
15
|
threads?: number;
|
|
16
16
|
ctxSize?: number;
|
|
17
17
|
gpuLayers?: number;
|
|
18
|
+
logVerbosity?: number;
|
|
19
|
+
logTimestamps?: boolean;
|
|
18
20
|
}
|
|
19
21
|
|
|
20
22
|
export async function startCommand(model: string, options: StartOptions): Promise<void> {
|
|
@@ -67,6 +69,8 @@ export async function startCommand(model: string, options: StartOptions): Promis
|
|
|
67
69
|
threads: options.threads,
|
|
68
70
|
ctxSize: options.ctxSize,
|
|
69
71
|
gpuLayers: options.gpuLayers,
|
|
72
|
+
logVerbosity: options.logVerbosity,
|
|
73
|
+
logTimestamps: options.logTimestamps,
|
|
70
74
|
};
|
|
71
75
|
|
|
72
76
|
const config = await configGenerator.generateConfig(
|
|
@@ -84,6 +88,8 @@ export async function startCommand(model: string, options: StartOptions): Promis
|
|
|
84
88
|
console.log(chalk.dim(`Threads: ${config.threads}`));
|
|
85
89
|
console.log(chalk.dim(`Context Size: ${config.ctxSize}`));
|
|
86
90
|
console.log(chalk.dim(`GPU Layers: ${config.gpuLayers}`));
|
|
91
|
+
console.log(chalk.dim(`Log Verbosity: ${config.logVerbosity !== undefined ? config.logVerbosity : 'all'}`));
|
|
92
|
+
console.log(chalk.dim(`Log Timestamps: ${config.logTimestamps ? 'enabled' : 'disabled'}`));
|
|
87
93
|
console.log();
|
|
88
94
|
|
|
89
95
|
// 7. Ensure log directory exists
|
|
@@ -11,6 +11,8 @@ export interface ServerOptions {
|
|
|
11
11
|
gpuLayers?: number;
|
|
12
12
|
embeddings?: boolean;
|
|
13
13
|
jinja?: boolean;
|
|
14
|
+
logVerbosity?: number;
|
|
15
|
+
logTimestamps?: boolean;
|
|
14
16
|
}
|
|
15
17
|
|
|
16
18
|
export interface SmartDefaults {
|
|
@@ -67,6 +69,8 @@ export class ConfigGenerator {
|
|
|
67
69
|
const gpuLayers = options?.gpuLayers ?? smartDefaults.gpuLayers;
|
|
68
70
|
const embeddings = options?.embeddings ?? true;
|
|
69
71
|
const jinja = options?.jinja ?? true;
|
|
72
|
+
const logVerbosity = options?.logVerbosity; // Default to undefined (log everything), filter at CLI level
|
|
73
|
+
const logTimestamps = options?.logTimestamps ?? true; // Enable timestamps by default
|
|
70
74
|
|
|
71
75
|
// Generate server ID
|
|
72
76
|
const id = sanitizeModelName(modelName);
|
|
@@ -88,6 +92,8 @@ export class ConfigGenerator {
|
|
|
88
92
|
gpuLayers,
|
|
89
93
|
embeddings,
|
|
90
94
|
jinja,
|
|
95
|
+
logVerbosity,
|
|
96
|
+
logTimestamps,
|
|
91
97
|
status: 'stopped',
|
|
92
98
|
createdAt: new Date().toISOString(),
|
|
93
99
|
plistPath,
|
|
@@ -111,6 +117,8 @@ export class ConfigGenerator {
|
|
|
111
117
|
gpuLayers: options?.gpuLayers ?? globalConfig.defaults.gpuLayers,
|
|
112
118
|
embeddings: options?.embeddings ?? true,
|
|
113
119
|
jinja: options?.jinja ?? true,
|
|
120
|
+
logVerbosity: options?.logVerbosity, // undefined = log everything
|
|
121
|
+
logTimestamps: options?.logTimestamps ?? true,
|
|
114
122
|
};
|
|
115
123
|
}
|
|
116
124
|
}
|
|
@@ -16,6 +16,27 @@ export class LaunchctlManager {
|
|
|
16
16
|
* Generate plist XML content for a server
|
|
17
17
|
*/
|
|
18
18
|
generatePlist(config: ServerConfig): string {
|
|
19
|
+
// Build program arguments array
|
|
20
|
+
const args = [
|
|
21
|
+
'/opt/homebrew/bin/llama-server',
|
|
22
|
+
'--model', config.modelPath,
|
|
23
|
+
'--port', config.port.toString(),
|
|
24
|
+
'--threads', config.threads.toString(),
|
|
25
|
+
'--ctx-size', config.ctxSize.toString(),
|
|
26
|
+
'--gpu-layers', config.gpuLayers.toString(),
|
|
27
|
+
];
|
|
28
|
+
|
|
29
|
+
// Add flags
|
|
30
|
+
if (config.embeddings) args.push('--embeddings');
|
|
31
|
+
if (config.jinja) args.push('--jinja');
|
|
32
|
+
if (config.logVerbosity !== undefined) {
|
|
33
|
+
args.push('--log-verbosity', config.logVerbosity.toString());
|
|
34
|
+
}
|
|
35
|
+
if (config.logTimestamps) args.push('--log-timestamps');
|
|
36
|
+
|
|
37
|
+
// Generate XML array elements
|
|
38
|
+
const argsXml = args.map(arg => ` <string>${arg}</string>`).join('\n');
|
|
39
|
+
|
|
19
40
|
return `<?xml version="1.0" encoding="UTF-8"?>
|
|
20
41
|
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
|
|
21
42
|
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
@@ -26,19 +47,7 @@ export class LaunchctlManager {
|
|
|
26
47
|
|
|
27
48
|
<key>ProgramArguments</key>
|
|
28
49
|
<array>
|
|
29
|
-
|
|
30
|
-
<string>--model</string>
|
|
31
|
-
<string>${config.modelPath}</string>
|
|
32
|
-
<string>--port</string>
|
|
33
|
-
<string>${config.port}</string>
|
|
34
|
-
<string>--threads</string>
|
|
35
|
-
<string>${config.threads}</string>
|
|
36
|
-
<string>--ctx-size</string>
|
|
37
|
-
<string>${config.ctxSize}</string>
|
|
38
|
-
<string>--gpu-layers</string>
|
|
39
|
-
<string>${config.gpuLayers}</string>
|
|
40
|
-
<string>--embeddings</string>
|
|
41
|
-
<string>--jinja</string>
|
|
50
|
+
${argsXml}
|
|
42
51
|
</array>
|
|
43
52
|
|
|
44
53
|
<key>RunAtLoad</key>
|
|
@@ -12,6 +12,8 @@ export interface ServerConfig {
|
|
|
12
12
|
gpuLayers: number;
|
|
13
13
|
embeddings: boolean; // Always true
|
|
14
14
|
jinja: boolean; // Always true
|
|
15
|
+
logVerbosity?: number; // Log verbosity level (0=errors only, 1=warnings, 2=info/HTTP, 9=debug, undefined=all)
|
|
16
|
+
logTimestamps: boolean; // Add timestamps to log messages
|
|
15
17
|
|
|
16
18
|
// State tracking
|
|
17
19
|
status: ServerStatus;
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
/**
 * Parse and consolidate verbose llama-server logs into compact single-line format
 */

/** One consolidated HTTP request/response pair, ready to be rendered as a single line. */
interface CompactLogEntry {
  timestamp: string;
  method: string;
  endpoint: string;
  ip: string;
  status: number;
  userMessage: string;
  tokensIn: number;
  tokensOut: number;
  responseTimeMs: number;
}

/**
 * Stateful, line-oriented parser.
 *
 * Feed it log lines one at a time via `processLine`. A line containing
 * `log_server_r: request: POST` opens a request; the next line containing
 * `log_server_r: response:` closes it and, if everything parses, produces one
 * compact line through the callback. Lines outside a request are ignored.
 */
export class LogParser {
  private static readonly REQUEST_START_MARKER = 'log_server_r: request: POST';
  private static readonly REQUEST_MARKER = 'log_server_r: request:';
  private static readonly RESPONSE_MARKER = 'log_server_r: response:';
  private static readonly MAX_MESSAGE_LENGTH = 50;

  // Holds at most three lines: [request start, first request-body line, response].
  private buffer: string[] = [];
  private isBuffering = false;

  /**
   * Process one log line and emit a compact line when a request completes.
   *
   * @param line - A single raw log line.
   * @param callback - Invoked with the compact line once a request/response
   *   pair has been consolidated successfully. Not invoked otherwise.
   */
  processLine(line: string, callback: (compactLine: string) => void): void {
    // A new POST request always restarts buffering, discarding any unfinished one.
    if (line.includes(LogParser.REQUEST_START_MARKER)) {
      this.isBuffering = true;
      this.buffer = [line];
      return;
    }

    if (!this.isBuffering) return;

    if (line.includes(LogParser.RESPONSE_MARKER)) {
      this.buffer.push(line);
      const compactLine = this.consolidateRequest(this.buffer);

      // Reset state before invoking the callback so a throwing callback
      // cannot leave the parser stuck in buffering mode.
      this.buffer = [];
      this.isBuffering = false;

      if (compactLine) {
        callback(compactLine);
      }
      return;
    }

    // Consolidation only ever reads the start line, the FIRST request-body
    // line and the response line, so nothing else needs to be retained.
    // This keeps memory bounded if a response line never arrives.
    if (this.buffer.length === 1 && this.isRequestBodyLine(line)) {
      this.buffer.push(line);
    }
  }

  /**
   * True when the line looks like the JSON body of a logged request.
   */
  private isRequestBodyLine(line: string): boolean {
    return line.includes(LogParser.REQUEST_MARKER) && line.includes('{');
  }

  /**
   * Consolidate buffered request/response lines into single line.
   *
   * @returns The compact line, or null when any required piece (request
   *   header, request JSON, response JSON) is missing or unparsable.
   */
  private consolidateRequest(lines: string[]): string | null {
    try {
      // First line: timestamp plus "request: METHOD /endpoint ip status".
      const firstLine = lines[0];
      const timestamp = this.extractTimestamp(firstLine);
      const requestMatch = firstLine.match(/request: (POST|GET|PUT|DELETE) (\/[^\s]+) ([^\s]+) (\d+)/);
      if (!requestMatch) return null;

      const [, method, endpoint, ip, status] = requestMatch;

      // Request JSON body.
      const requestLine = lines.find((l) => this.isRequestBodyLine(l));
      if (!requestLine) return null;

      const requestJson = this.extractJson(requestLine);
      if (!requestJson) return null;

      const userMessage = this.extractUserMessage(requestJson);

      // Response JSON body.
      const responseLine = lines.find((l) => l.includes(LogParser.RESPONSE_MARKER));
      if (!responseLine) return null;

      const responseJson = this.extractJson(responseLine);
      if (!responseJson) return null;

      const tokensIn = responseJson.usage?.prompt_tokens || 0;
      const tokensOut = responseJson.usage?.completion_tokens || 0;
      const responseTimeMs = this.extractResponseTime(responseJson);

      return this.formatCompactLine({
        timestamp,
        method,
        endpoint,
        ip,
        status: parseInt(status, 10),
        userMessage,
        tokensIn,
        tokensOut,
        responseTimeMs,
      });
    } catch {
      // Best-effort: a malformed request is skipped rather than aborting the stream.
      return null;
    }
  }

  /**
   * Extract timestamp from log line
   */
  private extractTimestamp(line: string): string {
    // Look for timestamp format like [2025-12-09 10:13:45]
    const match = line.match(/\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\]/);
    if (match) {
      return match[1]; // Return as-is: 2025-12-09 10:13:45
    }
    // If no timestamp in logs, use current time in same format.
    // NOTE(review): toISOString() is UTC; if the server logs local time the
    // two sources will differ by the UTC offset — confirm which is intended.
    const now = new Date();
    return now.toISOString().substring(0, 19).replace('T', ' ');
  }

  /**
   * Extract JSON from log line: everything from the first '{' to end of line.
   *
   * @returns The parsed value, or null when there is no '{' or parsing fails.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape of logged JSON is not declared in this file
  private extractJson(line: string): any {
    const jsonStart = line.indexOf('{');
    if (jsonStart === -1) return null;

    try {
      return JSON.parse(line.substring(jsonStart));
    } catch {
      return null;
    }
  }

  /**
   * Extract first user message from request JSON, collapsed to one line and
   * truncated to 50 characters (47 + "...").
   *
   * `content` may be a plain string or an array of content parts; for arrays
   * the `text` fields of the parts are joined with spaces and non-text parts
   * are skipped. Any other shape yields an empty string.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- see extractJson
  private extractUserMessage(requestJson: any): string {
    const messages: unknown[] = Array.isArray(requestJson?.messages) ? requestJson.messages : [];
    const userMsg = messages.find(
      (m) => typeof m === 'object' && m !== null && (m as { role?: unknown }).role === 'user',
    ) as { content?: unknown } | undefined;

    const text = this.contentToText(userMsg?.content);
    const normalized = text.replace(/\s+/g, ' ').trim();

    const limit = LogParser.MAX_MESSAGE_LENGTH;
    return normalized.length > limit ? normalized.substring(0, limit - 3) + '...' : normalized;
  }

  /**
   * Flatten a message `content` value (string or array of parts) to plain text.
   */
  private contentToText(content: unknown): string {
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) return '';

    return content
      .map((part: unknown): string => {
        if (typeof part === 'string') return part;
        if (typeof part === 'object' && part !== null) {
          const text = (part as { text?: unknown }).text;
          if (typeof text === 'string') return text;
        }
        return '';
      })
      .filter((text) => text.length > 0)
      .join(' ');
  }

  /**
   * Extract response time (prompt_ms + predicted_ms, rounded) from response JSON.
   * Prefers `__verbose.timings`, falls back to top-level `timings`, else 0.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- see extractJson
  private extractResponseTime(responseJson: any): number {
    const timings = responseJson.__verbose?.timings || responseJson.timings;
    if (!timings) return 0;

    const promptMs = timings.prompt_ms || 0;
    const predictedMs = timings.predicted_ms || 0;
    return Math.round(promptMs + predictedMs);
  }

  /**
   * Format compact log line: space-separated fields, user message in double quotes.
   */
  private formatCompactLine(entry: CompactLogEntry): string {
    return [
      entry.timestamp,
      entry.method,
      entry.endpoint,
      entry.ip,
      entry.status,
      `"${entry.userMessage}"`,
      entry.tokensIn,
      entry.tokensOut,
      entry.responseTimeMs,
    ].join(' ');
  }
}

// Export singleton instance
export const logParser = new LogParser();
|
|
@@ -58,3 +58,41 @@ export async function isPortInUse(port: number): Promise<boolean> {
|
|
|
58
58
|
return false;
|
|
59
59
|
}
|
|
60
60
|
}
|
|
61
|
+
|
|
62
|
+
/**
 * Get memory usage for a process in bytes.
 *
 * Runs `top -l 1 -pid <pid> -stats mem` and parses the last non-empty output
 * line as a size such as "10.5G", "512M" or "1024K".
 * (Per the original author, the macOS MEM column includes GPU/Metal memory,
 * which makes it more accurate for llama-server — not verified here.)
 *
 * @param pid - Process id to inspect; must be a positive integer.
 * @returns Memory in bytes, or null if the pid is invalid, the process is not
 *   found, the output cannot be parsed, or the command fails.
 */
export async function getProcessMemory(pid: number): Promise<number | null> {
  // The pid is interpolated into a shell command, so refuse anything that is
  // not a plain positive integer (NaN, floats, or non-numbers passed from JS).
  if (!Number.isInteger(pid) || pid <= 0) return null;

  try {
    // -l 1: take a single sample; -stats mem: print only the MEM column.
    const output = await execCommand(`top -l 1 -pid ${pid} -stats mem`);

    // The memory value is on the last non-empty line.
    const lines = output
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line.length > 0);
    const memStr = lines[lines.length - 1];
    if (memStr === undefined) return null;

    // Accept a well-formed number, a unit (B/K/M/G/T), and an optional
    // trailing +/- marker. NOTE(review): the B unit and +/- suffix are
    // tolerated defensively; confirm against real `top` output.
    const match = memStr.match(/^(\d+(?:\.\d+)?)([BKMGT])[+-]?$/);
    if (!match) return null;

    const value = parseFloat(match[1]);
    const unit = match[2];

    // Binary multipliers for converting to bytes.
    const multipliers: Record<string, number> = {
      B: 1,
      K: 1024,
      M: 1024 ** 2,
      G: 1024 ** 3,
      T: 1024 ** 4,
    };

    const multiplier = multipliers[unit];
    if (multiplier === undefined) return null;

    return Math.round(value * multiplier);
  } catch {
    // Best-effort: any failure running or parsing `top` is reported as "unknown".
    return null;
  }
}
|