@appkit/llamacpp-cli 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +95 -16
- package/dist/cli.js +38 -8
- package/dist/cli.js.map +1 -1
- package/dist/commands/create.d.ts +10 -0
- package/dist/commands/create.d.ts.map +1 -0
- package/dist/commands/create.js +152 -0
- package/dist/commands/create.js.map +1 -0
- package/dist/commands/list.js +1 -1
- package/dist/commands/list.js.map +1 -1
- package/dist/commands/logs.d.ts.map +1 -1
- package/dist/commands/logs.js +17 -3
- package/dist/commands/logs.js.map +1 -1
- package/dist/commands/ps.js +1 -1
- package/dist/commands/ps.js.map +1 -1
- package/dist/commands/pull.js +1 -1
- package/dist/commands/pull.js.map +1 -1
- package/dist/commands/run.d.ts +5 -1
- package/dist/commands/run.d.ts.map +1 -1
- package/dist/commands/run.js +23 -4
- package/dist/commands/run.js.map +1 -1
- package/dist/commands/server-show.d.ts +2 -0
- package/dist/commands/server-show.d.ts.map +1 -0
- package/dist/commands/server-show.js +120 -0
- package/dist/commands/server-show.js.map +1 -0
- package/dist/commands/start.d.ts +1 -10
- package/dist/commands/start.d.ts.map +1 -1
- package/dist/commands/start.js +31 -121
- package/dist/commands/start.js.map +1 -1
- package/dist/lib/config-generator.d.ts +1 -2
- package/dist/lib/config-generator.d.ts.map +1 -1
- package/dist/lib/config-generator.js +3 -6
- package/dist/lib/config-generator.js.map +1 -1
- package/dist/lib/launchctl-manager.d.ts.map +1 -1
- package/dist/lib/launchctl-manager.js +3 -4
- package/dist/lib/launchctl-manager.js.map +1 -1
- package/dist/types/server-config.d.ts +1 -2
- package/dist/types/server-config.d.ts.map +1 -1
- package/dist/types/server-config.js.map +1 -1
- package/dist/utils/log-parser.d.ts +10 -0
- package/dist/utils/log-parser.d.ts.map +1 -1
- package/dist/utils/log-parser.js +51 -3
- package/dist/utils/log-parser.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +38 -8
- package/src/commands/create.ts +143 -0
- package/src/commands/list.ts +1 -1
- package/src/commands/logs.ts +20 -3
- package/src/commands/ps.ts +1 -1
- package/src/commands/pull.ts +1 -1
- package/src/commands/run.ts +28 -4
- package/src/commands/server-show.ts +125 -0
- package/src/commands/start.ts +39 -112
- package/src/lib/config-generator.ts +4 -8
- package/src/lib/launchctl-manager.ts +4 -3
- package/src/types/server-config.ts +1 -2
- package/src/utils/log-parser.ts +54 -3
package/src/commands/start.ts
CHANGED
|
@@ -1,146 +1,73 @@
|
|
|
1
1
|
import chalk from 'chalk';
|
|
2
|
-
import * as path from 'path';
|
|
3
|
-
import { modelScanner } from '../lib/model-scanner';
|
|
4
2
|
import { stateManager } from '../lib/state-manager';
|
|
5
|
-
import { configGenerator, ServerOptions } from '../lib/config-generator';
|
|
6
|
-
import { portManager } from '../lib/port-manager';
|
|
7
3
|
import { launchctlManager } from '../lib/launchctl-manager';
|
|
8
4
|
import { statusChecker } from '../lib/status-checker';
|
|
9
|
-
import { commandExists } from '../utils/process-utils';
|
|
10
|
-
import { formatBytes } from '../utils/format-utils';
|
|
11
|
-
import { ensureDir } from '../utils/file-utils';
|
|
12
5
|
|
|
13
|
-
|
|
14
|
-
port?: number;
|
|
15
|
-
threads?: number;
|
|
16
|
-
ctxSize?: number;
|
|
17
|
-
gpuLayers?: number;
|
|
18
|
-
logVerbosity?: number;
|
|
19
|
-
logTimestamps?: boolean;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
export async function startCommand(model: string, options: StartOptions): Promise<void> {
|
|
6
|
+
export async function startCommand(identifier: string): Promise<void> {
|
|
23
7
|
// Initialize state manager
|
|
24
8
|
await stateManager.initialize();
|
|
25
9
|
|
|
26
|
-
// 1.
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
throw new Error(`Model not found: ${model}\n\nRun: llamacpp list`);
|
|
10
|
+
// 1. Find server by identifier
|
|
11
|
+
const server = await stateManager.findServer(identifier);
|
|
12
|
+
if (!server) {
|
|
13
|
+
throw new Error(
|
|
14
|
+
`Server not found: ${identifier}\n\n` +
|
|
15
|
+
`Use: llamacpp ps\n` +
|
|
16
|
+
`Or create a new server: llamacpp server create <model>`
|
|
17
|
+
);
|
|
35
18
|
}
|
|
36
19
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
20
|
+
// 2. Check if already running
|
|
21
|
+
if (server.status === 'running') {
|
|
22
|
+
console.log(
|
|
23
|
+
chalk.yellow(
|
|
24
|
+
`⚠️ Server ${server.modelName} is already running on port ${server.port}`
|
|
25
|
+
)
|
|
26
|
+
);
|
|
27
|
+
return;
|
|
43
28
|
}
|
|
44
29
|
|
|
45
|
-
|
|
46
|
-
const modelSize = await modelScanner.getModelSize(modelName);
|
|
47
|
-
if (!modelSize) {
|
|
48
|
-
throw new Error(`Failed to read model file: ${modelPath}`);
|
|
49
|
-
}
|
|
30
|
+
console.log(chalk.blue(`▶️ Starting ${server.modelName} (port ${server.port})...`));
|
|
50
31
|
|
|
51
|
-
//
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if (!available) {
|
|
57
|
-
throw new Error(`Port ${options.port} is already in use`);
|
|
58
|
-
}
|
|
59
|
-
port = options.port;
|
|
60
|
-
} else {
|
|
61
|
-
port = await portManager.findAvailablePort();
|
|
32
|
+
// 3. Ensure plist exists (recreate if missing)
|
|
33
|
+
try {
|
|
34
|
+
await launchctlManager.createPlist(server);
|
|
35
|
+
} catch (error) {
|
|
36
|
+
// May already exist, that's okay
|
|
62
37
|
}
|
|
63
38
|
|
|
64
|
-
//
|
|
65
|
-
console.log(chalk.blue(`🚀 Starting server for ${modelName}\n`));
|
|
66
|
-
|
|
67
|
-
const serverOptions: ServerOptions = {
|
|
68
|
-
port: options.port,
|
|
69
|
-
threads: options.threads,
|
|
70
|
-
ctxSize: options.ctxSize,
|
|
71
|
-
gpuLayers: options.gpuLayers,
|
|
72
|
-
logVerbosity: options.logVerbosity,
|
|
73
|
-
logTimestamps: options.logTimestamps,
|
|
74
|
-
};
|
|
75
|
-
|
|
76
|
-
const config = await configGenerator.generateConfig(
|
|
77
|
-
modelPath,
|
|
78
|
-
modelName,
|
|
79
|
-
modelSize,
|
|
80
|
-
port,
|
|
81
|
-
serverOptions
|
|
82
|
-
);
|
|
83
|
-
|
|
84
|
-
// Display configuration
|
|
85
|
-
console.log(chalk.dim(`Model: ${modelPath}`));
|
|
86
|
-
console.log(chalk.dim(`Size: ${formatBytes(modelSize)}`));
|
|
87
|
-
console.log(chalk.dim(`Port: ${config.port}${options.port ? '' : ' (auto-assigned)'}`));
|
|
88
|
-
console.log(chalk.dim(`Threads: ${config.threads}`));
|
|
89
|
-
console.log(chalk.dim(`Context Size: ${config.ctxSize}`));
|
|
90
|
-
console.log(chalk.dim(`GPU Layers: ${config.gpuLayers}`));
|
|
91
|
-
console.log(chalk.dim(`Log Verbosity: ${config.logVerbosity !== undefined ? config.logVerbosity : 'all'}`));
|
|
92
|
-
console.log(chalk.dim(`Log Timestamps: ${config.logTimestamps ? 'enabled' : 'disabled'}`));
|
|
93
|
-
console.log();
|
|
94
|
-
|
|
95
|
-
// 7. Ensure log directory exists
|
|
96
|
-
await ensureDir(path.dirname(config.stdoutPath));
|
|
97
|
-
|
|
98
|
-
// 8. Create plist file
|
|
99
|
-
console.log(chalk.dim('Creating launchctl service...'));
|
|
100
|
-
await launchctlManager.createPlist(config);
|
|
101
|
-
|
|
102
|
-
// 9. Load service
|
|
39
|
+
// 4. Load service if needed
|
|
103
40
|
try {
|
|
104
|
-
await launchctlManager.loadService(
|
|
41
|
+
await launchctlManager.loadService(server.plistPath);
|
|
105
42
|
} catch (error) {
|
|
106
|
-
//
|
|
107
|
-
await launchctlManager.deletePlist(config.plistPath);
|
|
108
|
-
throw new Error(`Failed to load service: ${(error as Error).message}`);
|
|
43
|
+
// May already be loaded, that's okay
|
|
109
44
|
}
|
|
110
45
|
|
|
111
|
-
//
|
|
46
|
+
// 5. Start the service
|
|
112
47
|
try {
|
|
113
|
-
await launchctlManager.startService(
|
|
48
|
+
await launchctlManager.startService(server.label);
|
|
114
49
|
} catch (error) {
|
|
115
|
-
// Clean up if start fails
|
|
116
|
-
await launchctlManager.unloadService(config.plistPath);
|
|
117
|
-
await launchctlManager.deletePlist(config.plistPath);
|
|
118
50
|
throw new Error(`Failed to start service: ${(error as Error).message}`);
|
|
119
51
|
}
|
|
120
52
|
|
|
121
|
-
//
|
|
53
|
+
// 6. Wait for startup
|
|
122
54
|
console.log(chalk.dim('Waiting for server to start...'));
|
|
123
|
-
const started = await launchctlManager.waitForServiceStart(
|
|
55
|
+
const started = await launchctlManager.waitForServiceStart(server.label, 5000);
|
|
124
56
|
|
|
125
57
|
if (!started) {
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
await launchctlManager.deletePlist(config.plistPath);
|
|
130
|
-
throw new Error('Server failed to start. Check logs with: llamacpp logs --errors');
|
|
58
|
+
throw new Error(
|
|
59
|
+
`Server failed to start. Check logs with: llamacpp server logs ${server.id}`
|
|
60
|
+
);
|
|
131
61
|
}
|
|
132
62
|
|
|
133
|
-
//
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
// 13. Save server config
|
|
137
|
-
await stateManager.saveServerConfig(updatedConfig);
|
|
63
|
+
// 7. Update server status
|
|
64
|
+
await statusChecker.updateServerStatus(server);
|
|
138
65
|
|
|
139
|
-
//
|
|
66
|
+
// 8. Display success
|
|
140
67
|
console.log();
|
|
141
68
|
console.log(chalk.green('✅ Server started successfully!'));
|
|
142
69
|
console.log();
|
|
143
|
-
console.log(chalk.dim(`Connect: http://localhost:${
|
|
144
|
-
console.log(chalk.dim(`View logs: llamacpp logs ${
|
|
145
|
-
console.log(chalk.dim(`Stop: llamacpp stop ${
|
|
70
|
+
console.log(chalk.dim(`Connect: http://localhost:${server.port}`));
|
|
71
|
+
console.log(chalk.dim(`View logs: llamacpp server logs ${server.id}`));
|
|
72
|
+
console.log(chalk.dim(`Stop: llamacpp server stop ${server.id}`));
|
|
146
73
|
}
|
|
@@ -11,8 +11,7 @@ export interface ServerOptions {
|
|
|
11
11
|
gpuLayers?: number;
|
|
12
12
|
embeddings?: boolean;
|
|
13
13
|
jinja?: boolean;
|
|
14
|
-
|
|
15
|
-
logTimestamps?: boolean;
|
|
14
|
+
verbose?: boolean;
|
|
16
15
|
}
|
|
17
16
|
|
|
18
17
|
export interface SmartDefaults {
|
|
@@ -69,8 +68,7 @@ export class ConfigGenerator {
|
|
|
69
68
|
const gpuLayers = options?.gpuLayers ?? smartDefaults.gpuLayers;
|
|
70
69
|
const embeddings = options?.embeddings ?? true;
|
|
71
70
|
const jinja = options?.jinja ?? true;
|
|
72
|
-
const
|
|
73
|
-
const logTimestamps = options?.logTimestamps ?? true; // Enable timestamps by default
|
|
71
|
+
const verbose = options?.verbose ?? false; // Default to false (simple logging)
|
|
74
72
|
|
|
75
73
|
// Generate server ID
|
|
76
74
|
const id = sanitizeModelName(modelName);
|
|
@@ -92,8 +90,7 @@ export class ConfigGenerator {
|
|
|
92
90
|
gpuLayers,
|
|
93
91
|
embeddings,
|
|
94
92
|
jinja,
|
|
95
|
-
|
|
96
|
-
logTimestamps,
|
|
93
|
+
verbose,
|
|
97
94
|
status: 'stopped',
|
|
98
95
|
createdAt: new Date().toISOString(),
|
|
99
96
|
plistPath,
|
|
@@ -117,8 +114,7 @@ export class ConfigGenerator {
|
|
|
117
114
|
gpuLayers: options?.gpuLayers ?? globalConfig.defaults.gpuLayers,
|
|
118
115
|
embeddings: options?.embeddings ?? true,
|
|
119
116
|
jinja: options?.jinja ?? true,
|
|
120
|
-
|
|
121
|
-
logTimestamps: options?.logTimestamps ?? true,
|
|
117
|
+
verbose: options?.verbose ?? false,
|
|
122
118
|
};
|
|
123
119
|
}
|
|
124
120
|
}
|
|
@@ -29,10 +29,11 @@ export class LaunchctlManager {
|
|
|
29
29
|
// Add flags
|
|
30
30
|
if (config.embeddings) args.push('--embeddings');
|
|
31
31
|
if (config.jinja) args.push('--jinja');
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
|
|
33
|
+
// Conditionally enable verbose HTTP logging for detailed request/response info
|
|
34
|
+
if (config.verbose) {
|
|
35
|
+
args.push('--log-verbose');
|
|
34
36
|
}
|
|
35
|
-
if (config.logTimestamps) args.push('--log-timestamps');
|
|
36
37
|
|
|
37
38
|
// Generate XML array elements
|
|
38
39
|
const argsXml = args.map(arg => ` <string>${arg}</string>`).join('\n');
|
|
@@ -12,8 +12,7 @@ export interface ServerConfig {
|
|
|
12
12
|
gpuLayers: number;
|
|
13
13
|
embeddings: boolean; // Always true
|
|
14
14
|
jinja: boolean; // Always true
|
|
15
|
-
|
|
16
|
-
logTimestamps: boolean; // Add timestamps to log messages
|
|
15
|
+
verbose: boolean; // Enable verbose HTTP logging (--log-verbose flag)
|
|
17
16
|
|
|
18
17
|
// State tracking
|
|
19
18
|
status: ServerStatus;
|
package/src/utils/log-parser.ts
CHANGED
|
@@ -22,8 +22,22 @@ export class LogParser {
|
|
|
22
22
|
* Process log lines and output compact format
|
|
23
23
|
*/
|
|
24
24
|
processLine(line: string, callback: (compactLine: string) => void): void {
|
|
25
|
-
// Check if this is
|
|
26
|
-
|
|
25
|
+
// Check if this is a simple single-line format (no JSON, non-verbose mode)
|
|
26
|
+
// Format: srv log_server_r: request: POST /v1/chat/completions 127.0.0.1 200
|
|
27
|
+
if (line.includes('log_server_r: request:') && !line.includes('{')) {
|
|
28
|
+
// Check if this is the start of verbose format (status line before JSON)
|
|
29
|
+
// or a simple single-line log
|
|
30
|
+
if (this.isBuffering) {
|
|
31
|
+
// We're already buffering, so this is a new request - process previous buffer
|
|
32
|
+
const compactLine = this.consolidateRequest(this.buffer);
|
|
33
|
+
if (compactLine) {
|
|
34
|
+
callback(compactLine);
|
|
35
|
+
}
|
|
36
|
+
this.buffer = [];
|
|
37
|
+
this.isBuffering = false;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// Start buffering (might be verbose or simple)
|
|
27
41
|
this.isBuffering = true;
|
|
28
42
|
this.buffer = [line];
|
|
29
43
|
return;
|
|
@@ -33,7 +47,7 @@ export class LogParser {
|
|
|
33
47
|
if (this.isBuffering) {
|
|
34
48
|
this.buffer.push(line);
|
|
35
49
|
|
|
36
|
-
// Check if we have a complete request (found response line)
|
|
50
|
+
// Check if we have a complete request (found response line in verbose mode)
|
|
37
51
|
if (line.includes('log_server_r: response:')) {
|
|
38
52
|
const compactLine = this.consolidateRequest(this.buffer);
|
|
39
53
|
if (compactLine) {
|
|
@@ -45,6 +59,43 @@ export class LogParser {
|
|
|
45
59
|
}
|
|
46
60
|
}
|
|
47
61
|
|
|
62
|
+
/**
|
|
63
|
+
* Flush any buffered simple format logs
|
|
64
|
+
* Call this at the end of processing to handle simple logs that don't have response lines
|
|
65
|
+
*/
|
|
66
|
+
flush(callback: (compactLine: string) => void): void {
|
|
67
|
+
if (this.isBuffering && this.buffer.length > 0) {
|
|
68
|
+
// If we only have one line, it's a simple format log
|
|
69
|
+
if (this.buffer.length === 1) {
|
|
70
|
+
const simpleLine = this.parseSimpleFormat(this.buffer[0]);
|
|
71
|
+
if (simpleLine) {
|
|
72
|
+
callback(simpleLine);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
this.buffer = [];
|
|
76
|
+
this.isBuffering = false;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Parse simple single-line format (non-verbose mode)
|
|
82
|
+
* Format: srv log_server_r: request: POST /v1/chat/completions 127.0.0.1 200
|
|
83
|
+
*/
|
|
84
|
+
private parseSimpleFormat(line: string): string | null {
|
|
85
|
+
try {
|
|
86
|
+
const timestamp = this.extractTimestamp(line);
|
|
87
|
+
const requestMatch = line.match(/request: (POST|GET|PUT|DELETE) ([^\s]+) ([^\s]+) (\d+)/);
|
|
88
|
+
if (!requestMatch) return null;
|
|
89
|
+
|
|
90
|
+
const [, method, endpoint, ip, status] = requestMatch;
|
|
91
|
+
|
|
92
|
+
// Simple format doesn't include message/token details
|
|
93
|
+
return `${timestamp} ${method} ${endpoint} ${ip} ${status}`;
|
|
94
|
+
} catch (error) {
|
|
95
|
+
return null;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
48
99
|
/**
|
|
49
100
|
* Consolidate buffered request/response lines into single line
|
|
50
101
|
*/
|