@appkit/llamacpp-cli 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +95 -16
  3. package/dist/cli.js +38 -8
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/create.d.ts +10 -0
  6. package/dist/commands/create.d.ts.map +1 -0
  7. package/dist/commands/create.js +152 -0
  8. package/dist/commands/create.js.map +1 -0
  9. package/dist/commands/list.js +1 -1
  10. package/dist/commands/list.js.map +1 -1
  11. package/dist/commands/logs.d.ts.map +1 -1
  12. package/dist/commands/logs.js +17 -3
  13. package/dist/commands/logs.js.map +1 -1
  14. package/dist/commands/ps.js +1 -1
  15. package/dist/commands/ps.js.map +1 -1
  16. package/dist/commands/pull.js +1 -1
  17. package/dist/commands/pull.js.map +1 -1
  18. package/dist/commands/run.d.ts +5 -1
  19. package/dist/commands/run.d.ts.map +1 -1
  20. package/dist/commands/run.js +23 -4
  21. package/dist/commands/run.js.map +1 -1
  22. package/dist/commands/server-show.d.ts +2 -0
  23. package/dist/commands/server-show.d.ts.map +1 -0
  24. package/dist/commands/server-show.js +120 -0
  25. package/dist/commands/server-show.js.map +1 -0
  26. package/dist/commands/start.d.ts +1 -10
  27. package/dist/commands/start.d.ts.map +1 -1
  28. package/dist/commands/start.js +31 -121
  29. package/dist/commands/start.js.map +1 -1
  30. package/dist/lib/config-generator.d.ts +1 -2
  31. package/dist/lib/config-generator.d.ts.map +1 -1
  32. package/dist/lib/config-generator.js +3 -6
  33. package/dist/lib/config-generator.js.map +1 -1
  34. package/dist/lib/launchctl-manager.d.ts.map +1 -1
  35. package/dist/lib/launchctl-manager.js +3 -4
  36. package/dist/lib/launchctl-manager.js.map +1 -1
  37. package/dist/types/server-config.d.ts +1 -2
  38. package/dist/types/server-config.d.ts.map +1 -1
  39. package/dist/types/server-config.js.map +1 -1
  40. package/dist/utils/log-parser.d.ts +10 -0
  41. package/dist/utils/log-parser.d.ts.map +1 -1
  42. package/dist/utils/log-parser.js +51 -3
  43. package/dist/utils/log-parser.js.map +1 -1
  44. package/package.json +1 -1
  45. package/src/cli.ts +38 -8
  46. package/src/commands/create.ts +143 -0
  47. package/src/commands/list.ts +1 -1
  48. package/src/commands/logs.ts +20 -3
  49. package/src/commands/ps.ts +1 -1
  50. package/src/commands/pull.ts +1 -1
  51. package/src/commands/run.ts +28 -4
  52. package/src/commands/server-show.ts +125 -0
  53. package/src/commands/start.ts +39 -112
  54. package/src/lib/config-generator.ts +4 -8
  55. package/src/lib/launchctl-manager.ts +4 -3
  56. package/src/types/server-config.ts +1 -2
  57. package/src/utils/log-parser.ts +54 -3
@@ -1,146 +1,73 @@
1
1
  import chalk from 'chalk';
2
- import * as path from 'path';
3
- import { modelScanner } from '../lib/model-scanner';
4
2
  import { stateManager } from '../lib/state-manager';
5
- import { configGenerator, ServerOptions } from '../lib/config-generator';
6
- import { portManager } from '../lib/port-manager';
7
3
  import { launchctlManager } from '../lib/launchctl-manager';
8
4
  import { statusChecker } from '../lib/status-checker';
9
- import { commandExists } from '../utils/process-utils';
10
- import { formatBytes } from '../utils/format-utils';
11
- import { ensureDir } from '../utils/file-utils';
12
5
 
13
- interface StartOptions {
14
- port?: number;
15
- threads?: number;
16
- ctxSize?: number;
17
- gpuLayers?: number;
18
- logVerbosity?: number;
19
- logTimestamps?: boolean;
20
- }
21
-
22
- export async function startCommand(model: string, options: StartOptions): Promise<void> {
6
+ export async function startCommand(identifier: string): Promise<void> {
23
7
  // Initialize state manager
24
8
  await stateManager.initialize();
25
9
 
26
- // 1. Check if llama-server exists
27
- if (!(await commandExists('llama-server'))) {
28
- throw new Error('llama-server not found. Install with: brew install llama.cpp');
29
- }
30
-
31
- // 2. Resolve model path
32
- const modelPath = await modelScanner.resolveModelPath(model);
33
- if (!modelPath) {
34
- throw new Error(`Model not found: ${model}\n\nRun: llamacpp list`);
10
+ // 1. Find server by identifier
11
+ const server = await stateManager.findServer(identifier);
12
+ if (!server) {
13
+ throw new Error(
14
+ `Server not found: ${identifier}\n\n` +
15
+ `Use: llamacpp ps\n` +
16
+ `Or create a new server: llamacpp server create <model>`
17
+ );
35
18
  }
36
19
 
37
- const modelName = path.basename(modelPath);
38
-
39
- // 3. Check if server already exists for this model
40
- const existingServer = await stateManager.serverExistsForModel(modelPath);
41
- if (existingServer) {
42
- throw new Error(`Server already exists for ${modelName}\n\nUse: llamacpp ps`);
20
+ // 2. Check if already running
21
+ if (server.status === 'running') {
22
+ console.log(
23
+ chalk.yellow(
24
+ `⚠️ Server ${server.modelName} is already running on port ${server.port}`
25
+ )
26
+ );
27
+ return;
43
28
  }
44
29
 
45
- // 4. Get model size
46
- const modelSize = await modelScanner.getModelSize(modelName);
47
- if (!modelSize) {
48
- throw new Error(`Failed to read model file: ${modelPath}`);
49
- }
30
+ console.log(chalk.blue(`▶️ Starting ${server.modelName} (port ${server.port})...`));
50
31
 
51
- // 5. Determine port
52
- let port: number;
53
- if (options.port) {
54
- portManager.validatePort(options.port);
55
- const available = await portManager.isPortAvailable(options.port);
56
- if (!available) {
57
- throw new Error(`Port ${options.port} is already in use`);
58
- }
59
- port = options.port;
60
- } else {
61
- port = await portManager.findAvailablePort();
32
+ // 3. Ensure plist exists (recreate if missing)
33
+ try {
34
+ await launchctlManager.createPlist(server);
35
+ } catch (error) {
36
+ // May already exist, that's okay
62
37
  }
63
38
 
64
- // 6. Generate server configuration
65
- console.log(chalk.blue(`🚀 Starting server for ${modelName}\n`));
66
-
67
- const serverOptions: ServerOptions = {
68
- port: options.port,
69
- threads: options.threads,
70
- ctxSize: options.ctxSize,
71
- gpuLayers: options.gpuLayers,
72
- logVerbosity: options.logVerbosity,
73
- logTimestamps: options.logTimestamps,
74
- };
75
-
76
- const config = await configGenerator.generateConfig(
77
- modelPath,
78
- modelName,
79
- modelSize,
80
- port,
81
- serverOptions
82
- );
83
-
84
- // Display configuration
85
- console.log(chalk.dim(`Model: ${modelPath}`));
86
- console.log(chalk.dim(`Size: ${formatBytes(modelSize)}`));
87
- console.log(chalk.dim(`Port: ${config.port}${options.port ? '' : ' (auto-assigned)'}`));
88
- console.log(chalk.dim(`Threads: ${config.threads}`));
89
- console.log(chalk.dim(`Context Size: ${config.ctxSize}`));
90
- console.log(chalk.dim(`GPU Layers: ${config.gpuLayers}`));
91
- console.log(chalk.dim(`Log Verbosity: ${config.logVerbosity !== undefined ? config.logVerbosity : 'all'}`));
92
- console.log(chalk.dim(`Log Timestamps: ${config.logTimestamps ? 'enabled' : 'disabled'}`));
93
- console.log();
94
-
95
- // 7. Ensure log directory exists
96
- await ensureDir(path.dirname(config.stdoutPath));
97
-
98
- // 8. Create plist file
99
- console.log(chalk.dim('Creating launchctl service...'));
100
- await launchctlManager.createPlist(config);
101
-
102
- // 9. Load service
39
+ // 4. Load service if needed
103
40
  try {
104
- await launchctlManager.loadService(config.plistPath);
41
+ await launchctlManager.loadService(server.plistPath);
105
42
  } catch (error) {
106
- // Clean up plist if load fails
107
- await launchctlManager.deletePlist(config.plistPath);
108
- throw new Error(`Failed to load service: ${(error as Error).message}`);
43
+ // May already be loaded, that's okay
109
44
  }
110
45
 
111
- // 10. Start service
46
+ // 5. Start the service
112
47
  try {
113
- await launchctlManager.startService(config.label);
48
+ await launchctlManager.startService(server.label);
114
49
  } catch (error) {
115
- // Clean up if start fails
116
- await launchctlManager.unloadService(config.plistPath);
117
- await launchctlManager.deletePlist(config.plistPath);
118
50
  throw new Error(`Failed to start service: ${(error as Error).message}`);
119
51
  }
120
52
 
121
- // 11. Wait for startup
53
+ // 6. Wait for startup
122
54
  console.log(chalk.dim('Waiting for server to start...'));
123
- const started = await launchctlManager.waitForServiceStart(config.label, 5000);
55
+ const started = await launchctlManager.waitForServiceStart(server.label, 5000);
124
56
 
125
57
  if (!started) {
126
- // Clean up if startup fails
127
- await launchctlManager.stopService(config.label);
128
- await launchctlManager.unloadService(config.plistPath);
129
- await launchctlManager.deletePlist(config.plistPath);
130
- throw new Error('Server failed to start. Check logs with: llamacpp logs --errors');
58
+ throw new Error(
59
+ `Server failed to start. Check logs with: llamacpp server logs ${server.id}`
60
+ );
131
61
  }
132
62
 
133
- // 12. Update config with running status
134
- const updatedConfig = await statusChecker.updateServerStatus(config);
135
-
136
- // 13. Save server config
137
- await stateManager.saveServerConfig(updatedConfig);
63
+ // 7. Update server status
64
+ await statusChecker.updateServerStatus(server);
138
65
 
139
- // 14. Display success message
66
+ // 8. Display success
140
67
  console.log();
141
68
  console.log(chalk.green('✅ Server started successfully!'));
142
69
  console.log();
143
- console.log(chalk.dim(`Connect: http://localhost:${config.port}`));
144
- console.log(chalk.dim(`View logs: llamacpp logs ${config.id}`));
145
- console.log(chalk.dim(`Stop: llamacpp stop ${config.id}`));
70
+ console.log(chalk.dim(`Connect: http://localhost:${server.port}`));
71
+ console.log(chalk.dim(`View logs: llamacpp server logs ${server.id}`));
72
+ console.log(chalk.dim(`Stop: llamacpp server stop ${server.id}`));
146
73
  }
@@ -11,8 +11,7 @@ export interface ServerOptions {
11
11
  gpuLayers?: number;
12
12
  embeddings?: boolean;
13
13
  jinja?: boolean;
14
- logVerbosity?: number;
15
- logTimestamps?: boolean;
14
+ verbose?: boolean;
16
15
  }
17
16
 
18
17
  export interface SmartDefaults {
@@ -69,8 +68,7 @@ export class ConfigGenerator {
69
68
  const gpuLayers = options?.gpuLayers ?? smartDefaults.gpuLayers;
70
69
  const embeddings = options?.embeddings ?? true;
71
70
  const jinja = options?.jinja ?? true;
72
- const logVerbosity = options?.logVerbosity; // Default to undefined (log everything), filter at CLI level
73
- const logTimestamps = options?.logTimestamps ?? true; // Enable timestamps by default
71
+ const verbose = options?.verbose ?? false; // Default to false (simple logging)
74
72
 
75
73
  // Generate server ID
76
74
  const id = sanitizeModelName(modelName);
@@ -92,8 +90,7 @@ export class ConfigGenerator {
92
90
  gpuLayers,
93
91
  embeddings,
94
92
  jinja,
95
- logVerbosity,
96
- logTimestamps,
93
+ verbose,
97
94
  status: 'stopped',
98
95
  createdAt: new Date().toISOString(),
99
96
  plistPath,
@@ -117,8 +114,7 @@ export class ConfigGenerator {
117
114
  gpuLayers: options?.gpuLayers ?? globalConfig.defaults.gpuLayers,
118
115
  embeddings: options?.embeddings ?? true,
119
116
  jinja: options?.jinja ?? true,
120
- logVerbosity: options?.logVerbosity, // undefined = log everything
121
- logTimestamps: options?.logTimestamps ?? true,
117
+ verbose: options?.verbose ?? false,
122
118
  };
123
119
  }
124
120
  }
@@ -29,10 +29,11 @@ export class LaunchctlManager {
29
29
  // Add flags
30
30
  if (config.embeddings) args.push('--embeddings');
31
31
  if (config.jinja) args.push('--jinja');
32
- if (config.logVerbosity !== undefined) {
33
- args.push('--log-verbosity', config.logVerbosity.toString());
32
+
33
+ // Conditionally enable verbose HTTP logging for detailed request/response info
34
+ if (config.verbose) {
35
+ args.push('--log-verbose');
34
36
  }
35
- if (config.logTimestamps) args.push('--log-timestamps');
36
37
 
37
38
  // Generate XML array elements
38
39
  const argsXml = args.map(arg => ` <string>${arg}</string>`).join('\n');
@@ -12,8 +12,7 @@ export interface ServerConfig {
12
12
  gpuLayers: number;
13
13
  embeddings: boolean; // Always true
14
14
  jinja: boolean; // Always true
15
- logVerbosity?: number; // Log verbosity level (0=errors only, 1=warnings, 2=info/HTTP, 9=debug, undefined=all)
16
- logTimestamps: boolean; // Add timestamps to log messages
15
+ verbose: boolean; // Enable verbose HTTP logging (--log-verbose flag)
17
16
 
18
17
  // State tracking
19
18
  status: ServerStatus;
@@ -22,8 +22,22 @@ export class LogParser {
22
22
  * Process log lines and output compact format
23
23
  */
24
24
  processLine(line: string, callback: (compactLine: string) => void): void {
25
- // Check if this is the start of an HTTP request log
26
- if (line.includes('log_server_r: request: POST')) {
25
+ // Check if this is a simple single-line format (no JSON, non-verbose mode)
26
+ // Format: srv log_server_r: request: POST /v1/chat/completions 127.0.0.1 200
27
+ if (line.includes('log_server_r: request:') && !line.includes('{')) {
28
+ // Check if this is the start of verbose format (status line before JSON)
29
+ // or a simple single-line log
30
+ if (this.isBuffering) {
31
+ // We're already buffering, so this is a new request - process previous buffer
32
+ const compactLine = this.consolidateRequest(this.buffer);
33
+ if (compactLine) {
34
+ callback(compactLine);
35
+ }
36
+ this.buffer = [];
37
+ this.isBuffering = false;
38
+ }
39
+
40
+ // Start buffering (might be verbose or simple)
27
41
  this.isBuffering = true;
28
42
  this.buffer = [line];
29
43
  return;
@@ -33,7 +47,7 @@ export class LogParser {
33
47
  if (this.isBuffering) {
34
48
  this.buffer.push(line);
35
49
 
36
- // Check if we have a complete request (found response line)
50
+ // Check if we have a complete request (found response line in verbose mode)
37
51
  if (line.includes('log_server_r: response:')) {
38
52
  const compactLine = this.consolidateRequest(this.buffer);
39
53
  if (compactLine) {
@@ -45,6 +59,43 @@ export class LogParser {
45
59
  }
46
60
  }
47
61
 
62
+ /**
63
+ * Flush any buffered simple format logs
64
+ * Call this at the end of processing to handle simple logs that don't have response lines
65
+ */
66
+ flush(callback: (compactLine: string) => void): void {
67
+ if (this.isBuffering && this.buffer.length > 0) {
68
+ // If we only have one line, it's a simple format log
69
+ if (this.buffer.length === 1) {
70
+ const simpleLine = this.parseSimpleFormat(this.buffer[0]);
71
+ if (simpleLine) {
72
+ callback(simpleLine);
73
+ }
74
+ }
75
+ this.buffer = [];
76
+ this.isBuffering = false;
77
+ }
78
+ }
79
+
80
+ /**
81
+ * Parse simple single-line format (non-verbose mode)
82
+ * Format: srv log_server_r: request: POST /v1/chat/completions 127.0.0.1 200
83
+ */
84
+ private parseSimpleFormat(line: string): string | null {
85
+ try {
86
+ const timestamp = this.extractTimestamp(line);
87
+ const requestMatch = line.match(/request: (POST|GET|PUT|DELETE) ([^\s]+) ([^\s]+) (\d+)/);
88
+ if (!requestMatch) return null;
89
+
90
+ const [, method, endpoint, ip, status] = requestMatch;
91
+
92
+ // Simple format doesn't include message/token details
93
+ return `${timestamp} ${method} ${endpoint} ${ip} ${status}`;
94
+ } catch (error) {
95
+ return null;
96
+ }
97
+ }
98
+
48
99
  /**
49
100
  * Consolidate buffered request/response lines into single line
50
101
  */