@appkit/llamacpp-cli 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +129 -16
  3. package/dist/cli.js +61 -8
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/config.d.ts +10 -0
  6. package/dist/commands/config.d.ts.map +1 -0
  7. package/dist/commands/config.js +136 -0
  8. package/dist/commands/config.js.map +1 -0
  9. package/dist/commands/create.d.ts +11 -0
  10. package/dist/commands/create.d.ts.map +1 -0
  11. package/dist/commands/create.js +160 -0
  12. package/dist/commands/create.js.map +1 -0
  13. package/dist/commands/list.js +1 -1
  14. package/dist/commands/list.js.map +1 -1
  15. package/dist/commands/logs.d.ts.map +1 -1
  16. package/dist/commands/logs.js +17 -3
  17. package/dist/commands/logs.js.map +1 -1
  18. package/dist/commands/ps.js +1 -1
  19. package/dist/commands/ps.js.map +1 -1
  20. package/dist/commands/pull.js +1 -1
  21. package/dist/commands/pull.js.map +1 -1
  22. package/dist/commands/run.d.ts +5 -1
  23. package/dist/commands/run.d.ts.map +1 -1
  24. package/dist/commands/run.js +22 -3
  25. package/dist/commands/run.js.map +1 -1
  26. package/dist/commands/server-show.d.ts +2 -0
  27. package/dist/commands/server-show.d.ts.map +1 -0
  28. package/dist/commands/server-show.js +121 -0
  29. package/dist/commands/server-show.js.map +1 -0
  30. package/dist/commands/start.d.ts +1 -10
  31. package/dist/commands/start.d.ts.map +1 -1
  32. package/dist/commands/start.js +31 -121
  33. package/dist/commands/start.js.map +1 -1
  34. package/dist/lib/config-generator.d.ts +2 -2
  35. package/dist/lib/config-generator.d.ts.map +1 -1
  36. package/dist/lib/config-generator.js +6 -6
  37. package/dist/lib/config-generator.js.map +1 -1
  38. package/dist/lib/launchctl-manager.d.ts.map +1 -1
  39. package/dist/lib/launchctl-manager.js +4 -4
  40. package/dist/lib/launchctl-manager.js.map +1 -1
  41. package/dist/lib/state-manager.d.ts +4 -0
  42. package/dist/lib/state-manager.d.ts.map +1 -1
  43. package/dist/lib/state-manager.js +11 -0
  44. package/dist/lib/state-manager.js.map +1 -1
  45. package/dist/types/server-config.d.ts +2 -2
  46. package/dist/types/server-config.d.ts.map +1 -1
  47. package/dist/utils/log-parser.d.ts +10 -0
  48. package/dist/utils/log-parser.d.ts.map +1 -1
  49. package/dist/utils/log-parser.js +51 -3
  50. package/dist/utils/log-parser.js.map +1 -1
  51. package/package.json +1 -1
  52. package/src/cli.ts +61 -8
  53. package/src/commands/config.ts +155 -0
  54. package/src/commands/create.ts +153 -0
  55. package/src/commands/list.ts +1 -1
  56. package/src/commands/logs.ts +20 -3
  57. package/src/commands/ps.ts +1 -1
  58. package/src/commands/pull.ts +1 -1
  59. package/src/commands/run.ts +27 -3
  60. package/src/commands/server-show.ts +126 -0
  61. package/src/commands/start.ts +39 -112
  62. package/src/lib/config-generator.ts +8 -8
  63. package/src/lib/launchctl-manager.ts +5 -3
  64. package/src/lib/state-manager.ts +12 -0
  65. package/src/types/server-config.ts +2 -2
  66. package/src/utils/log-parser.ts +54 -3
package/src/commands/start.ts
@@ -1,146 +1,73 @@
  import chalk from 'chalk';
- import * as path from 'path';
- import { modelScanner } from '../lib/model-scanner';
  import { stateManager } from '../lib/state-manager';
- import { configGenerator, ServerOptions } from '../lib/config-generator';
- import { portManager } from '../lib/port-manager';
  import { launchctlManager } from '../lib/launchctl-manager';
  import { statusChecker } from '../lib/status-checker';
- import { commandExists } from '../utils/process-utils';
- import { formatBytes } from '../utils/format-utils';
- import { ensureDir } from '../utils/file-utils';

- interface StartOptions {
-   port?: number;
-   threads?: number;
-   ctxSize?: number;
-   gpuLayers?: number;
-   logVerbosity?: number;
-   logTimestamps?: boolean;
- }
-
- export async function startCommand(model: string, options: StartOptions): Promise<void> {
+ export async function startCommand(identifier: string): Promise<void> {
    // Initialize state manager
    await stateManager.initialize();

-   // 1. Check if llama-server exists
-   if (!(await commandExists('llama-server'))) {
-     throw new Error('llama-server not found. Install with: brew install llama.cpp');
-   }
-
-   // 2. Resolve model path
-   const modelPath = await modelScanner.resolveModelPath(model);
-   if (!modelPath) {
-     throw new Error(`Model not found: ${model}\n\nRun: llamacpp list`);
+   // 1. Find server by identifier
+   const server = await stateManager.findServer(identifier);
+   if (!server) {
+     throw new Error(
+       `Server not found: ${identifier}\n\n` +
+         `Use: llamacpp ps\n` +
+         `Or create a new server: llamacpp server create <model>`
+     );
    }

-   const modelName = path.basename(modelPath);
-
-   // 3. Check if server already exists for this model
-   const existingServer = await stateManager.serverExistsForModel(modelPath);
-   if (existingServer) {
-     throw new Error(`Server already exists for ${modelName}\n\nUse: llamacpp ps`);
+   // 2. Check if already running
+   if (server.status === 'running') {
+     console.log(
+       chalk.yellow(
+         `⚠️ Server ${server.modelName} is already running on port ${server.port}`
+       )
+     );
+     return;
    }

-   // 4. Get model size
-   const modelSize = await modelScanner.getModelSize(modelName);
-   if (!modelSize) {
-     throw new Error(`Failed to read model file: ${modelPath}`);
-   }
+   console.log(chalk.blue(`▶️ Starting ${server.modelName} (port ${server.port})...`));

-   // 5. Determine port
-   let port: number;
-   if (options.port) {
-     portManager.validatePort(options.port);
-     const available = await portManager.isPortAvailable(options.port);
-     if (!available) {
-       throw new Error(`Port ${options.port} is already in use`);
-     }
-     port = options.port;
-   } else {
-     port = await portManager.findAvailablePort();
+   // 3. Ensure plist exists (recreate if missing)
+   try {
+     await launchctlManager.createPlist(server);
+   } catch (error) {
+     // May already exist, that's okay
    }

-   // 6. Generate server configuration
-   console.log(chalk.blue(`🚀 Starting server for ${modelName}\n`));
-
-   const serverOptions: ServerOptions = {
-     port: options.port,
-     threads: options.threads,
-     ctxSize: options.ctxSize,
-     gpuLayers: options.gpuLayers,
-     logVerbosity: options.logVerbosity,
-     logTimestamps: options.logTimestamps,
-   };
-
-   const config = await configGenerator.generateConfig(
-     modelPath,
-     modelName,
-     modelSize,
-     port,
-     serverOptions
-   );
-
-   // Display configuration
-   console.log(chalk.dim(`Model: ${modelPath}`));
-   console.log(chalk.dim(`Size: ${formatBytes(modelSize)}`));
-   console.log(chalk.dim(`Port: ${config.port}${options.port ? '' : ' (auto-assigned)'}`));
-   console.log(chalk.dim(`Threads: ${config.threads}`));
-   console.log(chalk.dim(`Context Size: ${config.ctxSize}`));
-   console.log(chalk.dim(`GPU Layers: ${config.gpuLayers}`));
-   console.log(chalk.dim(`Log Verbosity: ${config.logVerbosity !== undefined ? config.logVerbosity : 'all'}`));
-   console.log(chalk.dim(`Log Timestamps: ${config.logTimestamps ? 'enabled' : 'disabled'}`));
-   console.log();
-
-   // 7. Ensure log directory exists
-   await ensureDir(path.dirname(config.stdoutPath));
-
-   // 8. Create plist file
-   console.log(chalk.dim('Creating launchctl service...'));
-   await launchctlManager.createPlist(config);
-
-   // 9. Load service
+   // 4. Load service if needed
    try {
-     await launchctlManager.loadService(config.plistPath);
+     await launchctlManager.loadService(server.plistPath);
    } catch (error) {
-     // Clean up plist if load fails
-     await launchctlManager.deletePlist(config.plistPath);
-     throw new Error(`Failed to load service: ${(error as Error).message}`);
+     // May already be loaded, that's okay
    }

-   // 10. Start service
+   // 5. Start the service
    try {
-     await launchctlManager.startService(config.label);
+     await launchctlManager.startService(server.label);
    } catch (error) {
-     // Clean up if start fails
-     await launchctlManager.unloadService(config.plistPath);
-     await launchctlManager.deletePlist(config.plistPath);
      throw new Error(`Failed to start service: ${(error as Error).message}`);
    }

-   // 11. Wait for startup
+   // 6. Wait for startup
    console.log(chalk.dim('Waiting for server to start...'));
-   const started = await launchctlManager.waitForServiceStart(config.label, 5000);
+   const started = await launchctlManager.waitForServiceStart(server.label, 5000);

    if (!started) {
-     // Clean up if startup fails
-     await launchctlManager.stopService(config.label);
-     await launchctlManager.unloadService(config.plistPath);
-     await launchctlManager.deletePlist(config.plistPath);
-     throw new Error('Server failed to start. Check logs with: llamacpp server logs --errors');
+     throw new Error(
+       `Server failed to start. Check logs with: llamacpp server logs ${server.id}`
+     );
    }

-   // 12. Update config with running status
-   const updatedConfig = await statusChecker.updateServerStatus(config);
-
-   // 13. Save server config
-   await stateManager.saveServerConfig(updatedConfig);
+   // 7. Update server status
+   await statusChecker.updateServerStatus(server);

-   // 14. Display success message
+   // 8. Display success
    console.log();
    console.log(chalk.green('✅ Server started successfully!'));
    console.log();
-   console.log(chalk.dim(`Connect: http://localhost:${config.port}`));
-   console.log(chalk.dim(`View logs: llamacpp server logs ${config.id}`));
-   console.log(chalk.dim(`Stop: llamacpp server stop ${config.id}`));
+   console.log(chalk.dim(`Connect: http://localhost:${server.port}`));
+   console.log(chalk.dim(`View logs: llamacpp server logs ${server.id}`));
+   console.log(chalk.dim(`Stop: llamacpp server stop ${server.id}`));
  }
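
In 1.3.0, startCommand takes only a server identifier; model resolution, port selection, and tuning options now belong to the new llamacpp server create flow. A minimal usage sketch of the new signature, assuming a server was already registered with llamacpp server create <model>; the identifier value and the wrapper function are illustrative, not from the package:

import { startCommand } from './commands/start';

// Hypothetical caller of the new one-argument startCommand.
// Assumes the server was registered earlier, e.g. with: llamacpp server create <model>
async function main(): Promise<void> {
  await startCommand('my-llama-server'); // placeholder identifier resolved by stateManager.findServer
}

main().catch((error) => {
  console.error(error instanceof Error ? error.message : String(error));
  process.exit(1);
});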
package/src/lib/config-generator.ts
@@ -6,13 +6,13 @@ import { stateManager } from './state-manager';

  export interface ServerOptions {
    port?: number;
+   host?: string;
    threads?: number;
    ctxSize?: number;
    gpuLayers?: number;
    embeddings?: boolean;
    jinja?: boolean;
-   logVerbosity?: number;
-   logTimestamps?: boolean;
+   verbose?: boolean;
  }

  export interface SmartDefaults {
@@ -64,13 +64,13 @@ export class ConfigGenerator {
      const smartDefaults = this.calculateSmartDefaults(modelSize);

      // Apply user overrides
+     const host = options?.host ?? '127.0.0.1'; // Default to localhost (secure)
      const threads = options?.threads ?? smartDefaults.threads;
      const ctxSize = options?.ctxSize ?? smartDefaults.ctxSize;
      const gpuLayers = options?.gpuLayers ?? smartDefaults.gpuLayers;
      const embeddings = options?.embeddings ?? true;
      const jinja = options?.jinja ?? true;
-     const logVerbosity = options?.logVerbosity; // Default to undefined (log everything), filter at CLI level
-     const logTimestamps = options?.logTimestamps ?? true; // Enable timestamps by default
+     const verbose = options?.verbose ?? true; // Default to true (HTTP request logging)

      // Generate server ID
      const id = sanitizeModelName(modelName);
@@ -87,13 +87,13 @@ export class ConfigGenerator {
        modelPath,
        modelName,
        port,
+       host,
        threads,
        ctxSize,
        gpuLayers,
        embeddings,
        jinja,
-       logVerbosity,
-       logTimestamps,
+       verbose,
        status: 'stopped',
        createdAt: new Date().toISOString(),
        plistPath,
@@ -112,13 +112,13 @@ export class ConfigGenerator {
      const globalConfig = await stateManager.loadGlobalConfig();

      return {
+       host: options?.host ?? '127.0.0.1',
        threads: options?.threads ?? globalConfig.defaults.threads,
        ctxSize: options?.ctxSize ?? globalConfig.defaults.ctxSize,
        gpuLayers: options?.gpuLayers ?? globalConfig.defaults.gpuLayers,
        embeddings: options?.embeddings ?? true,
        jinja: options?.jinja ?? true,
-       logVerbosity: options?.logVerbosity, // undefined = log everything
-       logTimestamps: options?.logTimestamps ?? true,
+       verbose: options?.verbose ?? true,
      };
    }
  }
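
For reference, a ServerOptions value under the new shape: host is added and a single verbose boolean replaces logVerbosity/logTimestamps. The field names come from the hunks above; the values and the relative import path are illustrative:

import type { ServerOptions } from '../lib/config-generator';

const options: ServerOptions = {
  port: 8080,          // placeholder value
  host: '127.0.0.1',   // new in 1.3.0: bind address, defaults to localhost when omitted
  threads: 8,
  ctxSize: 4096,
  gpuLayers: 99,
  embeddings: true,
  jinja: true,
  verbose: true,       // replaces logVerbosity / logTimestamps
};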
package/src/lib/launchctl-manager.ts
@@ -20,6 +20,7 @@ export class LaunchctlManager {
      const args = [
        '/opt/homebrew/bin/llama-server',
        '--model', config.modelPath,
+       '--host', config.host,
        '--port', config.port.toString(),
        '--threads', config.threads.toString(),
        '--ctx-size', config.ctxSize.toString(),
@@ -29,10 +30,11 @@ export class LaunchctlManager {
      // Add flags
      if (config.embeddings) args.push('--embeddings');
      if (config.jinja) args.push('--jinja');
-     if (config.logVerbosity !== undefined) {
-       args.push('--log-verbosity', config.logVerbosity.toString());
+
+     // Conditionally enable verbose HTTP logging for detailed request/response info
+     if (config.verbose) {
+       args.push('--log-verbose');
      }
-     if (config.logTimestamps) args.push('--log-timestamps');

      // Generate XML array elements
      const argsXml = args.map(arg => `      <string>${arg}</string>`).join('\n');
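
Putting the launchctl changes together, a sketch of the argv assembly with the new --host flag and the --log-verbose toggle. It mirrors the hunks above; the config values are placeholders and arguments not visible in this diff are omitted:

const config = {
  modelPath: '/Users/me/models/example.gguf', // placeholder path
  host: '127.0.0.1',
  port: 8080,
  threads: 8,
  ctxSize: 4096,
  embeddings: true,
  jinja: true,
  verbose: true,
};

const args = [
  '/opt/homebrew/bin/llama-server',
  '--model', config.modelPath,
  '--host', config.host,                    // new in 1.3.0
  '--port', config.port.toString(),
  '--threads', config.threads.toString(),
  '--ctx-size', config.ctxSize.toString(),
];
if (config.embeddings) args.push('--embeddings');
if (config.jinja) args.push('--jinja');
if (config.verbose) args.push('--log-verbose'); // replaces --log-verbosity / --log-timestamps

// Each argument becomes one <string> element in the plist's ProgramArguments array:
const argsXml = args.map(arg => `      <string>${arg}</string>`).join('\n');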
package/src/lib/state-manager.ts
@@ -81,6 +81,18 @@ export class StateManager {
      await writeJsonAtomic(configPath, config);
    }

+   /**
+    * Update a server configuration with partial changes
+    */
+   async updateServerConfig(id: string, updates: Partial<ServerConfig>): Promise<void> {
+     const existingConfig = await this.loadServerConfig(id);
+     if (!existingConfig) {
+       throw new Error(`Server configuration not found: ${id}`);
+     }
+     const updatedConfig = { ...existingConfig, ...updates };
+     await this.saveServerConfig(updatedConfig);
+   }
+
    /**
     * Delete a server configuration
     */
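
A usage sketch for the new StateManager.updateServerConfig helper. The wrapper function, server id, and import path are illustrative; the method signature and the 'stopped' status value come from this diff:

import { stateManager } from '../lib/state-manager';

// Merge a partial update into a stored server config and persist it.
async function markServerStopped(serverId: string): Promise<void> {
  await stateManager.initialize();
  await stateManager.updateServerConfig(serverId, { status: 'stopped' });
}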
package/src/types/server-config.ts
@@ -5,6 +5,7 @@ export interface ServerConfig {
    modelPath: string; // Full path to GGUF file
    modelName: string; // Display name (original filename)
    port: number; // Server port
+   host: string; // Bind address (default: 127.0.0.1)

    // llama-server configuration
    threads: number;
@@ -12,8 +13,7 @@ export interface ServerConfig {
    gpuLayers: number;
    embeddings: boolean; // Always true
    jinja: boolean; // Always true
-   logVerbosity?: number; // Log verbosity level (0=errors only, 1=warnings, 2=info/HTTP, 9=debug, undefined=all)
-   logTimestamps: boolean; // Add timestamps to log messages
+   verbose: boolean; // Enable verbose HTTP logging (--log-verbose flag)

    // State tracking
    status: ServerStatus;
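
A type-level summary of the ServerConfig change, assuming the interface is imported from the package's types module; only the affected members are shown and the values are illustrative:

import type { ServerConfig } from '../types/server-config';

// 1.3.0 adds `host` and collapses the two logging fields into a single boolean.
type ChangedFields = Pick<ServerConfig, 'host' | 'verbose'>;

const example: ChangedFields = {
  host: '127.0.0.1', // bind address, localhost by default
  verbose: true,     // emitted as --log-verbose in the launchd plist
};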
package/src/utils/log-parser.ts
@@ -22,8 +22,22 @@ export class LogParser {
     * Process log lines and output compact format
     */
    processLine(line: string, callback: (compactLine: string) => void): void {
-     // Check if this is the start of an HTTP request log
-     if (line.includes('log_server_r: request: POST')) {
+     // Check if this is a simple single-line format (no JSON, non-verbose mode)
+     // Format: srv log_server_r: request: POST /v1/chat/completions 127.0.0.1 200
+     if (line.includes('log_server_r: request:') && !line.includes('{')) {
+       // Check if this is the start of verbose format (status line before JSON)
+       // or a simple single-line log
+       if (this.isBuffering) {
+         // We're already buffering, so this is a new request - process previous buffer
+         const compactLine = this.consolidateRequest(this.buffer);
+         if (compactLine) {
+           callback(compactLine);
+         }
+         this.buffer = [];
+         this.isBuffering = false;
+       }
+
+       // Start buffering (might be verbose or simple)
        this.isBuffering = true;
        this.buffer = [line];
        return;
@@ -33,7 +47,7 @@ export class LogParser {
      if (this.isBuffering) {
        this.buffer.push(line);

-       // Check if we have a complete request (found response line)
+       // Check if we have a complete request (found response line in verbose mode)
        if (line.includes('log_server_r: response:')) {
          const compactLine = this.consolidateRequest(this.buffer);
          if (compactLine) {
@@ -45,6 +59,43 @@ export class LogParser {
        }
      }
    }

+   /**
+    * Flush any buffered simple format logs
+    * Call this at the end of processing to handle simple logs that don't have response lines
+    */
+   flush(callback: (compactLine: string) => void): void {
+     if (this.isBuffering && this.buffer.length > 0) {
+       // If we only have one line, it's a simple format log
+       if (this.buffer.length === 1) {
+         const simpleLine = this.parseSimpleFormat(this.buffer[0]);
+         if (simpleLine) {
+           callback(simpleLine);
+         }
+       }
+       this.buffer = [];
+       this.isBuffering = false;
+     }
+   }
+
+   /**
+    * Parse simple single-line format (non-verbose mode)
+    * Format: srv log_server_r: request: POST /v1/chat/completions 127.0.0.1 200
+    */
+   private parseSimpleFormat(line: string): string | null {
+     try {
+       const timestamp = this.extractTimestamp(line);
+       const requestMatch = line.match(/request: (POST|GET|PUT|DELETE) ([^\s]+) ([^\s]+) (\d+)/);
+       if (!requestMatch) return null;
+
+       const [, method, endpoint, ip, status] = requestMatch;
+
+       // Simple format doesn't include message/token details
+       return `${timestamp} ${method} ${endpoint} ${ip} ${status}`;
+     } catch (error) {
+       return null;
+     }
+   }
+
    /**
     * Consolidate buffered request/response lines into single line
     */
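
Finally, a usage sketch for the extended LogParser: stream raw llama-server log lines through processLine, then call flush so a trailing simple-format request (one without a verbose response line) is still emitted. processLine and flush come from the hunks above; the no-argument constructor, import path, and file-streaming setup are assumptions:

import * as fs from 'node:fs';
import * as readline from 'node:readline';
import { LogParser } from '../utils/log-parser';

async function printCompactLog(logPath: string): Promise<void> {
  const parser = new LogParser(); // assumed constructor
  const rl = readline.createInterface({ input: fs.createReadStream(logPath) });

  for await (const line of rl) {
    parser.processLine(line, (compact) => console.log(compact));
  }
  // Emit any buffered single-line (non-verbose) request left at end of input.
  parser.flush((compact) => console.log(compact));
}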