@appkit/llamacpp-cli 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/CHANGELOG.md +16 -0
  2. package/README.md +129 -16
  3. package/dist/cli.js +61 -8
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/config.d.ts +10 -0
  6. package/dist/commands/config.d.ts.map +1 -0
  7. package/dist/commands/config.js +136 -0
  8. package/dist/commands/config.js.map +1 -0
  9. package/dist/commands/create.d.ts +11 -0
  10. package/dist/commands/create.d.ts.map +1 -0
  11. package/dist/commands/create.js +160 -0
  12. package/dist/commands/create.js.map +1 -0
  13. package/dist/commands/list.js +1 -1
  14. package/dist/commands/list.js.map +1 -1
  15. package/dist/commands/logs.d.ts.map +1 -1
  16. package/dist/commands/logs.js +17 -3
  17. package/dist/commands/logs.js.map +1 -1
  18. package/dist/commands/ps.js +1 -1
  19. package/dist/commands/ps.js.map +1 -1
  20. package/dist/commands/pull.js +1 -1
  21. package/dist/commands/pull.js.map +1 -1
  22. package/dist/commands/run.d.ts +5 -1
  23. package/dist/commands/run.d.ts.map +1 -1
  24. package/dist/commands/run.js +22 -3
  25. package/dist/commands/run.js.map +1 -1
  26. package/dist/commands/server-show.d.ts +2 -0
  27. package/dist/commands/server-show.d.ts.map +1 -0
  28. package/dist/commands/server-show.js +121 -0
  29. package/dist/commands/server-show.js.map +1 -0
  30. package/dist/commands/start.d.ts +1 -10
  31. package/dist/commands/start.d.ts.map +1 -1
  32. package/dist/commands/start.js +31 -121
  33. package/dist/commands/start.js.map +1 -1
  34. package/dist/lib/config-generator.d.ts +2 -2
  35. package/dist/lib/config-generator.d.ts.map +1 -1
  36. package/dist/lib/config-generator.js +6 -6
  37. package/dist/lib/config-generator.js.map +1 -1
  38. package/dist/lib/launchctl-manager.d.ts.map +1 -1
  39. package/dist/lib/launchctl-manager.js +4 -4
  40. package/dist/lib/launchctl-manager.js.map +1 -1
  41. package/dist/lib/state-manager.d.ts +4 -0
  42. package/dist/lib/state-manager.d.ts.map +1 -1
  43. package/dist/lib/state-manager.js +11 -0
  44. package/dist/lib/state-manager.js.map +1 -1
  45. package/dist/types/server-config.d.ts +2 -2
  46. package/dist/types/server-config.d.ts.map +1 -1
  47. package/dist/utils/log-parser.d.ts +10 -0
  48. package/dist/utils/log-parser.d.ts.map +1 -1
  49. package/dist/utils/log-parser.js +51 -3
  50. package/dist/utils/log-parser.js.map +1 -1
  51. package/package.json +1 -1
  52. package/src/cli.ts +61 -8
  53. package/src/commands/config.ts +155 -0
  54. package/src/commands/create.ts +153 -0
  55. package/src/commands/list.ts +1 -1
  56. package/src/commands/logs.ts +20 -3
  57. package/src/commands/ps.ts +1 -1
  58. package/src/commands/pull.ts +1 -1
  59. package/src/commands/run.ts +27 -3
  60. package/src/commands/server-show.ts +126 -0
  61. package/src/commands/start.ts +39 -112
  62. package/src/lib/config-generator.ts +8 -8
  63. package/src/lib/launchctl-manager.ts +5 -3
  64. package/src/lib/state-manager.ts +12 -0
  65. package/src/types/server-config.ts +2 -2
  66. package/src/utils/log-parser.ts +54 -3
package/src/cli.ts CHANGED
@@ -4,6 +4,7 @@ import { Command } from 'commander';
4
4
  import chalk from 'chalk';
5
5
  import { listCommand } from './commands/list';
6
6
  import { psCommand } from './commands/ps';
7
+ import { createCommand } from './commands/create';
7
8
  import { startCommand } from './commands/start';
8
9
  import { runCommand } from './commands/run';
9
10
  import { stopCommand } from './commands/stop';
@@ -13,6 +14,8 @@ import { rmCommand } from './commands/rm';
13
14
  import { logsCommand } from './commands/logs';
14
15
  import { searchCommand } from './commands/search';
15
16
  import { showCommand } from './commands/show';
17
+ import { serverShowCommand } from './commands/server-show';
18
+ import { serverConfigCommand } from './commands/config';
16
19
 
17
20
  const program = new Command();
18
21
 
@@ -114,20 +117,69 @@ const server = program
114
117
  .command('server')
115
118
  .description('Manage llama-server instances');
116
119
 
117
- // Start a server
120
+ // Create a new server
118
121
  server
119
- .command('start')
120
- .description('Start a llama-server instance')
122
+ .command('create')
123
+ .description('Create and start a new llama-server instance')
121
124
  .argument('<model>', 'Model filename or path')
122
125
  .option('-p, --port <number>', 'Port number (default: auto-assign)', parseInt)
126
+ .option('-h, --host <address>', 'Bind address (default: 127.0.0.1, use 0.0.0.0 for remote access)')
123
127
  .option('-t, --threads <number>', 'Thread count (default: auto)', parseInt)
124
128
  .option('-c, --ctx-size <number>', 'Context size (default: auto)', parseInt)
125
129
  .option('-g, --gpu-layers <number>', 'GPU layers (default: 60)', parseInt)
126
- .option('-v, --log-verbosity <level>', 'Log verbosity: 0=errors, 1=warnings, 2=info (default), 9=debug, omit for all', parseInt)
127
- .option('--no-log-timestamps', 'Disable timestamps in log messages')
130
+ .option('-v, --verbose', 'Enable verbose HTTP logging (detailed request/response info)')
128
131
  .action(async (model: string, options) => {
129
132
  try {
130
- await startCommand(model, options);
133
+ await createCommand(model, options);
134
+ } catch (error) {
135
+ console.error(chalk.red('❌ Error:'), (error as Error).message);
136
+ process.exit(1);
137
+ }
138
+ });
139
+
140
+ // Show server details
141
+ server
142
+ .command('show')
143
+ .description('Show server configuration details')
144
+ .argument('<identifier>', 'Server identifier: port (9000), server ID (llama-3-2-3b), or partial model name')
145
+ .action(async (identifier: string) => {
146
+ try {
147
+ await serverShowCommand(identifier);
148
+ } catch (error) {
149
+ console.error(chalk.red('❌ Error:'), (error as Error).message);
150
+ process.exit(1);
151
+ }
152
+ });
153
+
154
+ // Update server configuration
155
+ server
156
+ .command('config')
157
+ .description('Update server configuration parameters')
158
+ .argument('<identifier>', 'Server identifier: port (9000), server ID (llama-3-2-3b), or partial model name')
159
+ .option('-h, --host <address>', 'Update bind address (127.0.0.1 for localhost, 0.0.0.0 for remote access)')
160
+ .option('-t, --threads <number>', 'Update thread count', parseInt)
161
+ .option('-c, --ctx-size <number>', 'Update context size', parseInt)
162
+ .option('-g, --gpu-layers <number>', 'Update GPU layers', parseInt)
163
+ .option('-v, --verbose', 'Enable verbose logging')
164
+ .option('--no-verbose', 'Disable verbose logging')
165
+ .option('-r, --restart', 'Automatically restart server if running')
166
+ .action(async (identifier: string, options) => {
167
+ try {
168
+ await serverConfigCommand(identifier, options);
169
+ } catch (error) {
170
+ console.error(chalk.red('❌ Error:'), (error as Error).message);
171
+ process.exit(1);
172
+ }
173
+ });
174
+
175
+ // Start an existing server
176
+ server
177
+ .command('start')
178
+ .description('Start an existing stopped server')
179
+ .argument('<identifier>', 'Server identifier: port (9000), server ID (llama-3-2-3b), or partial model name')
180
+ .action(async (identifier: string) => {
181
+ try {
182
+ await startCommand(identifier);
131
183
  } catch (error) {
132
184
  console.error(chalk.red('❌ Error:'), (error as Error).message);
133
185
  process.exit(1);
@@ -139,9 +191,10 @@ server
139
191
  .command('run')
140
192
  .description('Run an interactive chat session with a model')
141
193
  .argument('<model>', 'Model identifier: port (9000), server ID (llama-3-2-3b), partial name, or model filename')
142
- .action(async (model: string) => {
194
+ .option('-m, --message <text>', 'Send a single message and exit (non-interactive mode)')
195
+ .action(async (model: string, options) => {
143
196
  try {
144
- await runCommand(model);
197
+ await runCommand(model, options);
145
198
  } catch (error) {
146
199
  console.error(chalk.red('❌ Error:'), (error as Error).message);
147
200
  process.exit(1);
@@ -0,0 +1,155 @@
1
+ import chalk from 'chalk';
2
+ import { stateManager } from '../lib/state-manager';
3
+ import { statusChecker } from '../lib/status-checker';
4
+ import { launchctlManager } from '../lib/launchctl-manager';
5
+ import { configGenerator } from '../lib/config-generator';
6
+
7
+ export interface ConfigUpdateOptions {
8
+ host?: string;
9
+ threads?: number;
10
+ ctxSize?: number;
11
+ gpuLayers?: number;
12
+ verbose?: boolean;
13
+ restart?: boolean;
14
+ }
15
+
16
+ export async function serverConfigCommand(
17
+ identifier: string,
18
+ options: ConfigUpdateOptions
19
+ ): Promise<void> {
20
+ // Find the server
21
+ const server = await stateManager.findServer(identifier);
22
+
23
+ if (!server) {
24
+ console.error(chalk.red(`❌ Server not found: ${identifier}`));
25
+ console.log(chalk.dim('\nAvailable servers:'));
26
+ const allServers = await stateManager.getAllServers();
27
+ if (allServers.length === 0) {
28
+ console.log(chalk.dim(' (none)'));
29
+ console.log(chalk.dim('\nCreate a server: llamacpp server create <model-filename>'));
30
+ } else {
31
+ allServers.forEach(s => {
32
+ console.log(chalk.dim(` - ${s.id} (port ${s.port})`));
33
+ });
34
+ }
35
+ process.exit(1);
36
+ }
37
+
38
+ // Check if any config options were provided
39
+ const hasChanges = options.host !== undefined ||
40
+ options.threads !== undefined ||
41
+ options.ctxSize !== undefined ||
42
+ options.gpuLayers !== undefined ||
43
+ options.verbose !== undefined;
44
+
45
+ if (!hasChanges) {
46
+ console.error(chalk.red('❌ No configuration changes specified'));
47
+ console.log(chalk.dim('\nAvailable options:'));
48
+ console.log(chalk.dim(' --host <address> Bind address (127.0.0.1 or 0.0.0.0)'));
49
+ console.log(chalk.dim(' --threads <n> Number of threads'));
50
+ console.log(chalk.dim(' --ctx-size <n> Context size'));
51
+ console.log(chalk.dim(' --gpu-layers <n> GPU layers'));
52
+ console.log(chalk.dim(' --verbose Enable verbose logging'));
53
+ console.log(chalk.dim(' --no-verbose Disable verbose logging'));
54
+ console.log(chalk.dim(' --restart Auto-restart if running'));
55
+ console.log(chalk.dim('\nExample:'));
56
+ console.log(chalk.dim(` llamacpp server config ${server.id} --ctx-size 8192 --restart`));
57
+ process.exit(1);
58
+ }
59
+
60
+ // Check current status
61
+ const updatedServer = await statusChecker.updateServerStatus(server);
62
+ const wasRunning = updatedServer.status === 'running';
63
+
64
+ if (wasRunning && !options.restart) {
65
+ console.warn(chalk.yellow('⚠️ Server is currently running'));
66
+ console.log(chalk.dim('Changes will require a restart to take effect.'));
67
+ console.log(chalk.dim('Use --restart flag to automatically restart the server.\n'));
68
+ }
69
+
70
+ // Show what will change
71
+ console.log(chalk.bold('Configuration Changes:'));
72
+ console.log('─'.repeat(70));
73
+
74
+ if (options.host !== undefined) {
75
+ console.log(`${chalk.bold('Host:')} ${chalk.dim(server.host)} → ${chalk.green(options.host)}`);
76
+
77
+ // Security warning for 0.0.0.0
78
+ if (options.host === '0.0.0.0') {
79
+ console.log(chalk.yellow('\n⚠️ WARNING: Binding to 0.0.0.0 allows remote access from any network interface.'));
80
+ console.log(chalk.yellow(' This exposes your server to your local network and potentially the internet.'));
81
+ console.log(chalk.yellow(' Use 127.0.0.1 for localhost-only access (recommended for local development).\n'));
82
+ }
83
+ }
84
+ if (options.threads !== undefined) {
85
+ console.log(`${chalk.bold('Threads:')} ${chalk.dim(server.threads.toString())} → ${chalk.green(options.threads.toString())}`);
86
+ }
87
+ if (options.ctxSize !== undefined) {
88
+ console.log(`${chalk.bold('Context Size:')} ${chalk.dim(server.ctxSize.toLocaleString())} → ${chalk.green(options.ctxSize.toLocaleString())}`);
89
+ }
90
+ if (options.gpuLayers !== undefined) {
91
+ console.log(`${chalk.bold('GPU Layers:')} ${chalk.dim(server.gpuLayers.toString())} → ${chalk.green(options.gpuLayers.toString())}`);
92
+ }
93
+ if (options.verbose !== undefined) {
94
+ const oldValue = server.verbose ? 'enabled' : 'disabled';
95
+ const newValue = options.verbose ? 'enabled' : 'disabled';
96
+ console.log(`${chalk.bold('Verbose Logs:')} ${chalk.dim(oldValue)} → ${chalk.green(newValue)}`);
97
+ }
98
+ console.log('');
99
+
100
+ // Stop server if running and restart flag is set
101
+ if (wasRunning && options.restart) {
102
+ console.log(chalk.dim('Stopping server...'));
103
+ await launchctlManager.stopService(server.label);
104
+
105
+ // Wait a moment for clean shutdown
106
+ await new Promise(resolve => setTimeout(resolve, 1000));
107
+ }
108
+
109
+ // Update configuration
110
+ const updatedConfig = {
111
+ ...server,
112
+ ...(options.host !== undefined && { host: options.host }),
113
+ ...(options.threads !== undefined && { threads: options.threads }),
114
+ ...(options.ctxSize !== undefined && { ctxSize: options.ctxSize }),
115
+ ...(options.gpuLayers !== undefined && { gpuLayers: options.gpuLayers }),
116
+ ...(options.verbose !== undefined && { verbose: options.verbose }),
117
+ };
118
+
119
+ await stateManager.updateServerConfig(server.id, updatedConfig);
120
+
121
+ // Regenerate plist with new configuration
122
+ console.log(chalk.dim('Regenerating service configuration...'));
123
+ await launchctlManager.createPlist(updatedConfig);
124
+
125
+ // Restart server if it was running and restart flag is set
126
+ if (wasRunning && options.restart) {
127
+ console.log(chalk.dim('Starting server with new configuration...'));
128
+ await launchctlManager.loadService(updatedConfig.plistPath);
129
+ await launchctlManager.startService(updatedConfig.label);
130
+
131
+ // Wait and verify
132
+ await new Promise(resolve => setTimeout(resolve, 2000));
133
+ const finalStatus = await statusChecker.updateServerStatus(updatedConfig);
134
+
135
+ if (finalStatus.status === 'running') {
136
+ console.log(chalk.green(`✅ Server restarted successfully with new configuration`));
137
+ console.log(chalk.dim(` Port: http://localhost:${finalStatus.port}`));
138
+ if (finalStatus.pid) {
139
+ console.log(chalk.dim(` PID: ${finalStatus.pid}`));
140
+ }
141
+ } else {
142
+ console.error(chalk.red('❌ Server failed to start with new configuration'));
143
+ console.log(chalk.dim(' Check logs: ') + `llamacpp server logs ${server.id} --errors`);
144
+ process.exit(1);
145
+ }
146
+ } else {
147
+ console.log(chalk.green('✅ Configuration updated successfully'));
148
+ if (wasRunning && !options.restart) {
149
+ console.log(chalk.yellow('\n⚠️ Server is still running with old configuration'));
150
+ console.log(chalk.dim(' Restart to apply changes: ') + `llamacpp server stop ${server.id} && llamacpp server start ${server.id}`);
151
+ } else if (!wasRunning) {
152
+ console.log(chalk.dim('\n Start server: ') + `llamacpp server start ${server.id}`);
153
+ }
154
+ }
155
+ }
@@ -0,0 +1,153 @@
1
+ import chalk from 'chalk';
2
+ import * as path from 'path';
3
+ import { modelScanner } from '../lib/model-scanner';
4
+ import { stateManager } from '../lib/state-manager';
5
+ import { configGenerator, ServerOptions } from '../lib/config-generator';
6
+ import { portManager } from '../lib/port-manager';
7
+ import { launchctlManager } from '../lib/launchctl-manager';
8
+ import { statusChecker } from '../lib/status-checker';
9
+ import { commandExists } from '../utils/process-utils';
10
+ import { formatBytes } from '../utils/format-utils';
11
+ import { ensureDir } from '../utils/file-utils';
12
+
13
+ interface CreateOptions {
14
+ port?: number;
15
+ host?: string;
16
+ threads?: number;
17
+ ctxSize?: number;
18
+ gpuLayers?: number;
19
+ verbose?: boolean;
20
+ }
21
+
22
+ export async function createCommand(model: string, options: CreateOptions): Promise<void> {
23
+ // Initialize state manager
24
+ await stateManager.initialize();
25
+
26
+ // 1. Check if llama-server exists
27
+ if (!(await commandExists('llama-server'))) {
28
+ throw new Error('llama-server not found. Install with: brew install llama.cpp');
29
+ }
30
+
31
+ // 2. Resolve model path
32
+ const modelPath = await modelScanner.resolveModelPath(model);
33
+ if (!modelPath) {
34
+ throw new Error(`Model not found: ${model}\n\nRun: llamacpp list`);
35
+ }
36
+
37
+ const modelName = path.basename(modelPath);
38
+
39
+ // 3. Check if server already exists for this model
40
+ const existingServer = await stateManager.serverExistsForModel(modelPath);
41
+ if (existingServer) {
42
+ throw new Error(`Server already exists for ${modelName}\n\nUse: llamacpp server start ${modelName}`);
43
+ }
44
+
45
+ // 4. Get model size
46
+ const modelSize = await modelScanner.getModelSize(modelName);
47
+ if (!modelSize) {
48
+ throw new Error(`Failed to read model file: ${modelPath}`);
49
+ }
50
+
51
+ // 5. Determine port
52
+ let port: number;
53
+ if (options.port) {
54
+ portManager.validatePort(options.port);
55
+ const available = await portManager.isPortAvailable(options.port);
56
+ if (!available) {
57
+ throw new Error(`Port ${options.port} is already in use`);
58
+ }
59
+ port = options.port;
60
+ } else {
61
+ port = await portManager.findAvailablePort();
62
+ }
63
+
64
+ // 6. Generate server configuration
65
+ console.log(chalk.blue(`🚀 Creating server for ${modelName}\n`));
66
+
67
+ const serverOptions: ServerOptions = {
68
+ port: options.port,
69
+ host: options.host,
70
+ threads: options.threads,
71
+ ctxSize: options.ctxSize,
72
+ gpuLayers: options.gpuLayers,
73
+ verbose: options.verbose,
74
+ };
75
+
76
+ const config = await configGenerator.generateConfig(
77
+ modelPath,
78
+ modelName,
79
+ modelSize,
80
+ port,
81
+ serverOptions
82
+ );
83
+
84
+ // Security warning for 0.0.0.0
85
+ if (config.host === '0.0.0.0') {
86
+ console.log(chalk.yellow('⚠️ WARNING: Binding to 0.0.0.0 allows remote access from any network interface.'));
87
+ console.log(chalk.yellow(' This exposes your server to your local network and potentially the internet.'));
88
+ console.log(chalk.yellow(' Use 127.0.0.1 for localhost-only access (recommended for local development).\n'));
89
+ }
90
+
91
+ // Display configuration
92
+ console.log(chalk.dim(`Model: ${modelPath}`));
93
+ console.log(chalk.dim(`Size: ${formatBytes(modelSize)}`));
94
+ console.log(chalk.dim(`Host: ${config.host}`));
95
+ console.log(chalk.dim(`Port: ${config.port}${options.port ? '' : ' (auto-assigned)'}`));
96
+ console.log(chalk.dim(`Threads: ${config.threads}`));
97
+ console.log(chalk.dim(`Context Size: ${config.ctxSize}`));
98
+ console.log(chalk.dim(`GPU Layers: ${config.gpuLayers}`));
99
+ console.log(chalk.dim(`Verbose Logging: ${config.verbose ? 'enabled' : 'disabled'}`));
100
+ console.log();
101
+
102
+ // 7. Ensure log directory exists
103
+ await ensureDir(path.dirname(config.stdoutPath));
104
+
105
+ // 8. Create plist file
106
+ console.log(chalk.dim('Creating launchctl service...'));
107
+ await launchctlManager.createPlist(config);
108
+
109
+ // 9. Load service
110
+ try {
111
+ await launchctlManager.loadService(config.plistPath);
112
+ } catch (error) {
113
+ // Clean up plist if load fails
114
+ await launchctlManager.deletePlist(config.plistPath);
115
+ throw new Error(`Failed to load service: ${(error as Error).message}`);
116
+ }
117
+
118
+ // 10. Start service
119
+ try {
120
+ await launchctlManager.startService(config.label);
121
+ } catch (error) {
122
+ // Clean up if start fails
123
+ await launchctlManager.unloadService(config.plistPath);
124
+ await launchctlManager.deletePlist(config.plistPath);
125
+ throw new Error(`Failed to start service: ${(error as Error).message}`);
126
+ }
127
+
128
+ // 11. Wait for startup
129
+ console.log(chalk.dim('Waiting for server to start...'));
130
+ const started = await launchctlManager.waitForServiceStart(config.label, 5000);
131
+
132
+ if (!started) {
133
+ // Clean up if startup fails
134
+ await launchctlManager.stopService(config.label);
135
+ await launchctlManager.unloadService(config.plistPath);
136
+ await launchctlManager.deletePlist(config.plistPath);
137
+ throw new Error('Server failed to start. Check logs with: llamacpp server logs --errors');
138
+ }
139
+
140
+ // 12. Update config with running status
141
+ const updatedConfig = await statusChecker.updateServerStatus(config);
142
+
143
+ // 13. Save server config
144
+ await stateManager.saveServerConfig(updatedConfig);
145
+
146
+ // 14. Display success message
147
+ console.log();
148
+ console.log(chalk.green('✅ Server created and started successfully!'));
149
+ console.log();
150
+ console.log(chalk.dim(`Connect: http://${config.host}:${config.port}`));
151
+ console.log(chalk.dim(`View logs: llamacpp server logs ${config.id}`));
152
+ console.log(chalk.dim(`Stop: llamacpp server stop ${config.id}`));
153
+ }
@@ -33,5 +33,5 @@ export async function listCommand(): Promise<void> {
33
33
 
34
34
  const totalSize = models.reduce((sum, m) => sum + m.size, 0);
35
35
  console.log(chalk.dim(`\nTotal: ${models.length} models (${formatBytes(totalSize)})`));
36
- console.log(chalk.dim(`\nStart a server: llamacpp server start <model-filename>`));
36
+ console.log(chalk.dim(`\nCreate a server: llamacpp server create <model-filename>`));
37
37
  }
@@ -63,7 +63,13 @@ export async function logsCommand(identifier: string, options: LogsOptions): Pro
63
63
  }
64
64
 
65
65
  console.log(chalk.blue(`📋 Logs for ${server.modelName} (${logType}${filterDesc})`));
66
- console.log(chalk.dim(` ${logPath}\n`));
66
+ console.log(chalk.dim(` ${logPath}`));
67
+
68
+ // Show subtle note if verbose logging is not enabled
69
+ if (!server.verbose && !options.verbose && !options.errors && !options.http && !options.filter) {
70
+ console.log(chalk.dim(` verbosity is disabled`));
71
+ }
72
+ console.log();
67
73
 
68
74
  if (options.follow) {
69
75
  // Follow logs in real-time with optional filtering
@@ -133,15 +139,26 @@ export async function logsCommand(identifier: string, options: LogsOptions): Pro
133
139
  if (useCompactMode) {
134
140
  // Compact mode: read file and parse
135
141
  try {
136
- const command = `tail -n ${lines * 3} "${logPath}" | grep -E "log_server_r"`;
142
+ // Use large multiplier to account for verbose debug output between requests
143
+ const command = `tail -n ${lines * 100} "${logPath}" | grep -E "log_server_r"`;
137
144
  const output = await execCommand(command);
138
145
  const logLines = output.split('\n').filter((l) => l.trim());
139
146
 
147
+ const compactLines: string[] = [];
140
148
  for (const line of logLines) {
141
149
  logParser.processLine(line, (compactLine) => {
142
- console.log(compactLine);
150
+ compactLines.push(compactLine);
143
151
  });
144
152
  }
153
+
154
+ // Flush any remaining buffered logs (handles simple format)
155
+ logParser.flush((compactLine) => {
156
+ compactLines.push(compactLine);
157
+ });
158
+
159
+ // Show only the last N compact lines
160
+ const limitedLines = compactLines.slice(-lines);
161
+ limitedLines.forEach((line) => console.log(line));
145
162
  } catch (error) {
146
163
  throw new Error(`Failed to read logs: ${(error as Error).message}`);
147
164
  }
@@ -10,7 +10,7 @@ export async function psCommand(): Promise<void> {
10
10
 
11
11
  if (servers.length === 0) {
12
12
  console.log(chalk.yellow('No servers configured.'));
13
- console.log(chalk.dim('\nStart a server: llamacpp server start <model-filename>'));
13
+ console.log(chalk.dim('\nCreate a server: llamacpp server create <model-filename>'));
14
14
  return;
15
15
  }
16
16
 
@@ -30,7 +30,7 @@ export async function pullCommand(identifier: string, options: PullOptions): Pro
30
30
  const modelPath = await modelDownloader.downloadModel(parsed.repo, filename);
31
31
 
32
32
  console.log();
33
- console.log(chalk.dim(`Start server: llamacpp server start ${filename}`));
33
+ console.log(chalk.dim(`Create server: llamacpp server create ${filename}`));
34
34
  } catch (error) {
35
35
  if ((error as Error).message.includes('interrupted')) {
36
36
  console.log(chalk.dim('\nDownload was interrupted. Run the same command again to retry.'));
@@ -25,7 +25,11 @@ interface ChatCompletionChunk {
25
25
  }>;
26
26
  }
27
27
 
28
- export async function runCommand(modelIdentifier: string): Promise<void> {
28
+ interface RunOptions {
29
+ message?: string;
30
+ }
31
+
32
+ export async function runCommand(modelIdentifier: string, options: RunOptions = {}): Promise<void> {
29
33
  await stateManager.initialize();
30
34
 
31
35
  // 1. Find or start server
@@ -35,7 +39,7 @@ export async function runCommand(modelIdentifier: string): Promise<void> {
35
39
  // Try to resolve as a model name and start it
36
40
  console.log(chalk.blue(`🚀 No running server found. Starting ${modelIdentifier}...\n`));
37
41
  try {
38
- await startCommand(modelIdentifier, {});
42
+ await startCommand(modelIdentifier);
39
43
  server = await stateManager.findServer(modelIdentifier);
40
44
  if (!server) {
41
45
  throw new Error('Failed to start server');
@@ -52,7 +56,27 @@ export async function runCommand(modelIdentifier: string): Promise<void> {
52
56
  throw new Error(`Server exists but is not running. Start it with: llamacpp server start ${server.id}`);
53
57
  }
54
58
 
55
- // 3. Start REPL
59
+ // 3. If message provided, do one-shot mode
60
+ if (options.message) {
61
+ const conversationHistory: ChatMessage[] = [
62
+ {
63
+ role: 'user',
64
+ content: options.message,
65
+ },
66
+ ];
67
+
68
+ try {
69
+ await streamChatCompletion(server, conversationHistory);
70
+ console.log(); // Blank line after response
71
+ process.exit(0);
72
+ } catch (error) {
73
+ console.error(chalk.red(`\n❌ Error: ${(error as Error).message}\n`));
74
+ process.exit(1);
75
+ }
76
+ return;
77
+ }
78
+
79
+ // 4. Start REPL
56
80
  console.log(chalk.green(`💬 Connected to ${server.modelName} (port ${server.port})`));
57
81
  console.log(chalk.dim(`Type your message and press Enter. Use /exit to quit, /clear to reset history, /help for commands.\n`));
58
82
 
@@ -0,0 +1,126 @@
1
+ import chalk from 'chalk';
2
+ import { stateManager } from '../lib/state-manager';
3
+ import { statusChecker } from '../lib/status-checker';
4
+ import { formatUptime, formatBytes } from '../utils/format-utils';
5
+ import { getProcessMemory } from '../utils/process-utils';
6
+
7
+ export async function serverShowCommand(identifier: string): Promise<void> {
8
+ // Find the server
9
+ const server = await stateManager.findServer(identifier);
10
+
11
+ if (!server) {
12
+ console.error(chalk.red(`❌ Server not found: ${identifier}`));
13
+ console.log(chalk.dim('\nAvailable servers:'));
14
+ const allServers = await stateManager.getAllServers();
15
+ if (allServers.length === 0) {
16
+ console.log(chalk.dim(' (none)'));
17
+ console.log(chalk.dim('\nCreate a server: llamacpp server create <model-filename>'));
18
+ } else {
19
+ allServers.forEach(s => {
20
+ console.log(chalk.dim(` - ${s.id} (port ${s.port})`));
21
+ });
22
+ }
23
+ process.exit(1);
24
+ }
25
+
26
+ // Update status to get real-time info
27
+ console.log(chalk.dim('Checking server status...\n'));
28
+ const updatedServer = await statusChecker.updateServerStatus(server);
29
+
30
+ // Display server information
31
+ console.log(chalk.bold('Server Configuration:'));
32
+ console.log('─'.repeat(70));
33
+
34
+ // Basic info
35
+ console.log(`${chalk.bold('Server ID:')} ${updatedServer.id}`);
36
+ console.log(`${chalk.bold('Model Name:')} ${updatedServer.modelName}`);
37
+ console.log(`${chalk.bold('Model Path:')} ${chalk.dim(updatedServer.modelPath)}`);
38
+ console.log(`${chalk.bold('Host:')} ${updatedServer.host}`);
39
+ console.log(`${chalk.bold('Port:')} http://${updatedServer.host}:${updatedServer.port}`);
40
+
41
+ // Status with color
42
+ let statusText: string;
43
+ let statusColor: (text: string) => string;
44
+ switch (updatedServer.status) {
45
+ case 'running':
46
+ statusText = '✅ RUNNING';
47
+ statusColor = chalk.green;
48
+ break;
49
+ case 'crashed':
50
+ statusText = '❌ CRASHED';
51
+ statusColor = chalk.red;
52
+ break;
53
+ default:
54
+ statusText = '⚠️ STOPPED';
55
+ statusColor = chalk.yellow;
56
+ }
57
+ console.log(`${chalk.bold('Status:')} ${statusColor(statusText)}`);
58
+
59
+ if (updatedServer.pid) {
60
+ console.log(`${chalk.bold('PID:')} ${updatedServer.pid}`);
61
+ }
62
+
63
+ // Runtime info for running servers
64
+ if (updatedServer.status === 'running') {
65
+ if (updatedServer.lastStarted) {
66
+ const uptime = formatUptime(updatedServer.lastStarted);
67
+ console.log(`${chalk.bold('Uptime:')} ${uptime}`);
68
+ }
69
+
70
+ if (updatedServer.pid) {
71
+ const memoryBytes = await getProcessMemory(updatedServer.pid);
72
+ if (memoryBytes !== null) {
73
+ console.log(`${chalk.bold('Memory:')} ${formatBytes(memoryBytes)}`);
74
+ }
75
+ }
76
+ }
77
+
78
+ // Configuration section
79
+ console.log('\n' + '─'.repeat(70));
80
+ console.log(chalk.bold('Configuration:'));
81
+ console.log('─'.repeat(70));
82
+ console.log(`${chalk.bold('Threads:')} ${updatedServer.threads}`);
83
+ console.log(`${chalk.bold('Context Size:')} ${updatedServer.ctxSize.toLocaleString()}`);
84
+ console.log(`${chalk.bold('GPU Layers:')} ${updatedServer.gpuLayers}`);
85
+ console.log(`${chalk.bold('Embeddings:')} ${updatedServer.embeddings ? 'enabled' : 'disabled'}`);
86
+ console.log(`${chalk.bold('Jinja:')} ${updatedServer.jinja ? 'enabled' : 'disabled'}`);
87
+ console.log(`${chalk.bold('Verbose Logs:')} ${updatedServer.verbose ? chalk.green('enabled') : chalk.dim('disabled')}`);
88
+
89
+ // Timestamps section
90
+ console.log('\n' + '─'.repeat(70));
91
+ console.log(chalk.bold('Timestamps:'));
92
+ console.log('─'.repeat(70));
93
+ console.log(`${chalk.bold('Created:')} ${new Date(updatedServer.createdAt).toLocaleString()}`);
94
+ if (updatedServer.lastStarted) {
95
+ console.log(`${chalk.bold('Last Started:')} ${new Date(updatedServer.lastStarted).toLocaleString()}`);
96
+ }
97
+ if (updatedServer.lastStopped) {
98
+ console.log(`${chalk.bold('Last Stopped:')} ${new Date(updatedServer.lastStopped).toLocaleString()}`);
99
+ }
100
+
101
+ // System paths section
102
+ console.log('\n' + '─'.repeat(70));
103
+ console.log(chalk.bold('System Paths:'));
104
+ console.log('─'.repeat(70));
105
+ console.log(`${chalk.bold('Service Label:')} ${updatedServer.label}`);
106
+ console.log(`${chalk.bold('Plist File:')} ${chalk.dim(updatedServer.plistPath)}`);
107
+ console.log(`${chalk.bold('Stdout Log:')} ${chalk.dim(updatedServer.stdoutPath)}`);
108
+ console.log(`${chalk.bold('Stderr Log:')} ${chalk.dim(updatedServer.stderrPath)}`);
109
+
110
+ // Helpful commands
111
+ console.log('\n' + '─'.repeat(70));
112
+ console.log(chalk.bold('Quick Commands:'));
113
+ console.log('─'.repeat(70));
114
+
115
+ if (updatedServer.status === 'running') {
116
+ console.log(chalk.dim(' View logs: ') + `llamacpp server logs ${updatedServer.id}`);
117
+ console.log(chalk.dim(' Interactive chat: ') + `llamacpp server run ${updatedServer.id}`);
118
+ console.log(chalk.dim(' Stop server: ') + `llamacpp server stop ${updatedServer.id}`);
119
+ } else {
120
+ console.log(chalk.dim(' Start server: ') + `llamacpp server start ${updatedServer.id}`);
121
+ if (updatedServer.status === 'crashed') {
122
+ console.log(chalk.dim(' View error logs: ') + `llamacpp server logs ${updatedServer.id} --errors`);
123
+ }
124
+ }
125
+ console.log(chalk.dim(' Remove server: ') + `llamacpp server rm ${updatedServer.id}`);
126
+ }