@appkit/llamacpp-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.versionrc.json +16 -0
- package/CHANGELOG.md +10 -0
- package/README.md +474 -0
- package/bin/llamacpp +26 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +196 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/delete.d.ts +2 -0
- package/dist/commands/delete.d.ts.map +1 -0
- package/dist/commands/delete.js +104 -0
- package/dist/commands/delete.js.map +1 -0
- package/dist/commands/list.d.ts +2 -0
- package/dist/commands/list.d.ts.map +1 -0
- package/dist/commands/list.js +37 -0
- package/dist/commands/list.js.map +1 -0
- package/dist/commands/logs.d.ts +8 -0
- package/dist/commands/logs.d.ts.map +1 -0
- package/dist/commands/logs.js +57 -0
- package/dist/commands/logs.js.map +1 -0
- package/dist/commands/ps.d.ts +2 -0
- package/dist/commands/ps.d.ts.map +1 -0
- package/dist/commands/ps.js +72 -0
- package/dist/commands/ps.js.map +1 -0
- package/dist/commands/pull.d.ts +6 -0
- package/dist/commands/pull.d.ts.map +1 -0
- package/dist/commands/pull.js +36 -0
- package/dist/commands/pull.js.map +1 -0
- package/dist/commands/rm.d.ts +2 -0
- package/dist/commands/rm.d.ts.map +1 -0
- package/dist/commands/rm.js +134 -0
- package/dist/commands/rm.js.map +1 -0
- package/dist/commands/run.d.ts +2 -0
- package/dist/commands/run.d.ts.map +1 -0
- package/dist/commands/run.js +198 -0
- package/dist/commands/run.js.map +1 -0
- package/dist/commands/search.d.ts +7 -0
- package/dist/commands/search.d.ts.map +1 -0
- package/dist/commands/search.js +93 -0
- package/dist/commands/search.js.map +1 -0
- package/dist/commands/show.d.ts +6 -0
- package/dist/commands/show.d.ts.map +1 -0
- package/dist/commands/show.js +196 -0
- package/dist/commands/show.js.map +1 -0
- package/dist/commands/start.d.ts +9 -0
- package/dist/commands/start.d.ts.map +1 -0
- package/dist/commands/start.js +150 -0
- package/dist/commands/start.js.map +1 -0
- package/dist/commands/stop.d.ts +2 -0
- package/dist/commands/stop.d.ts.map +1 -0
- package/dist/commands/stop.js +39 -0
- package/dist/commands/stop.js.map +1 -0
- package/dist/lib/config-generator.d.ts +30 -0
- package/dist/lib/config-generator.d.ts.map +1 -0
- package/dist/lib/config-generator.js +125 -0
- package/dist/lib/config-generator.js.map +1 -0
- package/dist/lib/launchctl-manager.d.ts +55 -0
- package/dist/lib/launchctl-manager.d.ts.map +1 -0
- package/dist/lib/launchctl-manager.js +227 -0
- package/dist/lib/launchctl-manager.js.map +1 -0
- package/dist/lib/model-downloader.d.ts +44 -0
- package/dist/lib/model-downloader.d.ts.map +1 -0
- package/dist/lib/model-downloader.js +248 -0
- package/dist/lib/model-downloader.js.map +1 -0
- package/dist/lib/model-scanner.d.ts +31 -0
- package/dist/lib/model-scanner.d.ts.map +1 -0
- package/dist/lib/model-scanner.js +145 -0
- package/dist/lib/model-scanner.js.map +1 -0
- package/dist/lib/model-search.d.ts +29 -0
- package/dist/lib/model-search.d.ts.map +1 -0
- package/dist/lib/model-search.js +131 -0
- package/dist/lib/model-search.js.map +1 -0
- package/dist/lib/port-manager.d.ts +26 -0
- package/dist/lib/port-manager.d.ts.map +1 -0
- package/dist/lib/port-manager.js +75 -0
- package/dist/lib/port-manager.js.map +1 -0
- package/dist/lib/state-manager.d.ts +59 -0
- package/dist/lib/state-manager.d.ts.map +1 -0
- package/dist/lib/state-manager.js +178 -0
- package/dist/lib/state-manager.js.map +1 -0
- package/dist/lib/status-checker.d.ts +28 -0
- package/dist/lib/status-checker.d.ts.map +1 -0
- package/dist/lib/status-checker.js +99 -0
- package/dist/lib/status-checker.js.map +1 -0
- package/dist/types/global-config.d.ts +16 -0
- package/dist/types/global-config.d.ts.map +1 -0
- package/dist/types/global-config.js +18 -0
- package/dist/types/global-config.js.map +1 -0
- package/dist/types/model-info.d.ts +9 -0
- package/dist/types/model-info.d.ts.map +1 -0
- package/dist/types/model-info.js +3 -0
- package/dist/types/model-info.js.map +1 -0
- package/dist/types/server-config.d.ts +27 -0
- package/dist/types/server-config.d.ts.map +1 -0
- package/dist/types/server-config.js +15 -0
- package/dist/types/server-config.js.map +1 -0
- package/dist/utils/file-utils.d.ts +49 -0
- package/dist/utils/file-utils.d.ts.map +1 -0
- package/dist/utils/file-utils.js +144 -0
- package/dist/utils/file-utils.js.map +1 -0
- package/dist/utils/format-utils.d.ts +29 -0
- package/dist/utils/format-utils.d.ts.map +1 -0
- package/dist/utils/format-utils.js +82 -0
- package/dist/utils/format-utils.js.map +1 -0
- package/dist/utils/process-utils.d.ts +27 -0
- package/dist/utils/process-utils.d.ts.map +1 -0
- package/dist/utils/process-utils.js +66 -0
- package/dist/utils/process-utils.js.map +1 -0
- package/package.json +56 -0
- package/src/cli.ts +195 -0
- package/src/commands/delete.ts +74 -0
- package/src/commands/list.ts +37 -0
- package/src/commands/logs.ts +61 -0
- package/src/commands/ps.ts +79 -0
- package/src/commands/pull.ts +40 -0
- package/src/commands/rm.ts +114 -0
- package/src/commands/run.ts +209 -0
- package/src/commands/search.ts +107 -0
- package/src/commands/show.ts +207 -0
- package/src/commands/start.ts +140 -0
- package/src/commands/stop.ts +39 -0
- package/src/lib/config-generator.ts +119 -0
- package/src/lib/launchctl-manager.ts +209 -0
- package/src/lib/model-downloader.ts +259 -0
- package/src/lib/model-scanner.ts +125 -0
- package/src/lib/model-search.ts +114 -0
- package/src/lib/port-manager.ts +80 -0
- package/src/lib/state-manager.ts +177 -0
- package/src/lib/status-checker.ts +113 -0
- package/src/types/global-config.ts +26 -0
- package/src/types/model-info.ts +8 -0
- package/src/types/server-config.ts +42 -0
- package/src/utils/file-utils.ts +106 -0
- package/src/utils/format-utils.ts +80 -0
- package/src/utils/process-utils.ts +60 -0
- package/tsconfig.json +20 -0
package/.versionrc.json
ADDED
@@ -0,0 +1,16 @@
+{
+  "types": [
+    { "type": "feat", "section": "Features" },
+    { "type": "fix", "section": "Bug Fixes" },
+    { "type": "chore", "hidden": true },
+    { "type": "docs", "hidden": true },
+    { "type": "style", "hidden": true },
+    { "type": "refactor", "section": "Code Refactoring" },
+    { "type": "perf", "section": "Performance Improvements" },
+    { "type": "test", "hidden": true }
+  ],
+  "releaseCommitMessageFormat": "chore(release): {{currentTag}}",
+  "skip": {
+    "changelog": false
+  }
+}
package/CHANGELOG.md
ADDED
@@ -0,0 +1,10 @@
+# Changelog
+
+All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
+
+## 1.0.0 (2025-12-04)
+
+
+### Features
+
+* refactored and added commands ([0e58ae1](https://github.com/appkitstudio/llamacpp-cli/commit/0e58ae1c619a519b484ae3c78863b4932672865c))
package/README.md
ADDED
@@ -0,0 +1,474 @@
+# llamacpp-cli
+
+> Manage llama.cpp servers like Ollama—but faster. Full control over llama-server with macOS launchctl integration.
+
+CLI tool to manage local llama.cpp servers on macOS. Provides an Ollama-like experience for managing GGUF models and llama-server instances, with **significantly faster response times** than Ollama.
+
+[](https://www.npmjs.com/package/@appkit/llamacpp-cli)
+[](https://opensource.org/licenses/MIT)
+
+**Status:** Beta - Stable for personal use, actively maintained
+
+## Features
+
+- 🚀 **Easy server management** - Start, stop, and monitor llama.cpp servers
+- 🤖 **Model downloads** - Pull GGUF models from Hugging Face
+- ⚙️ **Smart defaults** - Auto-configure threads, context size, and GPU layers based on model size
+- 🔌 **Auto port assignment** - Automatically find available ports (9000-9999)
+- 📊 **Status monitoring** - Real-time server status with launchctl integration
+- 🪵 **Log access** - View and tail server logs
+
+## Why llamacpp-cli?
+
+**TL;DR:** Much faster response times than Ollama by using llama.cpp's native server directly.
+
+Ollama is great, but it adds a wrapper layer that introduces latency. llamacpp-cli gives you:
+
+- **⚡️ Faster inference** - Direct llama-server means lower overhead and quicker responses
+- **🎛️ Full control** - Access all llama-server flags and configuration options
+- **🔧 Transparency** - Standard launchctl services, visible in Activity Monitor
+- **📦 Any GGUF model** - Not limited to Ollama's model library
+- **🪶 Lightweight** - No daemon overhead, just native macOS services
+
+### Comparison
+
+| Feature | llamacpp-cli | Ollama |
+|---------|-------------|--------|
+| **Response Time** | ⚡️ **Faster** (native) | Slower (wrapper layer) |
+| Model Format | Any GGUF from HF | Ollama's library |
+| Server Binary | llama.cpp native | Custom wrapper |
+| Configuration | Full llama-server flags | Limited options |
+| Service Management | macOS launchctl | Custom daemon |
+| Resource Usage | Lower overhead | Higher overhead |
+| Transparency | Standard Unix tools | Black box |
+
+If you need raw speed and full control, llamacpp-cli is the better choice.
+
+## Installation
+
+```bash
+npm install -g @appkit/llamacpp-cli
+```
+
+## Prerequisites
+
+- macOS (uses launchctl for service management)
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) installed via Homebrew:
+  ```bash
+  brew install llama.cpp
+  ```
+
+## Quick Start
+
+```bash
+# Search for models on Hugging Face
+llamacpp search "llama 3b"
+
+# Download a model
+llamacpp pull bartowski/Llama-3.2-3B-Instruct-GGUF/llama-3.2-3b-instruct-q4_k_m.gguf
+
+# List local models
+llamacpp ls
+
+# Start a server (auto-assigns port, uses smart defaults)
+llamacpp server start llama-3.2-3b-instruct-q4_k_m.gguf
+
+# View running servers
+llamacpp ps
+
+# Chat with your model interactively
+llamacpp server run llama-3.2-3b
+
+# Stop a server
+llamacpp server stop llama-3.2-3b
+
+# View logs
+llamacpp server logs llama-3.2-3b -f
+```
+
+## Using Your Server
+
+Once a server is running, it exposes an OpenAI-compatible API:
+
+```bash
+# Chat completion
+curl http://localhost:9000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "messages": [
+      {"role": "system", "content": "You are a helpful assistant."},
+      {"role": "user", "content": "What is the capital of France?"}
+    ],
+    "temperature": 0.7,
+    "max_tokens": 100
+  }'
+
+# Text completion
+curl http://localhost:9000/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "prompt": "Once upon a time",
+    "max_tokens": 50
+  }'
+
+# Get embeddings
+curl http://localhost:9000/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{
+    "input": "Hello world"
+  }'
+
+# Health check
+curl http://localhost:9000/health
+```
+
+The server is fully compatible with OpenAI's API format, so you can use it with any OpenAI-compatible client library.
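For example, here is a minimal TypeScript sketch that points the official `openai` npm client at a server started on port 9000. The client package and the model name are illustrative assumptions (neither ships with this package); llama-server serves whichever model it was started with.

```typescript
// Minimal sketch: point the `openai` npm client at the local llama-server.
import OpenAI from "openai";

const client = new OpenAI({
  baseURL: "http://localhost:9000/v1", // port shown by `llamacpp ps`
  apiKey: "not-needed",                // no API key is required by default
});

async function main() {
  const response = await client.chat.completions.create({
    model: "llama-3.2-3b-instruct",    // illustrative; the server serves its loaded model
    messages: [{ role: "user", content: "What is the capital of France?" }],
    max_tokens: 100,
  });
  console.log(response.choices[0].message.content);
}

main().catch(console.error);
```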
+
+### Example Output
+
+Starting a server:
+```
+$ llamacpp server start llama-3.2-3b-instruct-q4_k_m.gguf
+
+✓ Server started successfully!
+
+Model: llama-3.2-3b-instruct-q4_k_m.gguf
+Port: 9000
+Status: Running (PID 12345)
+
+API endpoint: http://localhost:9000
+```
+
+Viewing running servers:
+```
+$ llamacpp ps
+
+┌────────────────────────┬────────┬─────────┬──────┬──────────┬─────────┐
+│ Model                  │ Port   │ Status  │ PID  │ Threads  │ Ctx Size│
+├────────────────────────┼────────┼─────────┼──────┼──────────┼─────────┤
+│ llama-3.2-3b-instruct  │ 9000   │ Running │ 1234 │ 8        │ 4096    │
+│ qwen2-7b-instruct      │ 9001   │ Running │ 5678 │ 8        │ 8192    │
+└────────────────────────┴────────┴─────────┴──────┴──────────┴─────────┘
+```
+
+Running interactive chat:
+```
+$ llamacpp server run llama-3.2-3b
+
+Connected to llama-3.2-3b-instruct on port 9000
+
+You: What is the capital of France?
+Assistant: The capital of France is Paris...
+
+You: exit
+```
+
+## Commands
+
+### `llamacpp ls`
+List all GGUF models in ~/models directory.
+
+```bash
+llamacpp ls
+```
+
+### `llamacpp search <query> [options]`
+Search Hugging Face for GGUF models.
+
+```bash
+# Search for models
+llamacpp search "llama 3.2"
+
+# Limit results
+llamacpp search "qwen" --limit 10
+
+# Show files for a specific result (by index number)
+llamacpp search "llama 3b" --files 1
+```
+
+**Options:**
+- `-l, --limit <number>` - Max results to show (default: 20)
+- `--files [number]` - Show available GGUF files for result # (e.g., --files 1)
+
+**Tip:** Results are numbered. Use the number with `--files` to see available quantizations for that model!
+
+### `llamacpp show <identifier> [options]`
+Show details about a model or file without downloading.
+
+```bash
+# Show model info and all GGUF files
+llamacpp show bartowski/Llama-3.2-3B-Instruct-GGUF
+
+# Show info for a specific file
+llamacpp show bartowski/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q4_K_M.gguf
+
+# Or use --file flag
+llamacpp show bartowski/Llama-3.2-3B-Instruct-GGUF --file Llama-3.2-3B-Instruct-Q4_K_M.gguf
+```
+
+**Options:**
+- `-f, --file <filename>` - Show details for a specific file
+
+**Displays:** Downloads, likes, license, tags, and available GGUF files
+
+### `llamacpp pull <identifier> [options]`
+Download a GGUF model from Hugging Face.
+
+```bash
+# Option 1: Full path (recommended)
+llamacpp pull bartowski/Llama-3.2-3B-Instruct-GGUF/llama-3.2-3b-instruct-q4_k_m.gguf
+
+# Option 2: Repo + --file flag
+llamacpp pull bartowski/Llama-3.2-3B-Instruct-GGUF --file llama-3.2-3b-instruct-q4_k_m.gguf
+```
+
+**Options:**
+- `-f, --file <filename>` - Specific GGUF file (alternative to path)
+
+### `llamacpp rm <model>`
+Delete a model file from ~/models (and stop any associated servers).
+
+```bash
+llamacpp rm llama-3.2-3b-instruct-q4_k_m.gguf
+llamacpp rm llama-3.2   # Partial name matching
+```
+
+### `llamacpp ps`
+List all servers with status.
+
+```bash
+llamacpp ps
+```
+
+## Server Management
+
+### `llamacpp server start <model> [options]`
+Start a llama-server instance.
+
+```bash
+llamacpp server start llama-3.2-3b-instruct-q4_k_m.gguf
+llamacpp server start llama-3.2-3b-instruct-q4_k_m.gguf --port 8080 --ctx-size 16384
+```
+
+**Options:**
+- `-p, --port <number>` - Port number (default: auto-assign from 9000)
+- `-t, --threads <number>` - Thread count (default: half of CPU cores)
+- `-c, --ctx-size <number>` - Context size (default: based on model size)
+- `-g, --gpu-layers <number>` - GPU layers (default: 60)
+
+### `llamacpp server run <identifier>`
+Run an interactive chat session with a model.
+
+```bash
+llamacpp server run llama-3.2-3b   # By partial name
+llamacpp server run 9000           # By port
+llamacpp server run llama-3-2-3b   # By server ID
+```
+
+**Identifiers:** Port number, server ID, partial model name, or model filename
+
+Type `exit` or press Ctrl+C to end the session.
+
+### `llamacpp server stop <identifier>`
+Stop a running server by model name, port, or ID.
+
+```bash
+llamacpp server stop llama-3.2-3b
+llamacpp server stop 9000
+```
+
+### `llamacpp server rm <identifier>`
+Remove a server configuration and launchctl service (preserves model file).
+
+```bash
+llamacpp server rm llama-3.2-3b
+llamacpp server rm 9000
+```
+
+### `llamacpp server logs <identifier> [options]`
+View server logs.
+
+```bash
+llamacpp server logs llama-3.2-3b
+llamacpp server logs llama-3.2-3b -f         # Follow logs
+llamacpp server logs llama-3.2-3b -n 100     # Last 100 lines
+llamacpp server logs llama-3.2-3b --errors   # Errors only
+```
+
+**Options:**
+- `-f, --follow` - Follow log output in real-time
+- `-n, --lines <number>` - Number of lines to show (default: 50)
+- `--errors` - Show stderr instead of stdout
+
+## Configuration
+
+llamacpp-cli stores its configuration in `~/.llamacpp/`:
+
+```
+~/.llamacpp/
+├── config.json          # Global settings
+├── servers/             # Server configurations
+│   └── <server-id>.json
+└── logs/                # Server logs
+    ├── <server-id>.stdout
+    └── <server-id>.stderr
+```
+
+## Smart Defaults
+
+llamacpp-cli automatically configures optimal settings based on model size:
+
+| Model Size | Context Size | Threads | GPU Layers |
+|------------|--------------|---------|------------|
+| < 1GB | 2048 | Half cores | 60 |
+| 1-3GB | 4096 | Half cores | 60 |
+| 3-6GB | 8192 | Half cores | 60 |
+| > 6GB | 16384 | Half cores | 60 |
+
+All servers include `--embeddings` and `--jinja` flags by default.
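As a rough illustration, the table above corresponds to logic along these lines. This is a hypothetical TypeScript sketch, not the package's actual config-generator code, and the thresholds are taken directly from the table.

```typescript
// Hypothetical sketch of the defaults in the table above; the real logic
// lives in src/lib/config-generator.ts and may differ in detail.
import os from "node:os";

function defaultCtxSize(modelSizeGB: number): number {
  if (modelSizeGB < 1) return 2048;
  if (modelSizeGB <= 3) return 4096;
  if (modelSizeGB <= 6) return 8192;
  return 16384;
}

const defaults = {
  threads: Math.max(1, Math.floor(os.cpus().length / 2)), // half of CPU cores
  gpuLayers: 60,
  ctxSize: defaultCtxSize(2.0), // e.g. a ~2 GB 3B Q4_K_M model gets 4096
};
console.log(defaults);
```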
+
+## How It Works
+
+llamacpp-cli uses macOS launchctl to manage llama-server processes:
+
+1. Creates a launchd plist file in `~/Library/LaunchAgents/`
+2. Registers the service with `launchctl load`
+3. Starts the server with `launchctl start`
+4. Monitors status via `launchctl list` and `lsof`
+
+Services are named `com.llama.<model-id>` and persist across reboots.
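To make the plist step concrete, here is a rough sketch of the kind of launchd job definition being described, built as a TypeScript string. It is illustrative only: the server ID, model path, and flags are assumptions based on this README, and the real file is written by the package's config-generator, which may use different keys and arguments.

```typescript
// Illustrative sketch of a launchd plist for a hypothetical server "llama-3-2-3b".
// Keys, paths, and llama-server flags are assumptions drawn from the README;
// the actual plist is produced by src/lib/config-generator.ts.
const home = process.env.HOME;
const modelId = "llama-3-2-3b"; // hypothetical server ID

const plist = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
  <key>Label</key>
  <string>com.llama.${modelId}</string>
  <key>ProgramArguments</key>
  <array>
    <string>/opt/homebrew/bin/llama-server</string>
    <string>--model</string>
    <string>${home}/models/llama-3.2-3b-instruct-q4_k_m.gguf</string>
    <string>--port</string>
    <string>9000</string>
  </array>
  <key>RunAtLoad</key>
  <true/>
  <key>StandardOutPath</key>
  <string>${home}/.llamacpp/logs/${modelId}.stdout</string>
  <key>StandardErrorPath</key>
  <string>${home}/.llamacpp/logs/${modelId}.stderr</string>
</dict>
</plist>`;

console.log(plist); // would be written to ~/Library/LaunchAgents/com.llama.<model-id>.plist
```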
+
+## Known Limitations
+
+- **macOS only** - Relies on launchctl for service management (Linux/Windows support planned)
+- **Homebrew dependency** - Requires llama.cpp installed via `brew install llama.cpp`
+- **~/models convention** - Expects GGUF models in `~/models` directory
+- **Single binary** - Assumes llama-server at `/opt/homebrew/bin/llama-server`
+- **Port range** - Auto-assignment limited to 9000-9999 (configurable with `--port`)
+
+## Troubleshooting
+
+### Command not found
+Make sure npm global bin directory is in your PATH:
+```bash
+npm config get prefix   # Should be in PATH
+```
+
+### llama-server not found
+Install llama.cpp via Homebrew:
+```bash
+brew install llama.cpp
+```
+
+### Port already in use
+llamacpp-cli will automatically find the next available port. Or specify a custom port:
+```bash
+llamacpp server start model.gguf --port 8080
+```
+
+### Server won't start
+Check the logs for errors:
+```bash
+llamacpp server logs <identifier> --errors
+```
+
+## Development
+
+```bash
+# Install dependencies
+npm install
+
+# Run in development mode
+npm run dev -- ps
+
+# Build for production
+npm run build
+
+# Clean build artifacts
+npm run clean
+```
+
+### Releasing
+
+This project uses [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for automated releases based on conventional commits.
+
+**Commit Message Format:**
+
+```bash
+# Features (bumps minor version)
+git commit -m "feat: add interactive chat command"
+git commit -m "feat(search): add limit option for search results"
+
+# Bug fixes (bumps patch version)
+git commit -m "fix: handle port conflicts correctly"
+git commit -m "fix(logs): stream logs without buffering"
+
+# Breaking changes (bumps major version)
+git commit -m "feat!: change server command structure"
+git commit -m "feat: major refactor
+
+BREAKING CHANGE: server commands now require 'server' prefix"
+
+# Other types (no version bump, hidden in changelog)
+git commit -m "chore: update dependencies"
+git commit -m "docs: fix typo in README"
+git commit -m "test: add unit tests for port manager"
+```
+
+**Release Commands:**
+
+```bash
+# Automatic version bump based on commits
+npm run release
+
+# Force specific version bump
+npm run release:patch   # 1.0.0 → 1.0.1
+npm run release:minor   # 1.0.0 → 1.1.0
+npm run release:major   # 1.0.0 → 2.0.0
+
+# First release (doesn't bump version, just tags)
+npm run release:first
+```
+
+**What happens during release:**
+
+1. Analyzes commits since last release
+2. Determines version bump (feat = minor, fix = patch, BREAKING CHANGE = major)
+3. Updates `package.json` version
+4. Generates/updates `CHANGELOG.md`
+5. Creates git commit: `chore(release): v1.2.3`
+6. Creates git tag: `v1.2.3`
+7. Pushes tags to GitHub
+8. Publishes to npm with `--access public`
+
+## Contributing
+
+Contributions are welcome! If you'd like to contribute:
+
+1. **Open an issue first** for major changes to discuss the approach
+2. Fork the repository
+3. Create a feature branch (`git checkout -b feature/amazing-feature`)
+4. Make your changes and test with `npm run dev`
+5. **Commit using conventional commits** (see [Releasing](#releasing) section)
+   - `feat:` for new features
+   - `fix:` for bug fixes
+   - `docs:` for documentation
+   - `chore:` for maintenance
+6. Push to the branch (`git push origin feature/amazing-feature`)
+7. Open a Pull Request
+
+### Development Tips
+
+- Use `npm run dev -- <command>` to test commands without building
+- Check logs with `llamacpp server logs <server> --errors` when debugging
+- Test launchctl integration with `launchctl list | grep com.llama`
+- All server configs are in `~/.llamacpp/servers/`
+- Test interactive chat with `npm run dev -- server run <model>`
+
+## Acknowledgments
+
+Built on top of the excellent [llama.cpp](https://github.com/ggerganov/llama.cpp) project by Georgi Gerganov and contributors.
+
+## License
+
+MIT
package/bin/llamacpp
ADDED
@@ -0,0 +1,26 @@
+#!/usr/bin/env node
+
+// CLI entry point - runs the TypeScript CLI directly with tsx in development
+// or the compiled JS in production
+
+const path = require('path');
+const { spawn } = require('child_process');
+
+const isDevelopment = process.env.NODE_ENV !== 'production';
+const projectRoot = path.join(__dirname, '..');
+
+if (isDevelopment) {
+  // Development: use tsx to run TypeScript directly
+  const tsxPath = path.join(projectRoot, 'node_modules', '.bin', 'tsx');
+  const cliPath = path.join(projectRoot, 'src', 'cli.ts');
+
+  const child = spawn(tsxPath, [cliPath, ...process.argv.slice(2)], {
+    stdio: 'inherit',
+    cwd: projectRoot,
+  });
+
+  child.on('exit', (code) => process.exit(code || 0));
+} else {
+  // Production: run compiled JavaScript
+  require(path.join(projectRoot, 'dist', 'cli.js'));
+}
package/dist/cli.d.ts
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}