@appkit/llamacpp-cli 1.8.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +58 -0
- package/README.md +249 -40
- package/dist/cli.js +154 -10
- package/dist/cli.js.map +1 -1
- package/dist/commands/completion.d.ts +9 -0
- package/dist/commands/completion.d.ts.map +1 -0
- package/dist/commands/completion.js +83 -0
- package/dist/commands/completion.js.map +1 -0
- package/dist/commands/monitor.js +1 -1
- package/dist/commands/monitor.js.map +1 -1
- package/dist/commands/ps.d.ts +1 -3
- package/dist/commands/ps.d.ts.map +1 -1
- package/dist/commands/ps.js +36 -115
- package/dist/commands/ps.js.map +1 -1
- package/dist/commands/router/config.d.ts +11 -0
- package/dist/commands/router/config.d.ts.map +1 -0
- package/dist/commands/router/config.js +100 -0
- package/dist/commands/router/config.js.map +1 -0
- package/dist/commands/router/logs.d.ts +12 -0
- package/dist/commands/router/logs.d.ts.map +1 -0
- package/dist/commands/router/logs.js +238 -0
- package/dist/commands/router/logs.js.map +1 -0
- package/dist/commands/router/restart.d.ts +2 -0
- package/dist/commands/router/restart.d.ts.map +1 -0
- package/dist/commands/router/restart.js +39 -0
- package/dist/commands/router/restart.js.map +1 -0
- package/dist/commands/router/start.d.ts +2 -0
- package/dist/commands/router/start.d.ts.map +1 -0
- package/dist/commands/router/start.js +60 -0
- package/dist/commands/router/start.js.map +1 -0
- package/dist/commands/router/status.d.ts +2 -0
- package/dist/commands/router/status.d.ts.map +1 -0
- package/dist/commands/router/status.js +116 -0
- package/dist/commands/router/status.js.map +1 -0
- package/dist/commands/router/stop.d.ts +2 -0
- package/dist/commands/router/stop.d.ts.map +1 -0
- package/dist/commands/router/stop.js +36 -0
- package/dist/commands/router/stop.js.map +1 -0
- package/dist/commands/tui.d.ts +2 -0
- package/dist/commands/tui.d.ts.map +1 -0
- package/dist/commands/tui.js +27 -0
- package/dist/commands/tui.js.map +1 -0
- package/dist/lib/completion.d.ts +5 -0
- package/dist/lib/completion.d.ts.map +1 -0
- package/dist/lib/completion.js +195 -0
- package/dist/lib/completion.js.map +1 -0
- package/dist/lib/model-downloader.d.ts +5 -1
- package/dist/lib/model-downloader.d.ts.map +1 -1
- package/dist/lib/model-downloader.js +53 -20
- package/dist/lib/model-downloader.js.map +1 -1
- package/dist/lib/router-logger.d.ts +61 -0
- package/dist/lib/router-logger.d.ts.map +1 -0
- package/dist/lib/router-logger.js +200 -0
- package/dist/lib/router-logger.js.map +1 -0
- package/dist/lib/router-manager.d.ts +103 -0
- package/dist/lib/router-manager.d.ts.map +1 -0
- package/dist/lib/router-manager.js +394 -0
- package/dist/lib/router-manager.js.map +1 -0
- package/dist/lib/router-server.d.ts +61 -0
- package/dist/lib/router-server.d.ts.map +1 -0
- package/dist/lib/router-server.js +485 -0
- package/dist/lib/router-server.js.map +1 -0
- package/dist/tui/ConfigApp.d.ts +7 -0
- package/dist/tui/ConfigApp.d.ts.map +1 -0
- package/dist/tui/ConfigApp.js +1002 -0
- package/dist/tui/ConfigApp.js.map +1 -0
- package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -1
- package/dist/tui/HistoricalMonitorApp.js +85 -49
- package/dist/tui/HistoricalMonitorApp.js.map +1 -1
- package/dist/tui/ModelsApp.d.ts +7 -0
- package/dist/tui/ModelsApp.d.ts.map +1 -0
- package/dist/tui/ModelsApp.js +362 -0
- package/dist/tui/ModelsApp.js.map +1 -0
- package/dist/tui/MultiServerMonitorApp.d.ts +6 -1
- package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -1
- package/dist/tui/MultiServerMonitorApp.js +1038 -122
- package/dist/tui/MultiServerMonitorApp.js.map +1 -1
- package/dist/tui/RootNavigator.d.ts +7 -0
- package/dist/tui/RootNavigator.d.ts.map +1 -0
- package/dist/tui/RootNavigator.js +55 -0
- package/dist/tui/RootNavigator.js.map +1 -0
- package/dist/tui/SearchApp.d.ts +6 -0
- package/dist/tui/SearchApp.d.ts.map +1 -0
- package/dist/tui/SearchApp.js +451 -0
- package/dist/tui/SearchApp.js.map +1 -0
- package/dist/tui/SplashScreen.d.ts +16 -0
- package/dist/tui/SplashScreen.d.ts.map +1 -0
- package/dist/tui/SplashScreen.js +129 -0
- package/dist/tui/SplashScreen.js.map +1 -0
- package/dist/types/router-config.d.ts +19 -0
- package/dist/types/router-config.d.ts.map +1 -0
- package/dist/types/router-config.js +3 -0
- package/dist/types/router-config.js.map +1 -0
- package/package.json +1 -1
- package/src/cli.ts +121 -10
- package/src/commands/monitor.ts +1 -1
- package/src/commands/ps.ts +44 -133
- package/src/commands/router/config.ts +116 -0
- package/src/commands/router/logs.ts +256 -0
- package/src/commands/router/restart.ts +36 -0
- package/src/commands/router/start.ts +60 -0
- package/src/commands/router/status.ts +119 -0
- package/src/commands/router/stop.ts +33 -0
- package/src/commands/tui.ts +25 -0
- package/src/lib/model-downloader.ts +57 -20
- package/src/lib/router-logger.ts +201 -0
- package/src/lib/router-manager.ts +414 -0
- package/src/lib/router-server.ts +538 -0
- package/src/tui/ConfigApp.ts +1085 -0
- package/src/tui/HistoricalMonitorApp.ts +88 -49
- package/src/tui/ModelsApp.ts +368 -0
- package/src/tui/MultiServerMonitorApp.ts +1163 -122
- package/src/tui/RootNavigator.ts +74 -0
- package/src/tui/SearchApp.ts +511 -0
- package/src/tui/SplashScreen.ts +149 -0
- package/src/types/router-config.ts +25 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,64 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
## [1.10.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.9.0...v1.10.0) (2026-02-02)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
### Features
|
|
9
|
+
|
|
10
|
+
* add Config screen TUI for editing server configuration ([0555eb8](https://github.com/appkitstudio/llamacpp-cli/commit/0555eb87b25fd9c0496ddf04bf59433ae0e9dc48))
|
|
11
|
+
* add Models management TUI for displaying and deleting models ([45b312f](https://github.com/appkitstudio/llamacpp-cli/commit/45b312f160e476a8effd0ad92a7f88e2b533f9c5))
|
|
12
|
+
* add splash screen with ASCII logo on TUI startup ([9a2bbba](https://github.com/appkitstudio/llamacpp-cli/commit/9a2bbbac80c8736548a71d38230a69b3745198ea))
|
|
13
|
+
* add Start/Stop controls and UI improvements to TUI ([2409c8e](https://github.com/appkitstudio/llamacpp-cli/commit/2409c8e283f37050f2b4faccbaef82bf6db2ac39))
|
|
14
|
+
* enhance TUI with interactive dashboard and deprecate old commands ([464c224](https://github.com/appkitstudio/llamacpp-cli/commit/464c224aefc2592442f80e72a076ec792a9d36dc))
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
### Code Refactoring
|
|
18
|
+
|
|
19
|
+
* simplify TUI code for clarity and maintainability ([2b50d52](https://github.com/appkitstudio/llamacpp-cli/commit/2b50d5288b2ecd5bed4619518f50d74bd43c6b76))
|
|
20
|
+
|
|
21
|
+
## [1.9.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.8.0...v1.9.0) (2026-01-27)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
### Features
|
|
25
|
+
|
|
26
|
+
* add unified router endpoint for automatic model routing ([6db0892](https://github.com/appkitstudio/llamacpp-cli/commit/6db0892adca753c021c7d9be6b69231c983170a1))
|
|
27
|
+
|
|
28
|
+
## [1.9.0] (2026-01-26)
|
|
29
|
+
|
|
30
|
+
### Features
|
|
31
|
+
|
|
32
|
+
* **router**: add unified router endpoint for automatic model routing ([#TBD])
|
|
33
|
+
|
|
34
|
+
The router provides a single OpenAI-compatible endpoint that automatically routes requests to the correct backend server based on model name. Perfect for LLM clients that don't support multiple endpoints.
|
|
35
|
+
|
|
36
|
+
**Key features:**
|
|
37
|
+
- Single endpoint (default: http://localhost:9100) for all models
|
|
38
|
+
- Automatic routing based on `model` field in requests
|
|
39
|
+
- Zero-config model discovery from running servers
|
|
40
|
+
- Aggregated `/v1/models` endpoint
|
|
41
|
+
- Native Node.js HTTP proxy (no external dependencies)
|
|
42
|
+
- Streaming support for chat completions
|
|
43
|
+
- Comprehensive error handling (404, 503, 502, 504)
|
|
44
|
+
|
|
45
|
+
**Commands:**
|
|
46
|
+
- `llamacpp router start` - Start router service
|
|
47
|
+
- `llamacpp router stop` - Stop router service
|
|
48
|
+
- `llamacpp router status` - Show status and available models
|
|
49
|
+
- `llamacpp router restart` - Restart router
|
|
50
|
+
- `llamacpp router config` - Update configuration (port, host, timeout, health-interval)
|
|
51
|
+
|
|
52
|
+
**Usage:**
|
|
53
|
+
```python
|
|
54
|
+
from openai import OpenAI
|
|
55
|
+
|
|
56
|
+
client = OpenAI(base_url="http://localhost:9100/v1", api_key="not-needed")
|
|
57
|
+
response = client.chat.completions.create(
|
|
58
|
+
model="llama-3.2-3b-instruct-q4_k_m.gguf",
|
|
59
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
60
|
+
)
|
|
61
|
+
```
|
|
62
|
+
|
|
5
63
|
## [1.7.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.6.0...v1.7.0) (2026-01-23)
|
|
6
64
|
|
|
7
65
|
|
package/README.md
CHANGED
|
@@ -12,7 +12,9 @@ CLI tool to manage local llama.cpp servers on macOS. Provides an Ollama-like exp
|
|
|
12
12
|
## Features
|
|
13
13
|
|
|
14
14
|
- 🚀 **Easy server management** - Start, stop, and monitor llama.cpp servers
|
|
15
|
+
- 🔀 **Unified router** - Single OpenAI-compatible endpoint for all models with automatic routing and request logging
|
|
15
16
|
- 🤖 **Model downloads** - Pull GGUF models from Hugging Face
|
|
17
|
+
- 📦 **Models Management TUI** - Browse, search, and delete models without leaving the TUI: search Hugging Face, download with progress tracking, and manage local models
|
|
16
18
|
- ⚙️ **Smart defaults** - Auto-configure threads, context size, and GPU layers based on model size
|
|
17
19
|
- 🔌 **Auto port assignment** - Automatically find available ports (9000-9999)
|
|
18
20
|
- 📊 **Real-time monitoring TUI** - Multi-server dashboard with drill-down details, live GPU/CPU/memory metrics, token generation speeds, and animated loading states
|
|
@@ -74,18 +76,16 @@ llamacpp ls
|
|
|
74
76
|
# Create and start a server (auto-assigns port, uses smart defaults)
|
|
75
77
|
llamacpp server create llama-3.2-3b-instruct-q4_k_m.gguf
|
|
76
78
|
|
|
77
|
-
#
|
|
79
|
+
# Open interactive TUI dashboard (multi-server monitoring)
|
|
80
|
+
llamacpp
|
|
81
|
+
# Press 'M' to access Models Management TUI
|
|
82
|
+
|
|
83
|
+
# List all servers (static table)
|
|
78
84
|
llamacpp ps
|
|
79
85
|
|
|
80
86
|
# View log sizes for all servers
|
|
81
87
|
llamacpp logs
|
|
82
88
|
|
|
83
|
-
# Monitor all servers (multi-server dashboard)
|
|
84
|
-
llamacpp server monitor
|
|
85
|
-
|
|
86
|
-
# Or monitor a specific server
|
|
87
|
-
llamacpp server monitor llama-3.2-3b
|
|
88
|
-
|
|
89
89
|
# Chat with your model interactively
|
|
90
90
|
llamacpp server run llama-3.2-3b
|
|
91
91
|
|
|
@@ -140,6 +140,166 @@ curl http://localhost:9000/health
|
|
|
140
140
|
|
|
141
141
|
The server is fully compatible with OpenAI's API format, so you can use it with any OpenAI-compatible client library.
|
|
142
142
|
|
|
143
|
+
## Router (Unified Endpoint)
|
|
144
|
+
|
|
145
|
+
The router provides a single OpenAI-compatible endpoint that automatically routes requests to the correct backend server based on the model name. This is perfect for LLM clients that don't support multiple endpoints.
|
|
146
|
+
|
|
147
|
+
### Quick Start
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
# Start the router (default port: 9100)
|
|
151
|
+
llamacpp router start
|
|
152
|
+
|
|
153
|
+
# Configure your LLM client to use http://localhost:9100
|
|
154
|
+
# The router automatically routes requests to the correct server based on model name
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Commands
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
llamacpp router start # Start the router service
|
|
161
|
+
llamacpp router stop # Stop the router service
|
|
162
|
+
llamacpp router status # Show router status and available models
|
|
163
|
+
llamacpp router restart # Restart the router
|
|
164
|
+
llamacpp router config # Update router settings (--port, --host, --timeout, --health-interval, --verbose)
|
|
165
|
+
llamacpp router logs # View router logs (with --follow, --verbose, --clear options)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Usage Example
|
|
169
|
+
|
|
170
|
+
The router acts as a single endpoint for all your models:
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from openai import OpenAI
|
|
174
|
+
|
|
175
|
+
client = OpenAI(
|
|
176
|
+
base_url="http://localhost:9100/v1",
|
|
177
|
+
api_key="not-needed" # API key not required for local servers
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
# Router automatically routes to the correct server based on model name
|
|
181
|
+
response = client.chat.completions.create(
|
|
182
|
+
model="llama-3.2-3b-instruct-q4_k_m.gguf",
|
|
183
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
184
|
+
)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### Supported Endpoints
|
|
188
|
+
|
|
189
|
+
- `POST /v1/chat/completions` - Chat completions (routes to correct backend)
|
|
190
|
+
- `POST /v1/embeddings` - Text embeddings (routes to correct backend)
|
|
191
|
+
- `GET /v1/models` - List all available models from running servers
|
|
192
|
+
- `GET /health` - Router health check
|
|
193
|
+
|
|
194
|
+
### Configuration
|
|
195
|
+
|
|
196
|
+
The router can be configured with:
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
# Change port
|
|
200
|
+
llamacpp router config --port 9200 --restart
|
|
201
|
+
|
|
202
|
+
# Update request timeout (ms)
|
|
203
|
+
llamacpp router config --timeout 60000 --restart
|
|
204
|
+
|
|
205
|
+
# Update health check interval (ms)
|
|
206
|
+
llamacpp router config --health-interval 3000 --restart
|
|
207
|
+
|
|
208
|
+
# Change bind address (for remote access)
|
|
209
|
+
llamacpp router config --host 0.0.0.0 --restart
|
|
210
|
+
|
|
211
|
+
# Enable verbose logging (saves detailed JSON logs)
|
|
212
|
+
llamacpp router config --verbose true --restart
|
|
213
|
+
|
|
214
|
+
# Disable verbose logging
|
|
215
|
+
llamacpp router config --verbose false --restart
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
**Note:** Configuration changes require a router restart to take effect. Pass the `--restart` flag to apply them immediately.
|
|
219
|
+
|
|
220
|
+
### Logging
|
|
221
|
+
|
|
222
|
+
The router uses separate log streams for different purposes (nginx-style):
|
|
223
|
+
|
|
224
|
+
| Log File | Purpose | Content |
|
|
225
|
+
|----------|---------|---------|
|
|
226
|
+
| `router.stdout` | Request activity | Model routing, status codes, timing, prompts |
|
|
227
|
+
| `router.stderr` | System messages | Startup, shutdown, errors, proxy failures |
|
|
228
|
+
| `router.log` | Structured JSON | Detailed entries for programmatic parsing (verbose mode) |
|
|
229
|
+
|
|
230
|
+
**View recent logs:**
|
|
231
|
+
```bash
|
|
232
|
+
# Show activity logs (default - stdout)
|
|
233
|
+
llamacpp router logs
|
|
234
|
+
|
|
235
|
+
# Show system logs (errors, startup messages)
|
|
236
|
+
llamacpp router logs --stderr
|
|
237
|
+
|
|
238
|
+
# Follow activity in real-time
|
|
239
|
+
llamacpp router logs --follow
|
|
240
|
+
|
|
241
|
+
# Show last 10 lines
|
|
242
|
+
llamacpp router logs --lines 10
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
**Log formats:**
|
|
246
|
+
|
|
247
|
+
Activity logs (stdout):
|
|
248
|
+
```
|
|
249
|
+
200 POST /v1/chat/completions → llama-3.2-3b-instruct-q4_k_m.gguf (127.0.0.1:9001) 1234ms | "What is..."
|
|
250
|
+
404 POST /v1/chat/completions → unknown-model 3ms | "test" | Error: No server found
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
System logs (stderr):
|
|
254
|
+
```
|
|
255
|
+
[Router] Listening on http://127.0.0.1:9100
|
|
256
|
+
[Router] PID: 12345
|
|
257
|
+
[Router] Proxy request failed: ECONNREFUSED
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
Verbose JSON logs (`router.log`) - enable with `llamacpp router config --verbose true --restart`, then view them with:
|
|
261
|
+
```bash
|
|
262
|
+
llamacpp router logs --verbose
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
**Log management:**
|
|
266
|
+
```bash
|
|
267
|
+
# Clear activity log
|
|
268
|
+
llamacpp router logs --clear
|
|
269
|
+
|
|
270
|
+
# Clear all router logs (stdout, stderr, verbose)
|
|
271
|
+
llamacpp router logs --clear-all
|
|
272
|
+
|
|
273
|
+
# Rotate log files with timestamp
|
|
274
|
+
llamacpp router logs --rotate
|
|
275
|
+
|
|
276
|
+
# View system logs instead of activity
|
|
277
|
+
llamacpp router logs --stderr
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
**What's logged (activity):**
|
|
281
|
+
- ✅ Model name used
|
|
282
|
+
- ✅ HTTP status code (color-coded)
|
|
283
|
+
- ✅ Request duration (ms)
|
|
284
|
+
- ✅ Backend server (host:port)
|
|
285
|
+
- ✅ First 50 chars of prompt
|
|
286
|
+
- ✅ Error messages (if failed)
|
|
287
|
+
|
|
288
|
+
**Verbose mode benefits:**
|
|
289
|
+
- Detailed JSON logs for LLM/script parsing
|
|
290
|
+
- Stored in `~/.llamacpp/logs/router.log`
|
|
291
|
+
- Automatic rotation when exceeding 100MB
|
|
292
|
+
- Machine-readable format with timestamps
|
|
293
|
+
|
|
294
|
+
### How It Works
|
|
295
|
+
|
|
296
|
+
1. Router receives request with `model` field
|
|
297
|
+
2. Finds running server configured for that model
|
|
298
|
+
3. Proxies request to backend server
|
|
299
|
+
4. Streams response back to client
|
|
300
|
+
|
|
301
|
+
If the requested model's server is not running, the router returns a 503 error with a helpful message.
|
|
302
|
+
|
|
143
303
|
### Example Output
|
|
144
304
|
|
|
145
305
|
Creating a server:
|
|
@@ -189,6 +349,15 @@ Assistant: The capital of France is Paris...
|
|
|
189
349
|
|
|
190
350
|
## Commands
|
|
191
351
|
|
|
352
|
+
### `llamacpp`
|
|
353
|
+
Launch the interactive TUI dashboard for monitoring and managing servers.
|
|
354
|
+
|
|
355
|
+
```bash
|
|
356
|
+
llamacpp
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
See [Interactive TUI Dashboard](#interactive-tui-dashboard) for full details.
|
|
360
|
+
|
|
192
361
|
### `llamacpp ls`
|
|
193
362
|
List all GGUF models in ~/models directory.
|
|
194
363
|
|
|
@@ -307,6 +476,47 @@ llamacpp logs --rotate
|
|
|
307
476
|
|
|
308
477
|
**Use case:** Quickly see which servers are accumulating large logs, or clean up all logs at once.
|
|
309
478
|
|
|
479
|
+
## Models Management TUI
|
|
480
|
+
|
|
481
|
+
The Models Management TUI is accessible by pressing `M` from the `llamacpp` list view. It provides a full-featured interface for managing local models and searching/downloading new ones.
|
|
482
|
+
|
|
483
|
+
**Features:**
|
|
484
|
+
- **Browse local models** - View all GGUF files with size, modification date, and server usage
|
|
485
|
+
- **Delete models** - Remove models with automatic cleanup of associated servers
|
|
486
|
+
- **Search Hugging Face** - Find and browse models from the Hugging Face repository
|
|
487
|
+
- **Download with progress** - Real-time progress tracking for model downloads
|
|
488
|
+
- **Seamless navigation** - Switch between monitoring and models management
|
|
489
|
+
|
|
490
|
+
**Quick Access:**
|
|
491
|
+
```bash
|
|
492
|
+
# Launch TUI and press 'M' to open Models Management
|
|
493
|
+
llamacpp
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
**Models View:**
|
|
497
|
+
- View all installed models in scrollable table
|
|
498
|
+
- See which servers are using each model
|
|
499
|
+
- Color-coded status (green = safe to delete, yellow/gray = in use by one or more servers)
|
|
500
|
+
- Delete models with Enter or D key
|
|
501
|
+
- Cascade deletion: automatically removes associated servers
|
|
502
|
+
|
|
503
|
+
**Search View (press 'S' from Models view):**
|
|
504
|
+
- Search HuggingFace models by name
|
|
505
|
+
- Browse search results with download counts and likes
|
|
506
|
+
- Expand models to show available GGUF files
|
|
507
|
+
- Download files with real-time progress tracking
|
|
508
|
+
- Cancel downloads with ESC (cleans up partial files)
|
|
509
|
+
|
|
510
|
+
**Keyboard Controls:**
|
|
511
|
+
- **M** - Switch to Models view (from TUI list view)
|
|
512
|
+
- **↑/↓** or **k/j** - Navigate lists
|
|
513
|
+
- **Enter** - Select/download/delete
|
|
514
|
+
- **S** - Open search view (from models view)
|
|
515
|
+
- **/** or **I** - Focus search input (in search view)
|
|
516
|
+
- **R** - Refresh view
|
|
517
|
+
- **ESC** - Back/cancel
|
|
518
|
+
- **Q** - Quit
|
|
519
|
+
|
|
310
520
|
## Server Management
|
|
311
521
|
|
|
312
522
|
### `llamacpp server create <model> [options]`
|
|
@@ -523,30 +733,23 @@ The compact format shows one line per HTTP request and includes:
|
|
|
523
733
|
|
|
524
734
|
Use `--http` to see full request/response JSON, or `--verbose` option to see all internal server logs.
|
|
525
735
|
|
|
526
|
-
|
|
527
|
-
Real-time monitoring TUI showing server metrics, GPU/CPU usage, and active inference slots.
|
|
736
|
+
## Interactive TUI Dashboard
|
|
528
737
|
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
**Two Modes:**
|
|
738
|
+
The main way to monitor and manage servers is through the interactive TUI dashboard, launched by running `llamacpp` with no arguments.
|
|
532
739
|
|
|
533
|
-
**1. Multi-Server Dashboard (no identifier):**
|
|
534
740
|
```bash
|
|
535
|
-
llamacpp
|
|
741
|
+
llamacpp
|
|
536
742
|
```
|
|
537
|
-
Shows overview of all servers with system resources. Use arrow keys (↑/↓) or vim keys (k/j) to navigate, then press Enter to view server details.
|
|
538
743
|
|
|
539
|
-
|
|
540
|
-
```bash
|
|
541
|
-
# Monitor by partial name
|
|
542
|
-
llamacpp server monitor llama-3.2-3b
|
|
543
|
-
|
|
544
|
-
# Monitor by port
|
|
545
|
-
llamacpp server monitor 9000
|
|
744
|
+

|
|
546
745
|
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
746
|
+
**Features:**
|
|
747
|
+
- Multi-server dashboard with real-time metrics
|
|
748
|
+
- Drill-down to single-server detail view
|
|
749
|
+
- Create, start, stop, and remove servers without leaving the TUI
|
|
750
|
+
- Edit server configuration inline
|
|
751
|
+
- Access Models Management (press `M`)
|
|
752
|
+
- Historical metrics with time-series charts
|
|
550
753
|
|
|
551
754
|
**Multi-Server Dashboard:**
|
|
552
755
|
```
|
|
@@ -571,19 +774,21 @@ llamacpp server monitor llama-3-2-3b
|
|
|
571
774
|
- **System Resources** - GPU/CPU/ANE utilization, memory usage, temperature
|
|
572
775
|
|
|
573
776
|
**Keyboard Shortcuts:**
|
|
574
|
-
- **Multi-Server
|
|
777
|
+
- **List View (Multi-Server):**
|
|
575
778
|
- `↑/↓` or `k/j` - Navigate server list
|
|
576
779
|
- `Enter` - View details for selected server
|
|
577
|
-
- `
|
|
578
|
-
- `
|
|
579
|
-
- `
|
|
580
|
-
-
|
|
581
|
-
- `Q` - Quit
|
|
582
|
-
- **Single-Server
|
|
583
|
-
- `
|
|
584
|
-
- `
|
|
585
|
-
-
|
|
586
|
-
- `
|
|
780
|
+
- `N` - Create new server
|
|
781
|
+
- `M` - Switch to Models Management
|
|
782
|
+
- `H` - View historical metrics (all servers)
|
|
783
|
+
- `ESC` - Exit TUI
|
|
784
|
+
- `Q` - Quit immediately
|
|
785
|
+
- **Detail View (Single-Server):**
|
|
786
|
+
- `S` - Start/Stop server (toggles based on status)
|
|
787
|
+
- `C` - Open configuration screen
|
|
788
|
+
- `R` - Remove server (with confirmation)
|
|
789
|
+
- `H` - View historical metrics (this server)
|
|
790
|
+
- `ESC` - Back to list view
|
|
791
|
+
- `Q` - Quit immediately
|
|
587
792
|
- **Historical View:**
|
|
588
793
|
- `H` - Toggle Hour View (Recent ↔ Hour)
|
|
589
794
|
- `ESC` - Back to live monitoring
|
|
@@ -621,7 +826,7 @@ Press `H` from any live monitoring view to see historical time-series charts. Th
|
|
|
621
826
|
|
|
622
827
|
**Data Collection:**
|
|
623
828
|
|
|
624
|
-
Historical data is automatically collected whenever you run the
|
|
829
|
+
Historical data is automatically collected whenever you run the TUI (`llamacpp`). Data is retained for 24 hours in `~/.llamacpp/history/<server-id>.json` files, then automatically pruned.
|
|
625
830
|
|
|
626
831
|
**Multi-Server Historical View:**
|
|
627
832
|
|
|
@@ -643,13 +848,15 @@ For GPU and CPU utilization metrics, install macmon:
|
|
|
643
848
|
brew install vladkens/tap/macmon
|
|
644
849
|
```
|
|
645
850
|
|
|
646
|
-
Without macmon, the
|
|
851
|
+
Without macmon, the TUI still shows:
|
|
647
852
|
- ✅ Server status and uptime
|
|
648
853
|
- ✅ Active slots and token generation speeds
|
|
649
854
|
- ✅ Memory usage (via built-in vm_stat)
|
|
650
855
|
- ❌ GPU/CPU/ANE utilization (requires macmon)
|
|
651
856
|
|
|
652
|
-
|
|
857
|
+
### Deprecated: `llamacpp server monitor`
|
|
858
|
+
|
|
859
|
+
The `llamacpp server monitor` command is deprecated. Use `llamacpp` instead to launch the TUI dashboard.
|
|
653
860
|
|
|
654
861
|
## Configuration
|
|
655
862
|
|
|
@@ -735,7 +942,9 @@ llamacpp server logs <identifier> --errors
|
|
|
735
942
|
npm install
|
|
736
943
|
|
|
737
944
|
# Run in development mode
|
|
738
|
-
npm run dev
|
|
945
|
+
npm run dev # Launch TUI
|
|
946
|
+
npm run dev -- ps # List servers (static table)
|
|
947
|
+
npm run dev -- ls # List models
|
|
739
948
|
|
|
740
949
|
# Build for production
|
|
741
950
|
npm run build
|
package/dist/cli.js
CHANGED
|
@@ -1,5 +1,38 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
"use strict";
|
|
3
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
4
|
+
if (k2 === undefined) k2 = k;
|
|
5
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
6
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
7
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
8
|
+
}
|
|
9
|
+
Object.defineProperty(o, k2, desc);
|
|
10
|
+
}) : (function(o, m, k, k2) {
|
|
11
|
+
if (k2 === undefined) k2 = k;
|
|
12
|
+
o[k2] = m[k];
|
|
13
|
+
}));
|
|
14
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
15
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
16
|
+
}) : function(o, v) {
|
|
17
|
+
o["default"] = v;
|
|
18
|
+
});
|
|
19
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
20
|
+
var ownKeys = function(o) {
|
|
21
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
22
|
+
var ar = [];
|
|
23
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
24
|
+
return ar;
|
|
25
|
+
};
|
|
26
|
+
return ownKeys(o);
|
|
27
|
+
};
|
|
28
|
+
return function (mod) {
|
|
29
|
+
if (mod && mod.__esModule) return mod;
|
|
30
|
+
var result = {};
|
|
31
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
32
|
+
__setModuleDefault(result, mod);
|
|
33
|
+
return result;
|
|
34
|
+
};
|
|
35
|
+
})();
|
|
3
36
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
4
37
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
5
38
|
};
|
|
@@ -23,12 +56,29 @@ const server_show_1 = require("./commands/server-show");
|
|
|
23
56
|
const config_1 = require("./commands/config");
|
|
24
57
|
const config_global_1 = require("./commands/config-global");
|
|
25
58
|
const monitor_1 = require("./commands/monitor");
|
|
59
|
+
const start_2 = require("./commands/router/start");
|
|
60
|
+
const stop_2 = require("./commands/router/stop");
|
|
61
|
+
const status_1 = require("./commands/router/status");
|
|
62
|
+
const restart_1 = require("./commands/router/restart");
|
|
63
|
+
const config_2 = require("./commands/router/config");
|
|
64
|
+
const logs_2 = require("./commands/router/logs");
|
|
26
65
|
const package_json_1 = __importDefault(require("../package.json"));
|
|
27
66
|
const program = new commander_1.Command();
|
|
28
67
|
program
|
|
29
68
|
.name('llamacpp')
|
|
30
69
|
.description('CLI tool to manage local llama.cpp servers on macOS')
|
|
31
|
-
.version(package_json_1.default.version, '-v, --version', 'Output the version number')
|
|
70
|
+
.version(package_json_1.default.version, '-v, --version', 'Output the version number')
|
|
71
|
+
.action(async () => {
|
|
72
|
+
// Default action: launch TUI when no command provided
|
|
73
|
+
try {
|
|
74
|
+
const { tuiCommand } = await Promise.resolve().then(() => __importStar(require('./commands/tui')));
|
|
75
|
+
await tuiCommand();
|
|
76
|
+
}
|
|
77
|
+
catch (error) {
|
|
78
|
+
console.error(chalk_1.default.red('❌ Error:'), error.message);
|
|
79
|
+
process.exit(1);
|
|
80
|
+
}
|
|
81
|
+
});
|
|
32
82
|
// List models
|
|
33
83
|
program
|
|
34
84
|
.command('ls')
|
|
@@ -42,14 +92,13 @@ program
|
|
|
42
92
|
process.exit(1);
|
|
43
93
|
}
|
|
44
94
|
});
|
|
45
|
-
// List
|
|
95
|
+
// List servers (static table)
|
|
46
96
|
program
|
|
47
|
-
.command('ps
|
|
48
|
-
.description('
|
|
49
|
-
.
|
|
50
|
-
.action(async (identifier, options) => {
|
|
97
|
+
.command('ps')
|
|
98
|
+
.description('List all servers with status (static table)')
|
|
99
|
+
.action(async () => {
|
|
51
100
|
try {
|
|
52
|
-
await (0, ps_1.psCommand)(
|
|
101
|
+
await (0, ps_1.psCommand)();
|
|
53
102
|
}
|
|
54
103
|
catch (error) {
|
|
55
104
|
console.error(chalk_1.default.red('❌ Error:'), error.message);
|
|
@@ -293,14 +342,14 @@ server
|
|
|
293
342
|
process.exit(1);
|
|
294
343
|
}
|
|
295
344
|
});
|
|
296
|
-
// Monitor server (deprecated - redirects to
|
|
345
|
+
// Monitor server (deprecated - redirects to TUI)
|
|
297
346
|
server
|
|
298
347
|
.command('monitor [identifier]')
|
|
299
|
-
.description('Monitor server with real-time metrics TUI (deprecated: use "llamacpp
|
|
348
|
+
.description('Monitor server with real-time metrics TUI (deprecated: use "llamacpp" instead)')
|
|
300
349
|
.action(async (identifier) => {
|
|
301
350
|
try {
|
|
302
351
|
console.log(chalk_1.default.yellow('⚠️ The "monitor" command is deprecated and will be removed in a future version.'));
|
|
303
|
-
console.log(chalk_1.default.dim(' Please use "llamacpp
|
|
352
|
+
console.log(chalk_1.default.dim(' Please use "llamacpp" instead for the same functionality.\n'));
|
|
304
353
|
await (0, monitor_1.monitorCommand)(identifier);
|
|
305
354
|
}
|
|
306
355
|
catch (error) {
|
|
@@ -308,6 +357,101 @@ server
|
|
|
308
357
|
process.exit(1);
|
|
309
358
|
}
|
|
310
359
|
});
|
|
360
|
+
// Router management commands
|
|
361
|
+
const router = program
|
|
362
|
+
.command('router')
|
|
363
|
+
.description('Manage the unified router endpoint');
|
|
364
|
+
// Start router
|
|
365
|
+
router
|
|
366
|
+
.command('start')
|
|
367
|
+
.description('Start the router service')
|
|
368
|
+
.action(async () => {
|
|
369
|
+
try {
|
|
370
|
+
await (0, start_2.routerStartCommand)();
|
|
371
|
+
}
|
|
372
|
+
catch (error) {
|
|
373
|
+
console.error(chalk_1.default.red('❌ Error:'), error.message);
|
|
374
|
+
process.exit(1);
|
|
375
|
+
}
|
|
376
|
+
});
|
|
377
|
+
// Stop router
|
|
378
|
+
router
|
|
379
|
+
.command('stop')
|
|
380
|
+
.description('Stop the router service')
|
|
381
|
+
.action(async () => {
|
|
382
|
+
try {
|
|
383
|
+
await (0, stop_2.routerStopCommand)();
|
|
384
|
+
}
|
|
385
|
+
catch (error) {
|
|
386
|
+
console.error(chalk_1.default.red('❌ Error:'), error.message);
|
|
387
|
+
process.exit(1);
|
|
388
|
+
}
|
|
389
|
+
});
|
|
390
|
+
// Show router status
|
|
391
|
+
router
|
|
392
|
+
.command('status')
|
|
393
|
+
.description('Show router status and configuration')
|
|
394
|
+
.action(async () => {
|
|
395
|
+
try {
|
|
396
|
+
await (0, status_1.routerStatusCommand)();
|
|
397
|
+
}
|
|
398
|
+
catch (error) {
|
|
399
|
+
console.error(chalk_1.default.red('❌ Error:'), error.message);
|
|
400
|
+
process.exit(1);
|
|
401
|
+
}
|
|
402
|
+
});
|
|
403
|
+
// Restart router
|
|
404
|
+
router
|
|
405
|
+
.command('restart')
|
|
406
|
+
.description('Restart the router service')
|
|
407
|
+
.action(async () => {
|
|
408
|
+
try {
|
|
409
|
+
await (0, restart_1.routerRestartCommand)();
|
|
410
|
+
}
|
|
411
|
+
catch (error) {
|
|
412
|
+
console.error(chalk_1.default.red('❌ Error:'), error.message);
|
|
413
|
+
process.exit(1);
|
|
414
|
+
}
|
|
415
|
+
});
|
|
416
|
+
// Configure router
|
|
417
|
+
router
|
|
418
|
+
.command('config')
|
|
419
|
+
.description('Update router configuration')
|
|
420
|
+
.option('-p, --port <number>', 'Update port number', parseInt)
|
|
421
|
+
.option('-h, --host <address>', 'Update bind address')
|
|
422
|
+
.option('--timeout <ms>', 'Update request timeout (milliseconds)', parseInt)
|
|
423
|
+
.option('--health-interval <ms>', 'Update health check interval (milliseconds)', parseInt)
|
|
424
|
+
.option('-v, --verbose [boolean]', 'Enable/disable verbose logging to file (true/false)', (val) => val === 'true' || val === '1')
|
|
425
|
+
.option('-r, --restart', 'Automatically restart router if running')
|
|
426
|
+
.action(async (options) => {
|
|
427
|
+
try {
|
|
428
|
+
await (0, config_2.routerConfigCommand)(options);
|
|
429
|
+
}
|
|
430
|
+
catch (error) {
|
|
431
|
+
console.error(chalk_1.default.red('❌ Error:'), error.message);
|
|
432
|
+
process.exit(1);
|
|
433
|
+
}
|
|
434
|
+
});
|
|
435
|
+
// Router logs
|
|
436
|
+
router
|
|
437
|
+
.command('logs')
|
|
438
|
+
.description('View router logs')
|
|
439
|
+
.option('-f, --follow', 'Follow logs in real-time (like tail -f)')
|
|
440
|
+
.option('-n, --lines <number>', 'Number of lines to show (default: 50)', parseInt)
|
|
441
|
+
.option('--stderr', 'Show system logs (stderr) instead of activity logs (stdout)')
|
|
442
|
+
.option('-v, --verbose', 'Show verbose JSON log file (if enabled)')
|
|
443
|
+
.option('--clear', 'Clear the log file')
|
|
444
|
+
.option('--rotate', 'Rotate the log file with timestamp')
|
|
445
|
+
.option('--clear-all', 'Clear all router logs (activity, system, verbose)')
|
|
446
|
+
.action(async (options) => {
|
|
447
|
+
try {
|
|
448
|
+
await (0, logs_2.routerLogsCommand)(options);
|
|
449
|
+
}
|
|
450
|
+
catch (error) {
|
|
451
|
+
console.error(chalk_1.default.red('❌ Error:'), error.message);
|
|
452
|
+
process.exit(1);
|
|
453
|
+
}
|
|
454
|
+
});
|
|
311
455
|
// Parse arguments
|
|
312
456
|
program.parse();
|
|
313
457
|
//# sourceMappingURL=cli.js.map
|