@appkit/llamacpp-cli 1.8.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/CHANGELOG.md +58 -0
  2. package/README.md +249 -40
  3. package/dist/cli.js +154 -10
  4. package/dist/cli.js.map +1 -1
  5. package/dist/commands/completion.d.ts +9 -0
  6. package/dist/commands/completion.d.ts.map +1 -0
  7. package/dist/commands/completion.js +83 -0
  8. package/dist/commands/completion.js.map +1 -0
  9. package/dist/commands/monitor.js +1 -1
  10. package/dist/commands/monitor.js.map +1 -1
  11. package/dist/commands/ps.d.ts +1 -3
  12. package/dist/commands/ps.d.ts.map +1 -1
  13. package/dist/commands/ps.js +36 -115
  14. package/dist/commands/ps.js.map +1 -1
  15. package/dist/commands/router/config.d.ts +11 -0
  16. package/dist/commands/router/config.d.ts.map +1 -0
  17. package/dist/commands/router/config.js +100 -0
  18. package/dist/commands/router/config.js.map +1 -0
  19. package/dist/commands/router/logs.d.ts +12 -0
  20. package/dist/commands/router/logs.d.ts.map +1 -0
  21. package/dist/commands/router/logs.js +238 -0
  22. package/dist/commands/router/logs.js.map +1 -0
  23. package/dist/commands/router/restart.d.ts +2 -0
  24. package/dist/commands/router/restart.d.ts.map +1 -0
  25. package/dist/commands/router/restart.js +39 -0
  26. package/dist/commands/router/restart.js.map +1 -0
  27. package/dist/commands/router/start.d.ts +2 -0
  28. package/dist/commands/router/start.d.ts.map +1 -0
  29. package/dist/commands/router/start.js +60 -0
  30. package/dist/commands/router/start.js.map +1 -0
  31. package/dist/commands/router/status.d.ts +2 -0
  32. package/dist/commands/router/status.d.ts.map +1 -0
  33. package/dist/commands/router/status.js +116 -0
  34. package/dist/commands/router/status.js.map +1 -0
  35. package/dist/commands/router/stop.d.ts +2 -0
  36. package/dist/commands/router/stop.d.ts.map +1 -0
  37. package/dist/commands/router/stop.js +36 -0
  38. package/dist/commands/router/stop.js.map +1 -0
  39. package/dist/commands/tui.d.ts +2 -0
  40. package/dist/commands/tui.d.ts.map +1 -0
  41. package/dist/commands/tui.js +27 -0
  42. package/dist/commands/tui.js.map +1 -0
  43. package/dist/lib/completion.d.ts +5 -0
  44. package/dist/lib/completion.d.ts.map +1 -0
  45. package/dist/lib/completion.js +195 -0
  46. package/dist/lib/completion.js.map +1 -0
  47. package/dist/lib/model-downloader.d.ts +5 -1
  48. package/dist/lib/model-downloader.d.ts.map +1 -1
  49. package/dist/lib/model-downloader.js +53 -20
  50. package/dist/lib/model-downloader.js.map +1 -1
  51. package/dist/lib/router-logger.d.ts +61 -0
  52. package/dist/lib/router-logger.d.ts.map +1 -0
  53. package/dist/lib/router-logger.js +200 -0
  54. package/dist/lib/router-logger.js.map +1 -0
  55. package/dist/lib/router-manager.d.ts +103 -0
  56. package/dist/lib/router-manager.d.ts.map +1 -0
  57. package/dist/lib/router-manager.js +394 -0
  58. package/dist/lib/router-manager.js.map +1 -0
  59. package/dist/lib/router-server.d.ts +61 -0
  60. package/dist/lib/router-server.d.ts.map +1 -0
  61. package/dist/lib/router-server.js +485 -0
  62. package/dist/lib/router-server.js.map +1 -0
  63. package/dist/tui/ConfigApp.d.ts +7 -0
  64. package/dist/tui/ConfigApp.d.ts.map +1 -0
  65. package/dist/tui/ConfigApp.js +1002 -0
  66. package/dist/tui/ConfigApp.js.map +1 -0
  67. package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -1
  68. package/dist/tui/HistoricalMonitorApp.js +85 -49
  69. package/dist/tui/HistoricalMonitorApp.js.map +1 -1
  70. package/dist/tui/ModelsApp.d.ts +7 -0
  71. package/dist/tui/ModelsApp.d.ts.map +1 -0
  72. package/dist/tui/ModelsApp.js +362 -0
  73. package/dist/tui/ModelsApp.js.map +1 -0
  74. package/dist/tui/MultiServerMonitorApp.d.ts +6 -1
  75. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -1
  76. package/dist/tui/MultiServerMonitorApp.js +1038 -122
  77. package/dist/tui/MultiServerMonitorApp.js.map +1 -1
  78. package/dist/tui/RootNavigator.d.ts +7 -0
  79. package/dist/tui/RootNavigator.d.ts.map +1 -0
  80. package/dist/tui/RootNavigator.js +55 -0
  81. package/dist/tui/RootNavigator.js.map +1 -0
  82. package/dist/tui/SearchApp.d.ts +6 -0
  83. package/dist/tui/SearchApp.d.ts.map +1 -0
  84. package/dist/tui/SearchApp.js +451 -0
  85. package/dist/tui/SearchApp.js.map +1 -0
  86. package/dist/tui/SplashScreen.d.ts +16 -0
  87. package/dist/tui/SplashScreen.d.ts.map +1 -0
  88. package/dist/tui/SplashScreen.js +129 -0
  89. package/dist/tui/SplashScreen.js.map +1 -0
  90. package/dist/types/router-config.d.ts +19 -0
  91. package/dist/types/router-config.d.ts.map +1 -0
  92. package/dist/types/router-config.js +3 -0
  93. package/dist/types/router-config.js.map +1 -0
  94. package/package.json +1 -1
  95. package/src/cli.ts +121 -10
  96. package/src/commands/monitor.ts +1 -1
  97. package/src/commands/ps.ts +44 -133
  98. package/src/commands/router/config.ts +116 -0
  99. package/src/commands/router/logs.ts +256 -0
  100. package/src/commands/router/restart.ts +36 -0
  101. package/src/commands/router/start.ts +60 -0
  102. package/src/commands/router/status.ts +119 -0
  103. package/src/commands/router/stop.ts +33 -0
  104. package/src/commands/tui.ts +25 -0
  105. package/src/lib/model-downloader.ts +57 -20
  106. package/src/lib/router-logger.ts +201 -0
  107. package/src/lib/router-manager.ts +414 -0
  108. package/src/lib/router-server.ts +538 -0
  109. package/src/tui/ConfigApp.ts +1085 -0
  110. package/src/tui/HistoricalMonitorApp.ts +88 -49
  111. package/src/tui/ModelsApp.ts +368 -0
  112. package/src/tui/MultiServerMonitorApp.ts +1163 -122
  113. package/src/tui/RootNavigator.ts +74 -0
  114. package/src/tui/SearchApp.ts +511 -0
  115. package/src/tui/SplashScreen.ts +149 -0
  116. package/src/types/router-config.ts +25 -0
package/CHANGELOG.md CHANGED
@@ -2,6 +2,64 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
4
4
 
5
+ ## [1.10.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.9.0...v1.10.0) (2026-02-02)
6
+
7
+
8
+ ### Features
9
+
10
+ * add Config screen TUI for editing server configuration ([0555eb8](https://github.com/appkitstudio/llamacpp-cli/commit/0555eb87b25fd9c0496ddf04bf59433ae0e9dc48))
11
+ * add Models management TUI for displaying and deleting models ([45b312f](https://github.com/appkitstudio/llamacpp-cli/commit/45b312f160e476a8effd0ad92a7f88e2b533f9c5))
12
+ * add splash screen with ASCII logo on TUI startup ([9a2bbba](https://github.com/appkitstudio/llamacpp-cli/commit/9a2bbbac80c8736548a71d38230a69b3745198ea))
13
+ * add Start/Stop controls and UI improvements to TUI ([2409c8e](https://github.com/appkitstudio/llamacpp-cli/commit/2409c8e283f37050f2b4faccbaef82bf6db2ac39))
14
+ * enhance TUI with interactive dashboard and deprecate old commands ([464c224](https://github.com/appkitstudio/llamacpp-cli/commit/464c224aefc2592442f80e72a076ec792a9d36dc))
15
+
16
+
17
+ ### Code Refactoring
18
+
19
+ * simplify TUI code for clarity and maintainability ([2b50d52](https://github.com/appkitstudio/llamacpp-cli/commit/2b50d5288b2ecd5bed4619518f50d74bd43c6b76))
20
+
21
+ ## [1.9.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.8.0...v1.9.0) (2026-01-27)
22
+
23
+
24
+ ### Features
25
+
26
+ * add unified router endpoint for automatic model routing ([6db0892](https://github.com/appkitstudio/llamacpp-cli/commit/6db0892adca753c021c7d9be6b69231c983170a1))
27
+
28
+ ## [1.9.0] (2026-01-26)
29
+
30
+ ### Features
31
+
32
+ * **router**: add unified router endpoint for automatic model routing ([#TBD])
33
+
34
+ The router provides a single OpenAI-compatible endpoint that automatically routes requests to the correct backend server based on model name. Perfect for LLM clients that don't support multiple endpoints.
35
+
36
+ **Key features:**
37
+ - Single endpoint (default: http://localhost:9100) for all models
38
+ - Automatic routing based on `model` field in requests
39
+ - Zero-config model discovery from running servers
40
+ - Aggregated `/v1/models` endpoint
41
+ - Native Node.js HTTP proxy (no external dependencies)
42
+ - Streaming support for chat completions
43
+ - Comprehensive error handling (404, 503, 502, 504)
44
+
45
+ **Commands:**
46
+ - `llamacpp router start` - Start router service
47
+ - `llamacpp router stop` - Stop router service
48
+ - `llamacpp router status` - Show status and available models
49
+ - `llamacpp router restart` - Restart router
50
+ - `llamacpp router config` - Update configuration (port, host, timeout, health-interval)
51
+
52
+ **Usage:**
53
+ ```python
54
+ from openai import OpenAI
55
+
56
+ client = OpenAI(base_url="http://localhost:9100/v1", api_key="not-needed")
57
+ response = client.chat.completions.create(
58
+ model="llama-3.2-3b-instruct-q4_k_m.gguf",
59
+ messages=[{"role": "user", "content": "Hello!"}]
60
+ )
61
+ ```
62
+
5
63
  ## [1.7.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.6.0...v1.7.0) (2026-01-23)
6
64
 
7
65
 
package/README.md CHANGED
@@ -12,7 +12,9 @@ CLI tool to manage local llama.cpp servers on macOS. Provides an Ollama-like exp
12
12
  ## Features
13
13
 
14
14
  - 🚀 **Easy server management** - Start, stop, and monitor llama.cpp servers
15
+ - 🔀 **Unified router** - Single OpenAI-compatible endpoint for all models with automatic routing and request logging
15
16
  - 🤖 **Model downloads** - Pull GGUF models from Hugging Face
17
+ - 📦 **Models Management TUI** - Browse, search, and delete models without leaving the TUI. Search HuggingFace, download with progress tracking, manage local models
16
18
  - ⚙️ **Smart defaults** - Auto-configure threads, context size, and GPU layers based on model size
17
19
  - 🔌 **Auto port assignment** - Automatically find available ports (9000-9999)
18
20
  - 📊 **Real-time monitoring TUI** - Multi-server dashboard with drill-down details, live GPU/CPU/memory metrics, token generation speeds, and animated loading states
@@ -74,18 +76,16 @@ llamacpp ls
74
76
  # Create and start a server (auto-assigns port, uses smart defaults)
75
77
  llamacpp server create llama-3.2-3b-instruct-q4_k_m.gguf
76
78
 
77
- # View running servers
79
+ # Open interactive TUI dashboard (multi-server monitoring)
80
+ llamacpp
81
+ # Press 'M' to access Models Management TUI
82
+
83
+ # List all servers (static table)
78
84
  llamacpp ps
79
85
 
80
86
  # View log sizes for all servers
81
87
  llamacpp logs
82
88
 
83
- # Monitor all servers (multi-server dashboard)
84
- llamacpp server monitor
85
-
86
- # Or monitor a specific server
87
- llamacpp server monitor llama-3.2-3b
88
-
89
89
  # Chat with your model interactively
90
90
  llamacpp server run llama-3.2-3b
91
91
 
@@ -140,6 +140,166 @@ curl http://localhost:9000/health
140
140
 
141
141
  The server is fully compatible with OpenAI's API format, so you can use it with any OpenAI-compatible client library.
142
142
 
143
+ ## Router (Unified Endpoint)
144
+
145
+ The router provides a single OpenAI-compatible endpoint that automatically routes requests to the correct backend server based on the model name. This is perfect for LLM clients that don't support multiple endpoints.
146
+
147
+ ### Quick Start
148
+
149
+ ```bash
150
+ # Start the router (default port: 9100)
151
+ llamacpp router start
152
+
153
+ # Configure your LLM client to use http://localhost:9100
154
+ # The router automatically routes requests to the correct server based on model name
155
+ ```
156
+
157
+ ### Commands
158
+
159
+ ```bash
160
+ llamacpp router start # Start the router service
161
+ llamacpp router stop # Stop the router service
162
+ llamacpp router status # Show router status and available models
163
+ llamacpp router restart # Restart the router
164
+ llamacpp router config # Update router settings (--port, --host, --timeout, --health-interval, --verbose)
165
+ llamacpp router logs # View router logs (with --follow, --verbose, --clear options)
166
+ ```
167
+
168
+ ### Usage Example
169
+
170
+ The router acts as a single endpoint for all your models:
171
+
172
+ ```python
173
+ from openai import OpenAI
174
+
175
+ client = OpenAI(
176
+ base_url="http://localhost:9100/v1",
177
+ api_key="not-needed" # API key not required for local servers
178
+ )
179
+
180
+ # Router automatically routes to the correct server based on model name
181
+ response = client.chat.completions.create(
182
+ model="llama-3.2-3b-instruct-q4_k_m.gguf",
183
+ messages=[{"role": "user", "content": "Hello!"}]
184
+ )
185
+ ```
186
+
187
+ ### Supported Endpoints
188
+
189
+ - `POST /v1/chat/completions` - Chat completions (routes to correct backend)
190
+ - `POST /v1/embeddings` - Text embeddings (routes to correct backend)
191
+ - `GET /v1/models` - List all available models from running servers
192
+ - `GET /health` - Router health check
193
+
194
+ ### Configuration
195
+
196
+ The router can be configured with:
197
+
198
+ ```bash
199
+ # Change port
200
+ llamacpp router config --port 9200 --restart
201
+
202
+ # Update request timeout (ms)
203
+ llamacpp router config --timeout 60000 --restart
204
+
205
+ # Update health check interval (ms)
206
+ llamacpp router config --health-interval 3000 --restart
207
+
208
+ # Change bind address (for remote access)
209
+ llamacpp router config --host 0.0.0.0 --restart
210
+
211
+ # Enable verbose logging (saves detailed JSON logs)
212
+ llamacpp router config --verbose true --restart
213
+
214
+ # Disable verbose logging
215
+ llamacpp router config --verbose false --restart
216
+ ```
217
+
218
+ **Note:** Changes require a restart to take effect. Use `--restart` flag to apply immediately.
219
+
220
+ ### Logging
221
+
222
+ The router uses separate log streams for different purposes (nginx-style):
223
+
224
+ | Log File | Purpose | Content |
225
+ |----------|---------|---------|
226
+ | `router.stdout` | Request activity | Model routing, status codes, timing, prompts |
227
+ | `router.stderr` | System messages | Startup, shutdown, errors, proxy failures |
228
+ | `router.log` | Structured JSON | Detailed entries for programmatic parsing (verbose mode) |
229
+
230
+ **View recent logs:**
231
+ ```bash
232
+ # Show activity logs (default - stdout)
233
+ llamacpp router logs
234
+
235
+ # Show system logs (errors, startup messages)
236
+ llamacpp router logs --stderr
237
+
238
+ # Follow activity in real-time
239
+ llamacpp router logs --follow
240
+
241
+ # Show last 10 lines
242
+ llamacpp router logs --lines 10
243
+ ```
244
+
245
+ **Log formats:**
246
+
247
+ Activity logs (stdout):
248
+ ```
249
+ 200 POST /v1/chat/completions → llama-3.2-3b-instruct-q4_k_m.gguf (127.0.0.1:9001) 1234ms | "What is..."
250
+ 404 POST /v1/chat/completions → unknown-model 3ms | "test" | Error: No server found
251
+ ```
252
+
253
+ System logs (stderr):
254
+ ```
255
+ [Router] Listening on http://127.0.0.1:9100
256
+ [Router] PID: 12345
257
+ [Router] Proxy request failed: ECONNREFUSED
258
+ ```
259
+
260
+ Verbose JSON logs (router.log) - enable with `--verbose true`:
261
+ ```bash
262
+ llamacpp router logs --verbose
263
+ ```
264
+
265
+ **Log management:**
266
+ ```bash
267
+ # Clear activity log
268
+ llamacpp router logs --clear
269
+
270
+ # Clear all router logs (stdout, stderr, verbose)
271
+ llamacpp router logs --clear-all
272
+
273
+ # Rotate log files with timestamp
274
+ llamacpp router logs --rotate
275
+
276
+ # View system logs instead of activity
277
+ llamacpp router logs --stderr
278
+ ```
279
+
280
+ **What's logged (activity):**
281
+ - ✅ Model name used
282
+ - ✅ HTTP status code (color-coded)
283
+ - ✅ Request duration (ms)
284
+ - ✅ Backend server (host:port)
285
+ - ✅ First 50 chars of prompt
286
+ - ✅ Error messages (if failed)
287
+
288
+ **Verbose mode benefits:**
289
+ - Detailed JSON logs for LLM/script parsing
290
+ - Stored in `~/.llamacpp/logs/router.log`
291
+ - Automatic rotation when exceeding 100MB
292
+ - Machine-readable format with timestamps
293
+
294
+ ### How It Works
295
+
296
+ 1. Router receives request with `model` field
297
+ 2. Finds running server configured for that model
298
+ 3. Proxies request to backend server
299
+ 4. Streams response back to client
300
+
301
+ If the requested model's server is not running, the router returns a 503 error with a helpful message.
302
+
143
303
  ### Example Output
144
304
 
145
305
  Creating a server:
@@ -189,6 +349,15 @@ Assistant: The capital of France is Paris...
189
349
 
190
350
  ## Commands
191
351
 
352
+ ### `llamacpp`
353
+ Launch the interactive TUI dashboard for monitoring and managing servers.
354
+
355
+ ```bash
356
+ llamacpp
357
+ ```
358
+
359
+ See [Interactive TUI Dashboard](#interactive-tui-dashboard) for full details.
360
+
192
361
  ### `llamacpp ls`
193
362
  List all GGUF models in ~/models directory.
194
363
 
@@ -307,6 +476,47 @@ llamacpp logs --rotate
307
476
 
308
477
  **Use case:** Quickly see which servers are accumulating large logs, or clean up all logs at once.
309
478
 
479
+ ## Models Management TUI
480
+
481
+ The Models Management TUI is accessible by pressing `M` from the `llamacpp` list view. It provides a full-featured interface for managing local models and searching/downloading new ones.
482
+
483
+ **Features:**
484
+ - **Browse local models** - View all GGUF files with size, modification date, and server usage
485
+ - **Delete models** - Remove models with automatic cleanup of associated servers
486
+ - **Search HuggingFace** - Find and browse models from the Hugging Face repository
487
+ - **Download with progress** - Real-time progress tracking for model downloads
488
+ - **Seamless navigation** - Switch between monitoring and models management
489
+
490
+ **Quick Access:**
491
+ ```bash
492
+ # Launch TUI and press 'M' to open Models Management
493
+ llamacpp
494
+ ```
495
+
496
+ **Models View:**
497
+ - View all installed models in scrollable table
498
+ - See which servers are using each model
499
+ - Color-coded status (green = safe to delete, yellow/gray = in use by servers)
500
+ - Delete models with Enter or D key
501
+ - Cascade deletion: automatically removes associated servers
502
+
503
+ **Search View (press 'S' from Models view):**
504
+ - Search HuggingFace models by name
505
+ - Browse search results with download counts and likes
506
+ - Expand models to show available GGUF files
507
+ - Download files with real-time progress tracking
508
+ - Cancel downloads with ESC (cleans up partial files)
509
+
510
+ **Keyboard Controls:**
511
+ - **M** - Switch to Models view (from TUI list view)
512
+ - **↑/↓** or **k/j** - Navigate lists
513
+ - **Enter** - Select/download/delete
514
+ - **S** - Open search view (from models view)
515
+ - **/** or **I** - Focus search input (in search view)
516
+ - **R** - Refresh view
517
+ - **ESC** - Back/cancel
518
+ - **Q** - Quit
519
+
310
520
  ## Server Management
311
521
 
312
522
  ### `llamacpp server create <model> [options]`
@@ -523,30 +733,23 @@ The compact format shows one line per HTTP request and includes:
523
733
 
524
734
  Use `--http` to see full request/response JSON, or `--verbose` option to see all internal server logs.
525
735
 
526
- ### `llamacpp server monitor [identifier]`
527
- Real-time monitoring TUI showing server metrics, GPU/CPU usage, and active inference slots.
736
+ ## Interactive TUI Dashboard
528
737
 
529
- ![Server Monitoring TUI](https://raw.githubusercontent.com/dweaver/llamacpp-cli/main/docs/images/monitor-detail.png)
530
-
531
- **Two Modes:**
738
+ The main way to monitor and manage servers is through the interactive TUI dashboard, launched by running `llamacpp` with no arguments.
532
739
 
533
- **1. Multi-Server Dashboard (no identifier):**
534
740
  ```bash
535
- llamacpp server monitor
741
+ llamacpp
536
742
  ```
537
- Shows overview of all servers with system resources. Use arrow keys (↑/↓) or vim keys (k/j) to navigate, then press Enter to view server details.
538
743
 
539
- **2. Single-Server Monitor (with identifier):**
540
- ```bash
541
- # Monitor by partial name
542
- llamacpp server monitor llama-3.2-3b
543
-
544
- # Monitor by port
545
- llamacpp server monitor 9000
744
+ ![Server Monitoring TUI](https://raw.githubusercontent.com/dweaver/llamacpp-cli/main/docs/images/monitor-detail.png)
546
745
 
547
- # Monitor by server ID
548
- llamacpp server monitor llama-3-2-3b
549
- ```
746
+ **Features:**
747
+ - Multi-server dashboard with real-time metrics
748
+ - Drill-down to single-server detail view
749
+ - Create, start, stop, and remove servers without leaving the TUI
750
+ - Edit server configuration inline
751
+ - Access Models Management (press `M`)
752
+ - Historical metrics with time-series charts
550
753
 
551
754
  **Multi-Server Dashboard:**
552
755
  ```
@@ -571,19 +774,21 @@ llamacpp server monitor llama-3-2-3b
571
774
  - **System Resources** - GPU/CPU/ANE utilization, memory usage, temperature
572
775
 
573
776
  **Keyboard Shortcuts:**
574
- - **Multi-Server Mode:**
777
+ - **List View (Multi-Server):**
575
778
  - `↑/↓` or `k/j` - Navigate server list
576
779
  - `Enter` - View details for selected server
577
- - `ESC` - Back to list (from detail view)
578
- - `H` - View historical metrics
579
- - `R` - Force refresh now
580
- - `+/-` - Adjust update speed
581
- - `Q` - Quit
582
- - **Single-Server Mode:**
583
- - `H` - View historical metrics
584
- - `R` - Force refresh now
585
- - `+/-` - Adjust update speed
586
- - `Q` - Quit
780
+ - `N` - Create new server
781
+ - `M` - Switch to Models Management
782
+ - `H` - View historical metrics (all servers)
783
+ - `ESC` - Exit TUI
784
+ - `Q` - Quit immediately
785
+ - **Detail View (Single-Server):**
786
+ - `S` - Start/Stop server (toggles based on status)
787
+ - `C` - Open configuration screen
788
+ - `R` - Remove server (with confirmation)
789
+ - `H` - View historical metrics (this server)
790
+ - `ESC` - Back to list view
791
+ - `Q` - Quit immediately
587
792
  - **Historical View:**
588
793
  - `H` - Toggle Hour View (Recent ↔ Hour)
589
794
  - `ESC` - Back to live monitoring
@@ -621,7 +826,7 @@ Press `H` from any live monitoring view to see historical time-series charts. Th
621
826
 
622
827
  **Data Collection:**
623
828
 
624
- Historical data is automatically collected whenever you run the monitor command. Data is retained for 24 hours in `~/.llamacpp/history/<server-id>.json` files, then automatically pruned.
829
+ Historical data is automatically collected whenever you run the TUI (`llamacpp`). Data is retained for 24 hours in `~/.llamacpp/history/<server-id>.json` files, then automatically pruned.
625
830
 
626
831
  **Multi-Server Historical View:**
627
832
 
@@ -643,13 +848,15 @@ For GPU and CPU utilization metrics, install macmon:
643
848
  brew install vladkens/tap/macmon
644
849
  ```
645
850
 
646
- Without macmon, the monitor still shows:
851
+ Without macmon, the TUI still shows:
647
852
  - ✅ Server status and uptime
648
853
  - ✅ Active slots and token generation speeds
649
854
  - ✅ Memory usage (via built-in vm_stat)
650
855
  - ❌ GPU/CPU/ANE utilization (requires macmon)
651
856
 
652
- **Identifiers:** Port number, server ID, or partial model name
857
+ ### Deprecated: `llamacpp server monitor`
858
+
859
+ The `llamacpp server monitor` command is deprecated. Use `llamacpp` instead to launch the TUI dashboard.
653
860
 
654
861
  ## Configuration
655
862
 
@@ -735,7 +942,9 @@ llamacpp server logs <identifier> --errors
735
942
  npm install
736
943
 
737
944
  # Run in development mode
738
- npm run dev -- ps
945
+ npm run dev # Launch TUI
946
+ npm run dev -- ps # List servers (static table)
947
+ npm run dev -- ls # List models
739
948
 
740
949
  # Build for production
741
950
  npm run build
package/dist/cli.js CHANGED
@@ -1,5 +1,38 @@
1
1
  #!/usr/bin/env node
2
2
  "use strict";
3
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
4
+ if (k2 === undefined) k2 = k;
5
+ var desc = Object.getOwnPropertyDescriptor(m, k);
6
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
7
+ desc = { enumerable: true, get: function() { return m[k]; } };
8
+ }
9
+ Object.defineProperty(o, k2, desc);
10
+ }) : (function(o, m, k, k2) {
11
+ if (k2 === undefined) k2 = k;
12
+ o[k2] = m[k];
13
+ }));
14
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
15
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
16
+ }) : function(o, v) {
17
+ o["default"] = v;
18
+ });
19
+ var __importStar = (this && this.__importStar) || (function () {
20
+ var ownKeys = function(o) {
21
+ ownKeys = Object.getOwnPropertyNames || function (o) {
22
+ var ar = [];
23
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
24
+ return ar;
25
+ };
26
+ return ownKeys(o);
27
+ };
28
+ return function (mod) {
29
+ if (mod && mod.__esModule) return mod;
30
+ var result = {};
31
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
32
+ __setModuleDefault(result, mod);
33
+ return result;
34
+ };
35
+ })();
3
36
  var __importDefault = (this && this.__importDefault) || function (mod) {
4
37
  return (mod && mod.__esModule) ? mod : { "default": mod };
5
38
  };
@@ -23,12 +56,29 @@ const server_show_1 = require("./commands/server-show");
23
56
  const config_1 = require("./commands/config");
24
57
  const config_global_1 = require("./commands/config-global");
25
58
  const monitor_1 = require("./commands/monitor");
59
+ const start_2 = require("./commands/router/start");
60
+ const stop_2 = require("./commands/router/stop");
61
+ const status_1 = require("./commands/router/status");
62
+ const restart_1 = require("./commands/router/restart");
63
+ const config_2 = require("./commands/router/config");
64
+ const logs_2 = require("./commands/router/logs");
26
65
  const package_json_1 = __importDefault(require("../package.json"));
27
66
  const program = new commander_1.Command();
28
67
  program
29
68
  .name('llamacpp')
30
69
  .description('CLI tool to manage local llama.cpp servers on macOS')
31
- .version(package_json_1.default.version, '-v, --version', 'Output the version number');
70
+ .version(package_json_1.default.version, '-v, --version', 'Output the version number')
71
+ .action(async () => {
72
+ // Default action: launch TUI when no command provided
73
+ try {
74
+ const { tuiCommand } = await Promise.resolve().then(() => __importStar(require('./commands/tui')));
75
+ await tuiCommand();
76
+ }
77
+ catch (error) {
78
+ console.error(chalk_1.default.red('❌ Error:'), error.message);
79
+ process.exit(1);
80
+ }
81
+ });
32
82
  // List models
33
83
  program
34
84
  .command('ls')
@@ -42,14 +92,13 @@ program
42
92
  process.exit(1);
43
93
  }
44
94
  });
45
- // List running servers
95
+ // List servers (static table)
46
96
  program
47
- .command('ps [identifier]')
48
- .description('Interactive server monitoring dashboard')
49
- .option('--table', 'Show static table instead of TUI (for scripting)')
50
- .action(async (identifier, options) => {
97
+ .command('ps')
98
+ .description('List all servers with status (static table)')
99
+ .action(async () => {
51
100
  try {
52
- await (0, ps_1.psCommand)(identifier, options);
101
+ await (0, ps_1.psCommand)();
53
102
  }
54
103
  catch (error) {
55
104
  console.error(chalk_1.default.red('❌ Error:'), error.message);
@@ -293,14 +342,14 @@ server
293
342
  process.exit(1);
294
343
  }
295
344
  });
296
- // Monitor server (deprecated - redirects to ps)
345
+ // Monitor server (deprecated - redirects to TUI)
297
346
  server
298
347
  .command('monitor [identifier]')
299
- .description('Monitor server with real-time metrics TUI (deprecated: use "llamacpp ps" instead)')
348
+ .description('Monitor server with real-time metrics TUI (deprecated: use "llamacpp" instead)')
300
349
  .action(async (identifier) => {
301
350
  try {
302
351
  console.log(chalk_1.default.yellow('⚠️ The "monitor" command is deprecated and will be removed in a future version.'));
303
- console.log(chalk_1.default.dim(' Please use "llamacpp ps" instead for the same functionality.\n'));
352
+ console.log(chalk_1.default.dim(' Please use "llamacpp" instead for the same functionality.\n'));
304
353
  await (0, monitor_1.monitorCommand)(identifier);
305
354
  }
306
355
  catch (error) {
@@ -308,6 +357,101 @@ server
308
357
  process.exit(1);
309
358
  }
310
359
  });
360
+ // Router management commands
361
+ const router = program
362
+ .command('router')
363
+ .description('Manage the unified router endpoint');
364
+ // Start router
365
+ router
366
+ .command('start')
367
+ .description('Start the router service')
368
+ .action(async () => {
369
+ try {
370
+ await (0, start_2.routerStartCommand)();
371
+ }
372
+ catch (error) {
373
+ console.error(chalk_1.default.red('❌ Error:'), error.message);
374
+ process.exit(1);
375
+ }
376
+ });
377
+ // Stop router
378
+ router
379
+ .command('stop')
380
+ .description('Stop the router service')
381
+ .action(async () => {
382
+ try {
383
+ await (0, stop_2.routerStopCommand)();
384
+ }
385
+ catch (error) {
386
+ console.error(chalk_1.default.red('❌ Error:'), error.message);
387
+ process.exit(1);
388
+ }
389
+ });
390
+ // Show router status
391
+ router
392
+ .command('status')
393
+ .description('Show router status and configuration')
394
+ .action(async () => {
395
+ try {
396
+ await (0, status_1.routerStatusCommand)();
397
+ }
398
+ catch (error) {
399
+ console.error(chalk_1.default.red('❌ Error:'), error.message);
400
+ process.exit(1);
401
+ }
402
+ });
403
+ // Restart router
404
+ router
405
+ .command('restart')
406
+ .description('Restart the router service')
407
+ .action(async () => {
408
+ try {
409
+ await (0, restart_1.routerRestartCommand)();
410
+ }
411
+ catch (error) {
412
+ console.error(chalk_1.default.red('❌ Error:'), error.message);
413
+ process.exit(1);
414
+ }
415
+ });
416
+ // Configure router
417
+ router
418
+ .command('config')
419
+ .description('Update router configuration')
420
+ .option('-p, --port <number>', 'Update port number', parseInt)
421
+ .option('-h, --host <address>', 'Update bind address')
422
+ .option('--timeout <ms>', 'Update request timeout (milliseconds)', parseInt)
423
+ .option('--health-interval <ms>', 'Update health check interval (milliseconds)', parseInt)
424
+ .option('-v, --verbose [boolean]', 'Enable/disable verbose logging to file (true/false)', (val) => val === 'true' || val === '1')
425
+ .option('-r, --restart', 'Automatically restart router if running')
426
+ .action(async (options) => {
427
+ try {
428
+ await (0, config_2.routerConfigCommand)(options);
429
+ }
430
+ catch (error) {
431
+ console.error(chalk_1.default.red('❌ Error:'), error.message);
432
+ process.exit(1);
433
+ }
434
+ });
435
+ // Router logs
436
+ router
437
+ .command('logs')
438
+ .description('View router logs')
439
+ .option('-f, --follow', 'Follow logs in real-time (like tail -f)')
440
+ .option('-n, --lines <number>', 'Number of lines to show (default: 50)', parseInt)
441
+ .option('--stderr', 'Show system logs (stderr) instead of activity logs (stdout)')
442
+ .option('-v, --verbose', 'Show verbose JSON log file (if enabled)')
443
+ .option('--clear', 'Clear the log file')
444
+ .option('--rotate', 'Rotate the log file with timestamp')
445
+ .option('--clear-all', 'Clear all router logs (activity, system, verbose)')
446
+ .action(async (options) => {
447
+ try {
448
+ await (0, logs_2.routerLogsCommand)(options);
449
+ }
450
+ catch (error) {
451
+ console.error(chalk_1.default.red('❌ Error:'), error.message);
452
+ process.exit(1);
453
+ }
454
+ });
311
455
  // Parse arguments
312
456
  program.parse();
313
457
  //# sourceMappingURL=cli.js.map