@appkit/llamacpp-cli 1.11.0 → 1.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/README.md +572 -170
  2. package/dist/cli.js +99 -0
  3. package/dist/cli.js.map +1 -1
  4. package/dist/commands/admin/config.d.ts +10 -0
  5. package/dist/commands/admin/config.d.ts.map +1 -0
  6. package/dist/commands/admin/config.js +100 -0
  7. package/dist/commands/admin/config.js.map +1 -0
  8. package/dist/commands/admin/logs.d.ts +10 -0
  9. package/dist/commands/admin/logs.d.ts.map +1 -0
  10. package/dist/commands/admin/logs.js +114 -0
  11. package/dist/commands/admin/logs.js.map +1 -0
  12. package/dist/commands/admin/restart.d.ts +2 -0
  13. package/dist/commands/admin/restart.d.ts.map +1 -0
  14. package/dist/commands/admin/restart.js +29 -0
  15. package/dist/commands/admin/restart.js.map +1 -0
  16. package/dist/commands/admin/start.d.ts +2 -0
  17. package/dist/commands/admin/start.d.ts.map +1 -0
  18. package/dist/commands/admin/start.js +30 -0
  19. package/dist/commands/admin/start.js.map +1 -0
  20. package/dist/commands/admin/status.d.ts +2 -0
  21. package/dist/commands/admin/status.d.ts.map +1 -0
  22. package/dist/commands/admin/status.js +82 -0
  23. package/dist/commands/admin/status.js.map +1 -0
  24. package/dist/commands/admin/stop.d.ts +2 -0
  25. package/dist/commands/admin/stop.d.ts.map +1 -0
  26. package/dist/commands/admin/stop.js +21 -0
  27. package/dist/commands/admin/stop.js.map +1 -0
  28. package/dist/commands/logs.d.ts +1 -0
  29. package/dist/commands/logs.d.ts.map +1 -1
  30. package/dist/commands/logs.js +22 -0
  31. package/dist/commands/logs.js.map +1 -1
  32. package/dist/lib/admin-manager.d.ts +111 -0
  33. package/dist/lib/admin-manager.d.ts.map +1 -0
  34. package/dist/lib/admin-manager.js +413 -0
  35. package/dist/lib/admin-manager.js.map +1 -0
  36. package/dist/lib/admin-server.d.ts +148 -0
  37. package/dist/lib/admin-server.d.ts.map +1 -0
  38. package/dist/lib/admin-server.js +1161 -0
  39. package/dist/lib/admin-server.js.map +1 -0
  40. package/dist/lib/download-job-manager.d.ts +64 -0
  41. package/dist/lib/download-job-manager.d.ts.map +1 -0
  42. package/dist/lib/download-job-manager.js +164 -0
  43. package/dist/lib/download-job-manager.js.map +1 -0
  44. package/dist/tui/MultiServerMonitorApp.js +1 -1
  45. package/dist/types/admin-config.d.ts +19 -0
  46. package/dist/types/admin-config.d.ts.map +1 -0
  47. package/dist/types/admin-config.js +3 -0
  48. package/dist/types/admin-config.js.map +1 -0
  49. package/dist/utils/log-parser.d.ts +9 -0
  50. package/dist/utils/log-parser.d.ts.map +1 -1
  51. package/dist/utils/log-parser.js +11 -0
  52. package/dist/utils/log-parser.js.map +1 -1
  53. package/package.json +10 -2
  54. package/web/README.md +429 -0
  55. package/web/dist/assets/index-Bin89Lwr.css +1 -0
  56. package/web/dist/assets/index-CVmonw3T.js +17 -0
  57. package/web/dist/index.html +14 -0
  58. package/web/dist/vite.svg +1 -0
  59. package/.versionrc.json +0 -16
  60. package/CHANGELOG.md +0 -203
  61. package/MONITORING-ACCURACY-FIX.md +0 -199
  62. package/PER-PROCESS-METRICS.md +0 -190
  63. package/docs/images/.gitkeep +0 -1
  64. package/src/cli.ts +0 -423
  65. package/src/commands/config-global.ts +0 -38
  66. package/src/commands/config.ts +0 -323
  67. package/src/commands/create.ts +0 -183
  68. package/src/commands/delete.ts +0 -74
  69. package/src/commands/list.ts +0 -37
  70. package/src/commands/logs-all.ts +0 -251
  71. package/src/commands/logs.ts +0 -321
  72. package/src/commands/monitor.ts +0 -110
  73. package/src/commands/ps.ts +0 -84
  74. package/src/commands/pull.ts +0 -44
  75. package/src/commands/rm.ts +0 -107
  76. package/src/commands/router/config.ts +0 -116
  77. package/src/commands/router/logs.ts +0 -256
  78. package/src/commands/router/restart.ts +0 -36
  79. package/src/commands/router/start.ts +0 -60
  80. package/src/commands/router/status.ts +0 -119
  81. package/src/commands/router/stop.ts +0 -33
  82. package/src/commands/run.ts +0 -233
  83. package/src/commands/search.ts +0 -107
  84. package/src/commands/server-show.ts +0 -161
  85. package/src/commands/show.ts +0 -207
  86. package/src/commands/start.ts +0 -101
  87. package/src/commands/stop.ts +0 -39
  88. package/src/commands/tui.ts +0 -25
  89. package/src/lib/config-generator.ts +0 -130
  90. package/src/lib/history-manager.ts +0 -172
  91. package/src/lib/launchctl-manager.ts +0 -225
  92. package/src/lib/metrics-aggregator.ts +0 -257
  93. package/src/lib/model-downloader.ts +0 -328
  94. package/src/lib/model-scanner.ts +0 -157
  95. package/src/lib/model-search.ts +0 -114
  96. package/src/lib/models-dir-setup.ts +0 -46
  97. package/src/lib/port-manager.ts +0 -80
  98. package/src/lib/router-logger.ts +0 -201
  99. package/src/lib/router-manager.ts +0 -414
  100. package/src/lib/router-server.ts +0 -538
  101. package/src/lib/state-manager.ts +0 -206
  102. package/src/lib/status-checker.ts +0 -113
  103. package/src/lib/system-collector.ts +0 -315
  104. package/src/tui/ConfigApp.ts +0 -1085
  105. package/src/tui/HistoricalMonitorApp.ts +0 -587
  106. package/src/tui/ModelsApp.ts +0 -368
  107. package/src/tui/MonitorApp.ts +0 -386
  108. package/src/tui/MultiServerMonitorApp.ts +0 -1833
  109. package/src/tui/RootNavigator.ts +0 -74
  110. package/src/tui/SearchApp.ts +0 -511
  111. package/src/tui/SplashScreen.ts +0 -149
  112. package/src/types/global-config.ts +0 -26
  113. package/src/types/history-types.ts +0 -39
  114. package/src/types/model-info.ts +0 -8
  115. package/src/types/monitor-types.ts +0 -162
  116. package/src/types/router-config.ts +0 -25
  117. package/src/types/server-config.ts +0 -46
  118. package/src/utils/downsample-utils.ts +0 -128
  119. package/src/utils/file-utils.ts +0 -146
  120. package/src/utils/format-utils.ts +0 -98
  121. package/src/utils/log-parser.ts +0 -271
  122. package/src/utils/log-utils.ts +0 -178
  123. package/src/utils/process-utils.ts +0 -316
  124. package/src/utils/prompt-utils.ts +0 -47
  125. package/test-load.sh +0 -100
  126. package/tsconfig.json +0 -20
@@ -0,0 +1,14 @@
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <link rel="icon" type="image/svg+xml" href="/vite.svg" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <title>web</title>
8
+ <script type="module" crossorigin src="/assets/index-CVmonw3T.js"></script>
9
+ <link rel="stylesheet" crossorigin href="/assets/index-Bin89Lwr.css">
10
+ </head>
11
+ <body>
12
+ <div id="root"></div>
13
+ </body>
14
+ </html>
@@ -0,0 +1 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>
package/.versionrc.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- "types": [
3
- { "type": "feat", "section": "Features" },
4
- { "type": "fix", "section": "Bug Fixes" },
5
- { "type": "chore", "hidden": true },
6
- { "type": "docs", "hidden": true },
7
- { "type": "style", "hidden": true },
8
- { "type": "refactor", "section": "Code Refactoring" },
9
- { "type": "perf", "section": "Performance Improvements" },
10
- { "type": "test", "hidden": true }
11
- ],
12
- "releaseCommitMessageFormat": "chore(release): {{currentTag}}",
13
- "skip": {
14
- "changelog": false
15
- }
16
- }
package/CHANGELOG.md DELETED
@@ -1,203 +0,0 @@
1
- # Changelog
2
-
3
- All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
4
-
5
- ## [1.10.1](https://github.com/appkitstudio/llamacpp-cli/compare/v1.10.0...v1.10.1) (2026-02-04)
6
-
7
-
8
- ### Features
9
-
10
- * increase max context size limit from 128k to 2M ([c3a810c](https://github.com/appkitstudio/llamacpp-cli/commit/c3a810c782f9fd2146bfb9cfb8ac3dcbbf8b8abf))
11
-
12
-
13
- ### Bug Fixes
14
-
15
- * support both old and new llama.cpp log formats in log parser ([002c97e](https://github.com/appkitstudio/llamacpp-cli/commit/002c97e0f67cba51bc31c34980a9cb91c5e09cd9))
16
-
17
- ## [1.10.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.9.0...v1.10.0) (2026-02-02)
18
-
19
-
20
- ### Features
21
-
22
- * add Config screen TUI for editing server configuration ([0555eb8](https://github.com/appkitstudio/llamacpp-cli/commit/0555eb87b25fd9c0496ddf04bf59433ae0e9dc48))
23
- * add Models management TUI for displaying and deleting models ([45b312f](https://github.com/appkitstudio/llamacpp-cli/commit/45b312f160e476a8effd0ad92a7f88e2b533f9c5))
24
- * add splash screen with ASCII logo on TUI startup ([9a2bbba](https://github.com/appkitstudio/llamacpp-cli/commit/9a2bbbac80c8736548a71d38230a69b3745198ea))
25
- * add Start/Stop controls and UI improvements to TUI ([2409c8e](https://github.com/appkitstudio/llamacpp-cli/commit/2409c8e283f37050f2b4faccbaef82bf6db2ac39))
26
- * enhance TUI with interactive dashboard and deprecate old commands ([464c224](https://github.com/appkitstudio/llamacpp-cli/commit/464c224aefc2592442f80e72a076ec792a9d36dc))
27
-
28
-
29
- ### Code Refactoring
30
-
31
- * simplify TUI code for clarity and maintainability ([2b50d52](https://github.com/appkitstudio/llamacpp-cli/commit/2b50d5288b2ecd5bed4619518f50d74bd43c6b76))
32
-
33
- ## [1.9.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.8.0...v1.9.0) (2026-01-27)
34
-
35
-
36
- ### Features
37
-
38
- * add unified router endpoint for automatic model routing ([6db0892](https://github.com/appkitstudio/llamacpp-cli/commit/6db0892adca753c021c7d9be6b69231c983170a1))
39
-
40
- ## [1.9.0] (2026-01-26)
41
-
42
- ### Features
43
-
44
- * **router**: add unified router endpoint for automatic model routing ([#TBD])
45
-
46
- The router provides a single OpenAI-compatible endpoint that automatically routes requests to the correct backend server based on model name. Perfect for LLM clients that don't support multiple endpoints.
47
-
48
- **Key features:**
49
- - Single endpoint (default: http://localhost:9100) for all models
50
- - Automatic routing based on `model` field in requests
51
- - Zero-config model discovery from running servers
52
- - Aggregated `/v1/models` endpoint
53
- - Native Node.js HTTP proxy (no external dependencies)
54
- - Streaming support for chat completions
55
- - Comprehensive error handling (404, 503, 502, 504)
56
-
57
- **Commands:**
58
- - `llamacpp router start` - Start router service
59
- - `llamacpp router stop` - Stop router service
60
- - `llamacpp router status` - Show status and available models
61
- - `llamacpp router restart` - Restart router
62
- - `llamacpp router config` - Update configuration (port, host, timeout, health-interval)
63
-
64
- **Usage:**
65
- ```python
66
- from openai import OpenAI
67
-
68
- client = OpenAI(base_url="http://localhost:9100/v1", api_key="not-needed")
69
- response = client.chat.completions.create(
70
- model="llama-3.2-3b-instruct-q4_k_m.gguf",
71
- messages=[{"role": "user", "content": "Hello!"}]
72
- )
73
- ```
74
-
75
- ## [1.7.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.6.0...v1.7.0) (2026-01-23)
76
-
77
-
78
- ### Features
79
-
80
- * add log management commands and auto-rotation for server logs ([e670a53](https://github.com/appkitstudio/llamacpp-cli/commit/e670a53a712d04267f06327af730dc2429e4ab43))
81
-
82
- ## [1.6.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.5.0...v1.6.0) (2026-01-17)
83
-
84
-
85
- ### Features
86
-
87
- * add full-hour downsampling functions and enhance multi-server monitor UI with dynamic server ID width ([ae2862a](https://github.com/appkitstudio/llamacpp-cli/commit/ae2862acba905cddf60f0e7c30f6a7867391a5e2))
88
- * add GPU memory tracking to server monitoring ([bc59c6a](https://github.com/appkitstudio/llamacpp-cli/commit/bc59c6a74580e428ab674167146caea47d8a32c1))
89
- * enhance monitoring functionality with server status updates and improved resource tracking ([45fb833](https://github.com/appkitstudio/llamacpp-cli/commit/45fb833da5efe023a2271e7bd12d780a71474629))
90
- * enhance multi-server monitor UI with improved navigation and selection indicators ([9e57cfb](https://github.com/appkitstudio/llamacpp-cli/commit/9e57cfb8ce93a2c561981598cf75f0e4ff1a477d))
91
- * enhance server monitoring with interactive dashboard and improved metrics display ([fba8d79](https://github.com/appkitstudio/llamacpp-cli/commit/fba8d79ee58ecd7ccfe02e319ae7bf5474b591df))
92
- * implement per-process metrics for historical monitoring accuracy ([cc59df0](https://github.com/appkitstudio/llamacpp-cli/commit/cc59df069775031de1bfacdeb3a462a17610e4eb))
93
- * improve historical monitoring UI with faster refresh rate and enhanced display elements ([e0ce04b](https://github.com/appkitstudio/llamacpp-cli/commit/e0ce04ba258f6d945a977c39f056ba22cb324c70))
94
-
95
- ## [1.5.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.4.1...v1.5.0) (2026-01-13)
96
-
97
-
98
- ### Features
99
-
100
- * add multi-server monitor with optimized metrics collection ([9deaf2b](https://github.com/appkitstudio/llamacpp-cli/commit/9deaf2b12280304a0941b0da82a0838d2a80bf6a))
101
- * add real-time monitoring TUI with GPU/CPU metrics ([5f762f5](https://github.com/appkitstudio/llamacpp-cli/commit/5f762f542817e405fd05c1fe3282a4621456c068))
102
-
103
- ## [1.4.1](https://github.com/appkitstudio/llamacpp-cli/compare/v1.4.0...v1.4.1) (2026-01-09)
104
-
105
-
106
- ### Bug Fixes
107
-
108
- * downgrade chalk to v4 for CommonJS compatibility ([0fb11d8](https://github.com/appkitstudio/llamacpp-cli/commit/0fb11d886039c3dea4846833f0e2cc5e264c1115))
109
-
110
- ## [1.4.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.3.3...v1.4.0) (2026-01-09)
111
-
112
-
113
- ### ⚠ BREAKING CHANGES
114
-
115
- * Default models directory changed from ~/models to ~/.llamacpp/models
116
-
117
- Changes:
118
- - New default: ~/.llamacpp/models (keeps all llamacpp data in one place)
119
- - Interactive prompt when models directory doesn't exist
120
- - New 'config' command to view/change global settings
121
- - Users can customize models directory and switch anytime
122
-
123
- Commands:
124
- - `llamacpp config` - View current configuration
125
- - `llamacpp config --models-dir <path>` - Change models directory
126
- - All commands (ls, pull, server create) now use configured directory
127
-
128
- Interactive Setup:
129
- - When models directory is missing, user is prompted to:
130
- 1. Accept default path (press Enter)
131
- 2. Specify custom path (type path and press Enter)
132
- - Configuration is saved and persists across sessions
133
- - Directory is created automatically after confirmation
134
-
135
- Benefits:
136
- - No more errors on fresh installs
137
- - All llamacpp data in one place (~/.llamacpp/)
138
- - Flexible for users who want custom locations
139
- - Better onboarding experience
140
-
141
- Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
142
-
143
- ### Features
144
-
145
- * add configurable models directory with interactive setup ([975a81b](https://github.com/appkitstudio/llamacpp-cli/commit/975a81b217837b4238c4301f216e18e59f144cbe))
146
-
147
- ## [1.3.3](https://github.com/appkitstudio/llamacpp-cli/compare/v1.3.2...v1.3.3) (2026-01-09)
148
-
149
-
150
- ### Bug Fixes
151
-
152
- * auto-create ~/models directory when downloading models ([a9f54c1](https://github.com/appkitstudio/llamacpp-cli/commit/a9f54c119a41e29fa42003f3c1af0a732f5db9ea))
153
-
154
- ## [1.3.2](https://github.com/appkitstudio/llamacpp-cli/compare/v1.3.1...v1.3.2) (2026-01-09)
155
-
156
-
157
- ### Bug Fixes
158
-
159
- * read version from package.json instead of hardcoded value ([c9f3113](https://github.com/appkitstudio/llamacpp-cli/commit/c9f31133621a339f244073da918e1eec47f81e9c))
160
-
161
- ## [1.3.1](https://github.com/appkitstudio/llamacpp-cli/compare/v1.3.0...v1.3.1) (2026-01-09)
162
-
163
-
164
- ### Bug Fixes
165
-
166
- * improve bin script resilience and add custom flags support ([9884ceb](https://github.com/appkitstudio/llamacpp-cli/commit/9884cebdebad2d20a00fd3b077f0b2d61aecdc8d))
167
-
168
- ## [1.3.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.2.0...v1.3.0) (2025-12-30)
169
-
170
-
171
- ### Features
172
-
173
- * add server config command with host parameter support ([11faf4c](https://github.com/appkitstudio/llamacpp-cli/commit/11faf4c4031696be8e49efb6dbe73f4c5f5bee50))
174
-
175
- ## [1.2.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.1.1...v1.2.0) (2025-12-09)
176
-
177
-
178
- ### Features
179
-
180
- * add 'show' command to display server configuration and status details ([1f615eb](https://github.com/appkitstudio/llamacpp-cli/commit/1f615ebe369ae9d61b087a79fc17787e4a56d3b5))
181
- * add support for one-shot message mode in 'run' command ([853d62a](https://github.com/appkitstudio/llamacpp-cli/commit/853d62ab9ef9ec8c4e45213f00453d7b28648124))
182
- * implement 'create' command for server management and update related documentation ([39996e2](https://github.com/appkitstudio/llamacpp-cli/commit/39996e26f412dd593d9c253322a90d17907dfff4))
183
-
184
- ## [1.1.1](https://github.com/appkitstudio/llamacpp-cli/compare/v1.1.0...v1.1.1) (2025-12-09)
185
-
186
-
187
- ### Bug Fixes
188
-
189
- * update command syntax to include 'server' prefix for consistency ([22dfdfd](https://github.com/appkitstudio/llamacpp-cli/commit/22dfdfd62026503a19ff0a6d91da88954f71d1ff))
190
-
191
- ## [1.1.0](https://github.com/appkitstudio/llamacpp-cli/compare/v1.0.0...v1.1.0) (2025-12-09)
192
-
193
-
194
- ### Features
195
-
196
- * enhance logging and server status features ([1411e76](https://github.com/appkitstudio/llamacpp-cli/commit/1411e7677d01b2d04096d3d271ae95bb517e59b2))
197
-
198
- ## 1.0.0 (2025-12-04)
199
-
200
-
201
- ### Features
202
-
203
- * refactored and added commands ([0e58ae1](https://github.com/appkitstudio/llamacpp-cli/commit/0e58ae1c619a519b484ae3c78863b4932672865c))
@@ -1,199 +0,0 @@
1
- # Historical Monitoring Accuracy Fix
2
-
3
- **STATUS:** This document describes the initial fix attempt for memory calculations. However, the root issue was that historical monitoring was showing **system-wide metrics** instead of **per-process metrics**. See `PER-PROCESS-METRICS.md` for the correct implementation.
4
-
5
- ## Issue Summary
6
-
7
- Comparison between our historical monitoring and macmon revealed discrepancies in memory usage calculations.
8
-
9
- ## Issues Identified
10
-
11
- ### 1. Memory Total Calculation (CRITICAL)
12
-
13
- **Problem:** Total memory was calculated by summing all vm_stat page counts, which doesn't equal the actual installed RAM.
14
-
15
- **Evidence:**
16
- - Historical monitor showed: ~60% memory usage
17
- - macmon showed: 26.86 / 32.0 GB = ~84% memory usage
18
- - The denominator (32.0 GB installed RAM) was being calculated incorrectly
19
-
20
- **Root Cause:**
21
- ```typescript
22
- // OLD CODE (INCORRECT)
23
- const totalPages = pagesActive + pagesWired + pagesCompressed +
24
- pagesFree + pagesInactive + pagesSpeculative;
25
- const memoryTotal = totalPages * pageSize;
26
- ```
27
-
28
- This approach has fundamental flaws:
29
- - vm_stat doesn't report all memory categories (kernel reserved, etc.)
30
- - Page counts don't sum to actual installed RAM
31
- - Results in artificially inflated "total" value
32
- - Makes memory usage appear lower than reality
33
-
34
- **Fix:**
35
- ```typescript
36
- // NEW CODE (CORRECT)
37
- // Get total installed RAM from sysctl (accurate)
38
- const memoryTotal = await execCommand('sysctl -n hw.memsize 2>/dev/null');
39
- ```
40
-
41
- Use `sysctl hw.memsize` to get the actual installed RAM size in bytes. This matches what Activity Monitor and macmon report.
42
-
43
- ### 2. Memory Used Calculation (VERIFIED CORRECT)
44
-
45
- **Current approach:**
46
- ```typescript
47
- // Used = Active + Wired + Compressed
48
- const usedPages = pagesActive + pagesWired + pagesCompressed;
49
- const memoryUsed = usedPages * pageSize;
50
- ```
51
-
52
- This formula is **correct** and matches what Activity Monitor and macmon report as "used memory".
53
-
54
- - **Active:** Recently used memory
55
- - **Wired:** Kernel memory that can't be paged out
56
- - **Compressed:** Compressed pages in RAM
57
-
58
- We removed the calculation of unused page types (free, inactive, speculative) since they're not needed.
59
-
60
- ### 3. CPU Calculation (VERIFIED CORRECT)
61
-
62
- **Formula:**
63
- ```typescript
64
- cpuUsage = ((pcpuUsage * pCoreCount) + (ecpuUsage * eCoreCount)) / totalCores * 100
65
- ```
66
-
67
- This weighted average is mathematically correct:
68
- - macmon reports per-core-type averages (P-CPU: 25%, E-CPU: 36%)
69
- - Formula computes overall system average: `(25% × 6 + 36% × 4) / 10 = 29.4%`
70
- - Historical average of 33% is reasonable given fluctuations over time
71
-
72
- ### 4. GPU Calculation (VERIFIED CORRECT)
73
-
74
- **Observation:**
75
- - Historical: Avg: 1.8%, Max: 4.0%, Min: 0.6%
76
- - macmon snapshot: GPU 4%
77
-
78
- This is **expected behavior**:
79
- - GPU is mostly idle (0-2%) between inference requests
80
- - Spikes to 4% during active token generation
81
- - Average of 1.8% correctly reflects mostly-idle state
82
- - Max of 4.0% matches macmon's instantaneous reading
83
-
84
- ## Changes Made
85
-
86
- ### `src/lib/system-collector.ts`
87
-
88
- **1. Removed total memory calculation from vm_stat parsing:**
89
- ```typescript
90
- // Now only returns memoryUsed
91
- private parseVmStatOutput(output: string): { memoryUsed: number }
92
- ```
93
-
94
- **2. Added method to get actual installed RAM:**
95
- ```typescript
96
- private async getTotalMemory(): Promise<number> {
97
- const output = await execCommand('sysctl -n hw.memsize 2>/dev/null');
98
- return parseInt(output.trim(), 10) || 0;
99
- }
100
- ```
101
-
102
- **3. Combined both sources in new method:**
103
- ```typescript
104
- private async getMemoryMetrics(): Promise<{
105
- memoryUsed: number;
106
- memoryTotal: number;
107
- }> {
108
- // Get used memory from vm_stat (active + wired + compressed)
109
- const vmStatOutput = await execCommand('vm_stat 2>/dev/null');
110
- const { memoryUsed } = this.parseVmStatOutput(vmStatOutput);
111
-
112
- // Get total installed RAM from sysctl (accurate)
113
- const memoryTotal = await this.getTotalMemory();
114
-
115
- return { memoryUsed, memoryTotal };
116
- }
117
- ```
118
-
119
- **4. Updated collector to use new method:**
120
- ```typescript
121
- // Always get memory from vm_stat + sysctl (accurate total from sysctl)
122
- const memoryMetrics = await this.getMemoryMetrics();
123
- ```
124
-
125
- ## Verification
126
-
127
- After these changes, memory usage should now accurately match macmon and Activity Monitor:
128
-
129
- **Before:**
130
- - Total: Calculated from page sum (~40 GB equivalent)
131
- - Used: 26.86 GB
132
- - **Percentage: ~60% (WRONG)**
133
-
134
- **After:**
135
- - Total: 32.0 GB (from `sysctl hw.memsize`)
136
- - Used: 26.86 GB (from vm_stat)
137
- - **Percentage: ~84% (CORRECT)**
138
-
139
- ## Testing Recommendations
140
-
141
- 1. **Compare with macmon:**
142
- ```bash
143
- # Terminal 1: Run macmon
144
- macmon
145
-
146
- # Terminal 2: Monitor server
147
- npm run dev -- server monitor <server-id>
148
- ```
149
-
150
- Memory percentages should now match within 1-2%.
151
-
152
- 2. **Compare with Activity Monitor:**
153
- - Open Activity Monitor → Memory tab
154
- - Check "Memory Used" value
155
- - Should match historical monitor's memory calculation
156
-
157
- 3. **Verify historical data:**
158
- ```bash
159
- # View historical metrics (press H in monitor)
160
- npm run dev -- server monitor <server-id>
161
- # Press 'H' to toggle historical view
162
- ```
163
-
164
- Memory usage should now show realistic values (~80-90% on an actively used system).
165
-
166
- 4. **Check edge cases:**
167
- - Fresh boot (low memory usage ~30-40%)
168
- - Under load (high memory usage ~85-95%)
169
- - Multiple servers running (memory should increase proportionally)
170
-
171
- ## Impact on Historical Data
172
-
173
- **Note:** Existing historical data was collected with the old (incorrect) calculation.
174
-
175
- **Options:**
176
-
177
- 1. **Keep old data as-is** (recommended for now)
178
- - Historical charts will show old incorrect baseline
179
- - New data will be accurate going forward
180
- - Natural transition over 24 hours as old data ages out
181
-
182
- 2. **Clear history and start fresh:**
183
- ```bash
184
- rm ~/.llamacpp/history/*.json
185
- ```
186
- - Immediate accuracy
187
- - Lose historical context
188
-
189
- ## Related Files
190
-
191
- - `src/lib/system-collector.ts` - System metrics collection (MODIFIED)
192
- - `src/lib/history-manager.ts` - History persistence (unchanged)
193
- - `src/tui/HistoricalMonitorApp.ts` - Historical UI (unchanged)
194
-
195
- ## References
196
-
197
- - macOS `vm_stat` documentation: Reports memory in pages (16KB on Apple Silicon)
198
- - macOS `sysctl` documentation: `hw.memsize` reports installed RAM in bytes
199
- - Activity Monitor algorithm: Uses active + wired + compressed for "Memory Used"
@@ -1,190 +0,0 @@
1
- # Per-Process Metrics Implementation
2
-
3
- ## Overview
4
-
5
- Historical monitoring now shows **per-process metrics** for the specific llama-server being monitored, rather than system-wide metrics. This provides accurate resource usage for each model.
6
-
7
- ## What Changed
8
-
9
- ### Before (System-Wide)
10
- - **GPU Usage:** All processes combined
11
- - **CPU Usage:** All processes combined
12
- - **Memory Usage:** All processes combined (% of total RAM)
13
-
14
- ### After (Per-Process)
15
- - **GPU Usage:** System-wide (unchanged - can't isolate per-process on macOS)
16
- - **CPU Usage:** Just the llama-server process (from `ps`)
17
- - **Memory Usage:** Just the llama-server process in GB (from `top`)
18
-
19
- ## Implementation Details
20
-
21
- ### 1. Process Metrics Collection
22
-
23
- **Added CPU collection (`src/utils/process-utils.ts`):**
24
- ```typescript
25
- // Batch collection for efficiency
26
- export async function getBatchProcessCpu(pids: number[]): Promise<Map<number, number | null>>
27
-
28
- // Single process collection
29
- export async function getProcessCpu(pid: number): Promise<number | null>
30
- ```
31
-
32
- **Features:**
33
- - Uses `ps -p <pid> -o %cpu` to get per-process CPU percentage
34
- - 3-second cache to prevent excessive process spawning
35
- - Batch collection for multi-server monitoring
36
- - Returns percentage (0-100+, can exceed 100% on multi-core)
37
-
38
- ### 2. Type Updates
39
-
40
- **ServerMetrics interface (`src/types/monitor-types.ts`):**
41
- ```typescript
42
- export interface ServerMetrics {
43
- // ... existing fields
44
- processMemory?: number; // Already existed
45
- processCpuUsage?: number; // NEW: Per-process CPU %
46
- }
47
- ```
48
-
49
- **HistorySnapshot interface (`src/types/history-types.ts`):**
50
- ```typescript
51
- export interface HistorySnapshot {
52
- server: {
53
- // ... existing fields
54
- processMemory?: number; // Already existed
55
- processCpuUsage?: number; // NEW: Per-process CPU %
56
- };
57
- system?: {
58
- // ... system-wide metrics (kept for live monitoring)
59
- };
60
- }
61
- ```
62
-
63
- ### 3. Metrics Collection
64
-
65
- **MetricsAggregator (`src/lib/metrics-aggregator.ts`):**
66
- - Added `processCpuUsage` parameter to `collectServerMetrics()`
67
- - Collects CPU in parallel with other metrics
68
- - Supports batch collection for multi-server scenarios
69
-
70
- **HistoryManager (`src/lib/history-manager.ts`):**
71
- - Saves `processCpuUsage` in snapshots
72
- - Maintains backward compatibility (optional field)
73
-
74
- ### 4. Historical Monitor UI
75
-
76
- **HistoricalMonitorApp (`src/tui/HistoricalMonitorApp.ts`):**
77
-
78
- **Chart Changes:**
79
-
80
- **GPU Usage:**
81
- - **Unchanged:** Still system-wide
82
- - **Reason:** macOS doesn't provide per-process GPU metrics easily
83
- - **Label:** "GPU Usage (%)"
84
-
85
- **CPU Usage:**
86
- - **Before:** `snapshot.system.cpuUsage` (system-wide)
87
- - **After:** `snapshot.server.processCpuUsage` (per-process)
88
- - **Label:** "Process CPU Usage (%)"
89
- - **Range:** Not forced to 0-100% (can show >100% for multi-threaded workloads)
90
-
91
- **Memory Usage:**
92
- - **Before:** `(system.memoryUsed / system.memoryTotal) * 100` (system-wide %)
93
- - **After:** `processMemory / (1024 * 1024 * 1024)` (per-process GB)
94
- - **Label:** "Process Memory Usage (GB)"
95
- - **Format:** Shows 2 decimal places (e.g., "3.45 GB")
96
- - **Statistics:** Avg, Max, Min in GB
97
-
98
- **Multi-Server Comparison:**
99
- - Table also updated to show per-process CPU and memory
100
- - Memory column now shows GB instead of %
101
-
102
- ## Benefits
103
-
104
- 1. **Accurate Attribution:** See exactly what each model is using
105
- 2. **Multi-Server Clarity:** Compare resource usage across different models
106
- 3. **Debugging:** Identify which specific model is consuming resources
107
- 4. **Capacity Planning:** Understand per-model requirements
108
-
109
- ## Example Output
110
-
111
- **Before (System-Wide):**
112
- ```
113
- CPU Usage (%)
114
- Avg: 33.0% (±17.4) Max: 86.6% Min: 12.0%
115
-
116
- Memory Usage (%)
117
- Avg: 31.0% (±0.6) Max: 31.9% Min: 29.9%
118
- ```
119
-
120
- **After (Per-Process):**
121
- ```
122
- Process CPU Usage (%)
123
- Avg: 45.2% (±12.3) Max: 120.5% Min: 8.1%
124
-
125
- Process Memory Usage (GB)
126
- Avg: 3.45 GB (±0.12) Max: 3.67 GB Min: 3.21 GB
127
- ```
128
-
129
- ## Edge Cases Handled
130
-
131
- 1. **Missing Data:** Fields are optional, gracefully handles old snapshots
132
- 2. **Process Not Running:** Returns null, charts skip those data points
133
- 3. **Multi-Core:** CPU can exceed 100% (expected behavior)
134
- 4. **Cache Expiry:** 3-second TTL prevents stale data
135
- 5. **Batch Collection:** Efficient when monitoring multiple servers
136
-
137
- ## Testing Recommendations
138
-
139
- 1. **Single Server:**
140
- ```bash
141
- npm run dev -- server monitor <server-id>
142
- # Press 'H' to view historical data
143
- ```
144
- - Verify CPU shows reasonable per-process values (not system-wide)
145
- - Verify memory shows model size in GB (not total RAM %)
146
-
147
- 2. **Multi-Server:**
148
- ```bash
149
- npm run dev -- server monitor
150
- # Press 'H' to view comparison table
151
- ```
152
- - Verify each server shows different CPU/memory values
153
- - Verify table shows GB for memory column
154
-
155
- 3. **Compare with Activity Monitor:**
156
- - Open Activity Monitor
157
- - Filter for `llama-server` process
158
- - CPU % should match within 5-10%
159
- - Memory should match within 0.1 GB
160
-
161
- 4. **Compare with `ps`:**
162
- ```bash
163
- ps -p <pid> -o %cpu,rss
164
- ```
165
- - CPU % should match
166
- - RSS (memory) should match when converted to GB
167
-
168
- ## Backward Compatibility
169
-
170
- - Old history files still work (missing fields treated as undefined)
171
- - System-wide metrics still collected for live monitoring
172
- - Live monitoring TUI unchanged (still shows system-wide for context)
173
- - Only historical view changed to per-process
174
-
175
- ## Related Files
176
-
177
- - `src/utils/process-utils.ts` - Added CPU collection functions
178
- - `src/types/monitor-types.ts` - Added processCpuUsage field
179
- - `src/types/history-types.ts` - Added processCpuUsage to snapshots
180
- - `src/lib/metrics-aggregator.ts` - Collects CPU metrics
181
- - `src/lib/history-manager.ts` - Saves CPU metrics
182
- - `src/tui/HistoricalMonitorApp.ts` - Displays per-process charts
183
-
184
- ## Future Improvements
185
-
186
- 1. **Per-Process GPU:** Investigate Metal API for GPU attribution
187
- 2. **Network I/O:** Track per-process network usage
188
- 3. **Disk I/O:** Track per-process disk reads/writes
189
- 4. **Thread Count:** Show number of threads used by process
190
- 5. **Context Switches:** Show voluntary/involuntary context switches
@@ -1 +0,0 @@
1
- # Screenshots directory