@appkit/llamacpp-cli 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. package/README.md +271 -277
  2. package/dist/cli.js +133 -23
  3. package/dist/cli.js.map +1 -1
  4. package/dist/commands/admin/config.d.ts +1 -1
  5. package/dist/commands/admin/config.js +5 -5
  6. package/dist/commands/admin/config.js.map +1 -1
  7. package/dist/commands/admin/log-config.d.ts +11 -0
  8. package/dist/commands/admin/log-config.d.ts.map +1 -0
  9. package/dist/commands/admin/log-config.js +159 -0
  10. package/dist/commands/admin/log-config.js.map +1 -0
  11. package/dist/commands/admin/logs.d.ts +2 -3
  12. package/dist/commands/admin/logs.d.ts.map +1 -1
  13. package/dist/commands/admin/logs.js +6 -48
  14. package/dist/commands/admin/logs.js.map +1 -1
  15. package/dist/commands/admin/status.d.ts.map +1 -1
  16. package/dist/commands/admin/status.js +1 -0
  17. package/dist/commands/admin/status.js.map +1 -1
  18. package/dist/commands/config.d.ts +1 -0
  19. package/dist/commands/config.d.ts.map +1 -1
  20. package/dist/commands/config.js +74 -196
  21. package/dist/commands/config.js.map +1 -1
  22. package/dist/commands/create.d.ts +3 -2
  23. package/dist/commands/create.d.ts.map +1 -1
  24. package/dist/commands/create.js +36 -98
  25. package/dist/commands/create.js.map +1 -1
  26. package/dist/commands/delete.d.ts.map +1 -1
  27. package/dist/commands/delete.js +7 -24
  28. package/dist/commands/delete.js.map +1 -1
  29. package/dist/commands/internal/server-wrapper.d.ts +15 -0
  30. package/dist/commands/internal/server-wrapper.d.ts.map +1 -0
  31. package/dist/commands/internal/server-wrapper.js +126 -0
  32. package/dist/commands/internal/server-wrapper.js.map +1 -0
  33. package/dist/commands/logs-all.d.ts +0 -2
  34. package/dist/commands/logs-all.d.ts.map +1 -1
  35. package/dist/commands/logs-all.js +1 -61
  36. package/dist/commands/logs-all.js.map +1 -1
  37. package/dist/commands/logs.d.ts +2 -5
  38. package/dist/commands/logs.d.ts.map +1 -1
  39. package/dist/commands/logs.js +104 -120
  40. package/dist/commands/logs.js.map +1 -1
  41. package/dist/commands/migrate-labels.d.ts +12 -0
  42. package/dist/commands/migrate-labels.d.ts.map +1 -0
  43. package/dist/commands/migrate-labels.js +160 -0
  44. package/dist/commands/migrate-labels.js.map +1 -0
  45. package/dist/commands/ps.d.ts.map +1 -1
  46. package/dist/commands/ps.js +2 -1
  47. package/dist/commands/ps.js.map +1 -1
  48. package/dist/commands/rm.d.ts.map +1 -1
  49. package/dist/commands/rm.js +22 -48
  50. package/dist/commands/rm.js.map +1 -1
  51. package/dist/commands/router/config.d.ts +1 -1
  52. package/dist/commands/router/config.js +6 -6
  53. package/dist/commands/router/config.js.map +1 -1
  54. package/dist/commands/router/logs.d.ts +2 -4
  55. package/dist/commands/router/logs.d.ts.map +1 -1
  56. package/dist/commands/router/logs.js +34 -189
  57. package/dist/commands/router/logs.js.map +1 -1
  58. package/dist/commands/router/status.d.ts.map +1 -1
  59. package/dist/commands/router/status.js +1 -0
  60. package/dist/commands/router/status.js.map +1 -1
  61. package/dist/commands/server-show.d.ts.map +1 -1
  62. package/dist/commands/server-show.js +3 -0
  63. package/dist/commands/server-show.js.map +1 -1
  64. package/dist/commands/start.d.ts.map +1 -1
  65. package/dist/commands/start.js +21 -72
  66. package/dist/commands/start.js.map +1 -1
  67. package/dist/commands/stop.d.ts.map +1 -1
  68. package/dist/commands/stop.js +10 -26
  69. package/dist/commands/stop.js.map +1 -1
  70. package/dist/launchers/llamacpp-admin +8 -0
  71. package/dist/launchers/llamacpp-router +8 -0
  72. package/dist/launchers/llamacpp-server +8 -0
  73. package/dist/lib/admin-manager.d.ts +4 -0
  74. package/dist/lib/admin-manager.d.ts.map +1 -1
  75. package/dist/lib/admin-manager.js +42 -18
  76. package/dist/lib/admin-manager.js.map +1 -1
  77. package/dist/lib/admin-server.d.ts +48 -1
  78. package/dist/lib/admin-server.d.ts.map +1 -1
  79. package/dist/lib/admin-server.js +632 -238
  80. package/dist/lib/admin-server.js.map +1 -1
  81. package/dist/lib/config-generator.d.ts +1 -0
  82. package/dist/lib/config-generator.d.ts.map +1 -1
  83. package/dist/lib/config-generator.js +12 -5
  84. package/dist/lib/config-generator.js.map +1 -1
  85. package/dist/lib/keyboard-manager.d.ts +162 -0
  86. package/dist/lib/keyboard-manager.d.ts.map +1 -0
  87. package/dist/lib/keyboard-manager.js +247 -0
  88. package/dist/lib/keyboard-manager.js.map +1 -0
  89. package/dist/lib/label-migration.d.ts +65 -0
  90. package/dist/lib/label-migration.d.ts.map +1 -0
  91. package/dist/lib/label-migration.js +458 -0
  92. package/dist/lib/label-migration.js.map +1 -0
  93. package/dist/lib/launchctl-manager.d.ts +9 -0
  94. package/dist/lib/launchctl-manager.d.ts.map +1 -1
  95. package/dist/lib/launchctl-manager.js +65 -19
  96. package/dist/lib/launchctl-manager.js.map +1 -1
  97. package/dist/lib/log-management-service.d.ts +51 -0
  98. package/dist/lib/log-management-service.d.ts.map +1 -0
  99. package/dist/lib/log-management-service.js +124 -0
  100. package/dist/lib/log-management-service.js.map +1 -0
  101. package/dist/lib/log-workers.d.ts +70 -0
  102. package/dist/lib/log-workers.d.ts.map +1 -0
  103. package/dist/lib/log-workers.js +217 -0
  104. package/dist/lib/log-workers.js.map +1 -0
  105. package/dist/lib/model-downloader.d.ts +9 -1
  106. package/dist/lib/model-downloader.d.ts.map +1 -1
  107. package/dist/lib/model-downloader.js +98 -1
  108. package/dist/lib/model-downloader.js.map +1 -1
  109. package/dist/lib/model-management-service.d.ts +60 -0
  110. package/dist/lib/model-management-service.d.ts.map +1 -0
  111. package/dist/lib/model-management-service.js +246 -0
  112. package/dist/lib/model-management-service.js.map +1 -0
  113. package/dist/lib/model-management-service.test.d.ts +2 -0
  114. package/dist/lib/model-management-service.test.d.ts.map +1 -0
  115. package/dist/lib/model-management-service.test.js.map +1 -0
  116. package/dist/lib/model-scanner.d.ts +15 -3
  117. package/dist/lib/model-scanner.d.ts.map +1 -1
  118. package/dist/lib/model-scanner.js +174 -17
  119. package/dist/lib/model-scanner.js.map +1 -1
  120. package/dist/lib/openapi-spec.d.ts +1335 -0
  121. package/dist/lib/openapi-spec.d.ts.map +1 -0
  122. package/dist/lib/openapi-spec.js +1017 -0
  123. package/dist/lib/openapi-spec.js.map +1 -0
  124. package/dist/lib/router-logger.d.ts +1 -1
  125. package/dist/lib/router-logger.d.ts.map +1 -1
  126. package/dist/lib/router-logger.js +13 -11
  127. package/dist/lib/router-logger.js.map +1 -1
  128. package/dist/lib/router-manager.d.ts +4 -0
  129. package/dist/lib/router-manager.d.ts.map +1 -1
  130. package/dist/lib/router-manager.js +30 -18
  131. package/dist/lib/router-manager.js.map +1 -1
  132. package/dist/lib/router-server.d.ts +6 -0
  133. package/dist/lib/router-server.d.ts.map +1 -1
  134. package/dist/lib/router-server.js +534 -20
  135. package/dist/lib/router-server.js.map +1 -1
  136. package/dist/lib/server-config-service.d.ts +51 -0
  137. package/dist/lib/server-config-service.d.ts.map +1 -0
  138. package/dist/lib/server-config-service.js +328 -0
  139. package/dist/lib/server-config-service.js.map +1 -0
  140. package/dist/lib/server-config-service.test.d.ts +2 -0
  141. package/dist/lib/server-config-service.test.d.ts.map +1 -0
  142. package/dist/lib/server-config-service.test.js.map +1 -0
  143. package/dist/lib/server-lifecycle-service.d.ts +172 -0
  144. package/dist/lib/server-lifecycle-service.d.ts.map +1 -0
  145. package/dist/lib/server-lifecycle-service.js +619 -0
  146. package/dist/lib/server-lifecycle-service.js.map +1 -0
  147. package/dist/lib/state-manager.d.ts +18 -1
  148. package/dist/lib/state-manager.d.ts.map +1 -1
  149. package/dist/lib/state-manager.js +51 -2
  150. package/dist/lib/state-manager.js.map +1 -1
  151. package/dist/lib/status-checker.d.ts +11 -4
  152. package/dist/lib/status-checker.d.ts.map +1 -1
  153. package/dist/lib/status-checker.js +34 -1
  154. package/dist/lib/status-checker.js.map +1 -1
  155. package/dist/lib/validation-service.d.ts +43 -0
  156. package/dist/lib/validation-service.d.ts.map +1 -0
  157. package/dist/lib/validation-service.js +112 -0
  158. package/dist/lib/validation-service.js.map +1 -0
  159. package/dist/lib/validation-service.test.d.ts +2 -0
  160. package/dist/lib/validation-service.test.d.ts.map +1 -0
  161. package/dist/lib/validation-service.test.js.map +1 -0
  162. package/dist/scripts/http-log-filter.sh +8 -0
  163. package/dist/tui/ConfigApp.d.ts.map +1 -1
  164. package/dist/tui/ConfigApp.js +222 -184
  165. package/dist/tui/ConfigApp.js.map +1 -1
  166. package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -1
  167. package/dist/tui/HistoricalMonitorApp.js +12 -0
  168. package/dist/tui/HistoricalMonitorApp.js.map +1 -1
  169. package/dist/tui/ModelsApp.d.ts.map +1 -1
  170. package/dist/tui/ModelsApp.js +93 -17
  171. package/dist/tui/ModelsApp.js.map +1 -1
  172. package/dist/tui/MonitorApp.d.ts.map +1 -1
  173. package/dist/tui/MonitorApp.js +1 -3
  174. package/dist/tui/MonitorApp.js.map +1 -1
  175. package/dist/tui/MultiServerMonitorApp.d.ts +3 -3
  176. package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -1
  177. package/dist/tui/MultiServerMonitorApp.js +724 -508
  178. package/dist/tui/MultiServerMonitorApp.js.map +1 -1
  179. package/dist/tui/RootNavigator.d.ts.map +1 -1
  180. package/dist/tui/RootNavigator.js +17 -1
  181. package/dist/tui/RootNavigator.js.map +1 -1
  182. package/dist/tui/RouterApp.d.ts +6 -0
  183. package/dist/tui/RouterApp.d.ts.map +1 -0
  184. package/dist/tui/RouterApp.js +928 -0
  185. package/dist/tui/RouterApp.js.map +1 -0
  186. package/dist/tui/SearchApp.d.ts.map +1 -1
  187. package/dist/tui/SearchApp.js +27 -6
  188. package/dist/tui/SearchApp.js.map +1 -1
  189. package/dist/tui/shared/modal-controller.d.ts +65 -0
  190. package/dist/tui/shared/modal-controller.d.ts.map +1 -0
  191. package/dist/tui/shared/modal-controller.js +625 -0
  192. package/dist/tui/shared/modal-controller.js.map +1 -0
  193. package/dist/tui/shared/overlay-utils.d.ts +7 -0
  194. package/dist/tui/shared/overlay-utils.d.ts.map +1 -0
  195. package/dist/tui/shared/overlay-utils.js +54 -0
  196. package/dist/tui/shared/overlay-utils.js.map +1 -0
  197. package/dist/types/admin-config.d.ts +15 -2
  198. package/dist/types/admin-config.d.ts.map +1 -1
  199. package/dist/types/model-info.d.ts +5 -0
  200. package/dist/types/model-info.d.ts.map +1 -1
  201. package/dist/types/router-config.d.ts +2 -2
  202. package/dist/types/router-config.d.ts.map +1 -1
  203. package/dist/types/server-config.d.ts +8 -0
  204. package/dist/types/server-config.d.ts.map +1 -1
  205. package/dist/types/server-config.js +25 -0
  206. package/dist/types/server-config.js.map +1 -1
  207. package/dist/utils/http-log-filter.d.ts +10 -0
  208. package/dist/utils/http-log-filter.d.ts.map +1 -0
  209. package/dist/utils/http-log-filter.js +84 -0
  210. package/dist/utils/http-log-filter.js.map +1 -0
  211. package/dist/utils/log-parser.d.ts.map +1 -1
  212. package/dist/utils/log-parser.js +7 -4
  213. package/dist/utils/log-parser.js.map +1 -1
  214. package/dist/utils/log-utils.d.ts +59 -4
  215. package/dist/utils/log-utils.d.ts.map +1 -1
  216. package/dist/utils/log-utils.js +150 -11
  217. package/dist/utils/log-utils.js.map +1 -1
  218. package/dist/utils/shard-utils.d.ts +72 -0
  219. package/dist/utils/shard-utils.d.ts.map +1 -0
  220. package/dist/utils/shard-utils.js +168 -0
  221. package/dist/utils/shard-utils.js.map +1 -0
  222. package/package.json +18 -4
  223. package/src/launchers/llamacpp-admin +8 -0
  224. package/src/launchers/llamacpp-router +8 -0
  225. package/src/launchers/llamacpp-server +8 -0
  226. package/web/dist/assets/index-C7zA1ach.js +50 -0
  227. package/web/dist/assets/index-aLnMmRR8.css +1 -0
  228. package/web/dist/index.html +2 -2
  229. package/web/dist/assets/index-Bin89Lwr.css +0 -1
  230. package/web/dist/assets/index-CVmonw3T.js +0 -17
@@ -42,6 +42,216 @@ const fs = __importStar(require("fs/promises"));
42
42
  const path = __importStar(require("path"));
43
43
  const file_utils_1 = require("../utils/file-utils");
44
44
  const router_logger_1 = require("./router-logger");
/**
 * Parse Qwen3-Coder XML tool calls from text content.
 *
 * Handles: <tool_call><function=NAME>\n<parameter=P>V</parameter></function></tool_call>
 * (the `>` after NAME is optional). Function and parameter names may contain
 * letters, digits, `_`, `-` and `.`, so hyphenated tool names are kept whole
 * instead of being truncated at the first non-word character.
 *
 * @param {string} text - Concatenated text content of a model response.
 * @returns {{ toolCalls: Array<{ name: string, input: Object }>, cleanText: string, skippedNames: string[] }}
 *   toolCalls    - calls that carried at least one parameter (values are trimmed strings);
 *   cleanText    - `text` with every <tool_call> span removed, then trimmed;
 *   skippedNames - names of calls dropped because they had no parameters.
 */
function parseXmlToolCalls(text) {
    const toolCalls = [];
    const skippedNames = [];
    for (const [, inner] of text.matchAll(/<tool_call>([\s\S]*?)<\/tool_call>/g)) {
        const funcMatch = /<function=([\w.-]+)/.exec(inner);
        if (!funcMatch) {
            continue;
        }
        const name = funcMatch[1];
        const input = {};
        for (const [, paramName, rawValue] of inner.matchAll(/<parameter=([\w.-]+)>([\s\S]*?)<\/parameter>/g)) {
            input[paramName] = rawValue.trim();
        }
        // Skip malformed tool calls with no parameters (model generation failure)
        if (Object.keys(input).length === 0) {
            skippedNames.push(name);
            continue;
        }
        toolCalls.push({ name, input });
    }
    const cleanText = text.replace(/<tool_call>[\s\S]*?<\/tool_call>/g, '').trim();
    return { toolCalls, cleanText, skippedNames };
}
/**
 * Build a pseudo-random tool_use id: the prefix "toolu_" followed by
 * 16 lowercase hexadecimal digits drawn from Math.random().
 *
 * @returns {string} The generated id.
 */
function generateToolUseId() {
    let suffix = '';
    for (let i = 0; i < 16; i++) {
        const nibble = Math.floor(Math.random() * 16);
        suffix += nibble.toString(16);
    }
    return 'toolu_' + suffix;
}
/**
 * Count how many consecutive recent user messages contained only error tool_results.
 * Used to detect infinite error-feedback loops: if >= 2, stop sending error feedback and strip.
 *
 * Walks the message list from newest to oldest, skipping assistant turns, and
 * stops at the first entry that is not a user message made up solely of
 * errored tool_result items. Malformed entries (null message, null content
 * item) end or are skipped by the scan rather than throwing, so the cycles
 * already counted are not lost.
 *
 * @param {string} requestBody - JSON-encoded request body with a `messages` array.
 * @returns {number} Number of trailing all-error tool_result user turns; 0 when
 *   the body is not valid JSON or has no `messages` array.
 */
function countConsecutiveErrorCycles(requestBody) {
    let messages;
    try {
        messages = JSON.parse(requestBody)?.messages;
    }
    catch {
        // Unparseable body: there is no history to inspect.
        return 0;
    }
    if (!Array.isArray(messages)) {
        return 0;
    }
    let count = 0;
    for (let i = messages.length - 1; i >= 0; i--) {
        const msg = messages[i];
        if (msg?.role === 'assistant') {
            continue; // skip assistant turns
        }
        if (msg?.role !== 'user') {
            break; // unknown or malformed entry ends the streak
        }
        const content = Array.isArray(msg.content) ? msg.content : [];
        const toolResults = content.filter((c) => c?.type === 'tool_result');
        if (toolResults.length === 0) {
            break; // non-tool user message, stop
        }
        if (!toolResults.every((c) => c.is_error)) {
            break; // mixed or all-success results, stop counting
        }
        count++;
    }
    return count;
}
/**
 * Write one Server-Sent Event to the response.
 *
 * @param {{ write: Function }} res - Writable response.
 * @param {string} eventType - Value for the `event:` field.
 * @param {*} data - Payload; serialized with JSON.stringify into the `data:` field.
 */
function emitSseEvent(res, eventType, data) {
    res.write(`event: ${eventType}\ndata: ${JSON.stringify(data)}\n\n`);
}
/**
 * Map a parsed block to the `content_block` / `delta` payloads used when
 * re-emitting it, or return null when the block produces no events
 * (empty text/thinking content, or a block type this function does not emit).
 */
function describeBlockForSse(block) {
    if (block.type === 'text') {
        if (!block.content) {
            return null;
        }
        return {
            contentBlock: { type: 'text', text: '' },
            delta: { type: 'text_delta', text: block.content },
        };
    }
    if (block.type === 'thinking') {
        if (!block.content) {
            return null;
        }
        return {
            contentBlock: { type: 'thinking', thinking: '' },
            delta: { type: 'thinking_delta', thinking: block.content },
        };
    }
    if (block.type === 'tool_use') {
        return {
            contentBlock: { type: 'tool_use', id: block.id, name: block.name, input: {} },
            delta: { type: 'input_json_delta', partial_json: JSON.stringify(block.input ?? {}) },
        };
    }
    return null;
}
/**
 * Emit a fully reconstructed SSE stream from parsed block state.
 * Used when the original stream needs modification (XML tool calls or thinking-only).
 *
 * Each emitted block is sent as a start / single delta / stop triple. The
 * content-block index only advances for blocks that are actually emitted, so
 * the indexes seen by the client are always contiguous starting at 0.
 *
 * @param {{ write: Function }} res - Writable response.
 * @param {Object|null} messageStartData - Original `message_start` payload; skipped when falsy.
 * @param {Array<Object>} blocks - Blocks with `type` plus `content` (text/thinking) or `id`/`name`/`input` (tool_use).
 * @param {string} stopReason - Value for `delta.stop_reason` in the final `message_delta`.
 * @param {number} outputTokens - Value for `usage.output_tokens` in the final `message_delta`.
 */
function emitReconstructedSseStream(res, messageStartData, blocks, stopReason, outputTokens) {
    if (messageStartData) {
        emitSseEvent(res, 'message_start', messageStartData);
    }
    let idx = 0;
    for (const block of blocks) {
        const described = describeBlockForSse(block);
        if (!described) {
            continue; // nothing emitted, so do not consume an index
        }
        emitSseEvent(res, 'content_block_start', { type: 'content_block_start', index: idx, content_block: described.contentBlock });
        emitSseEvent(res, 'content_block_delta', { type: 'content_block_delta', index: idx, delta: described.delta });
        emitSseEvent(res, 'content_block_stop', { type: 'content_block_stop', index: idx });
        idx++;
    }
    emitSseEvent(res, 'message_delta', { type: 'message_delta', delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: outputTokens } });
    emitSseEvent(res, 'message_stop', { type: 'message_stop' });
}
/**
 * Make a backend HTTP request, buffer the full SSE stream, and return parsed state.
 * Used for the initial request AND for retries when the model generates malformed output.
 *
 * Response bytes are decoded with a streaming UTF-8 decoder so a multi-byte
 * character split across two chunks is not corrupted. `input_json_delta`
 * fragments are accumulated per block and, when they form valid JSON, stored
 * as the block's `input` so native tool_use blocks keep their arguments.
 *
 * @param {Object} options - Options passed straight to `http.request`.
 * @param {string|Buffer} requestBody - Body written to the request.
 * @returns {Promise<{ rawEvents: string[], messageStartData: Object|null, stopReason: string, blocks: Object[], outputTokens: number }>}
 *   Resolves when the response ends; `stopReason` defaults to 'end_turn' when
 *   no message_delta carried one. Rejects on request or response stream errors.
 */
function bufferSseRequest(options, requestBody) {
    return new Promise((resolve, reject) => {
        const req = http.request(options, (res) => {
            const decoder = new TextDecoder('utf-8');
            let sseBuffer = '';
            const rawEvents = [];
            let messageStartData = null;
            let messageDeltaData = null;
            const parsedBlocks = {};
            // Accumulated tool_use argument JSON, keyed by content-block index.
            const pendingInputJson = {};
            let outputTokens = 0;
            const consumeEvent = (part) => {
                rawEvents.push(part);
                let dataStr = '';
                for (const line of part.split('\n')) {
                    if (line.startsWith('data: ')) {
                        dataStr = line.slice(6);
                    }
                }
                let data;
                try {
                    data = JSON.parse(dataStr);
                }
                catch {
                    // Non-JSON SSE (ping etc.): kept in rawEvents, nothing to parse.
                    return;
                }
                if (data === null || typeof data !== 'object') {
                    return;
                }
                if (data.type === 'message_start') {
                    messageStartData = data;
                }
                else if (data.type === 'content_block_start') {
                    const idx = data.index ?? 0;
                    parsedBlocks[idx] = { type: data.content_block?.type ?? 'text', content: '', name: data.content_block?.name, id: data.content_block?.id };
                }
                else if (data.type === 'content_block_delta') {
                    const block = parsedBlocks[data.index];
                    if (!block) {
                        return;
                    }
                    if (data.delta?.type === 'text_delta') {
                        block.content += data.delta.text ?? '';
                    }
                    else if (data.delta?.type === 'thinking_delta') {
                        block.content += data.delta.thinking ?? '';
                    }
                    else if (data.delta?.type === 'input_json_delta') {
                        pendingInputJson[data.index] = (pendingInputJson[data.index] ?? '') + (data.delta.partial_json ?? '');
                    }
                }
                else if (data.type === 'message_delta') {
                    messageDeltaData = data;
                    outputTokens = data.usage?.output_tokens ?? 0;
                }
            };
            res.on('data', (chunk) => {
                sseBuffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
                const parts = sseBuffer.split('\n\n');
                // The last piece is an incomplete event (or ''); keep it for the next chunk.
                sseBuffer = parts.pop() ?? '';
                for (const part of parts) {
                    if (part.trim()) {
                        consumeEvent(part);
                    }
                }
            });
            res.on('end', () => {
                sseBuffer += decoder.decode(); // flush any bytes still held by the decoder
                if (sseBuffer.trim()) {
                    rawEvents.push(sseBuffer);
                }
                for (const [idx, json] of Object.entries(pendingInputJson)) {
                    if (!json) {
                        continue;
                    }
                    try {
                        parsedBlocks[idx].input = JSON.parse(json);
                    }
                    catch {
                        // Truncated/invalid argument JSON: leave `input` unset.
                    }
                }
                resolve({
                    rawEvents,
                    messageStartData,
                    stopReason: messageDeltaData?.delta?.stop_reason ?? 'end_turn',
                    blocks: Object.values(parsedBlocks),
                    outputTokens,
                });
            });
            res.on('error', reject);
        });
        req.on('error', reject);
        req.write(requestBody);
        req.end();
    });
}
/**
 * Apply Qwen3 model quirk fixes to a buffered SSE result.
 * Returns the action to take (what to emit) without actually emitting anything.
 *
 * Actions, checked in this order:
 *  - 'fix1': at least one well-formed XML tool call was found in the text; returns
 *            replacement blocks (non-text blocks, remaining text, tool_use blocks).
 *  - 'fix3': the text held visible content but nothing is left once tool-call XML
 *            is removed, and no call was usable.
 *  - 'fix4': text remains and some calls were malformed; returns the text plus
 *            empty-input tool_use blocks for error feedback.
 *  - 'fix2': stop reason is 'end_turn' with thinking blocks and no visible text;
 *            returns the thinking blocks plus a text block repeating the thinking.
 *  - 'raw' : nothing to fix.
 *
 * Whitespace-only text is treated as "no text": it no longer triggers 'fix3'
 * (which would carry an empty skippedNames list) and does not prevent 'fix2'.
 *
 * @param {{ blocks: Array<Object>, stopReason: string }} result - Buffered stream state.
 * @returns {{ action: string, newBlocks?: Array<Object>, stopReason?: string, skippedNames?: string[] }}
 */
function classifyBufferedResult(result) {
    const { blocks, stopReason } = result;
    const textBlocks = blocks.filter((b) => b.type === 'text');
    const thinkingBlocks = blocks.filter((b) => b.type === 'thinking');
    const nonTextBlocks = blocks.filter((b) => b.type !== 'text');
    const allText = textBlocks.map((b) => b.content).join('');
    const { toolCalls, cleanText, skippedNames } = parseXmlToolCalls(allText);
    if (toolCalls.length > 0) {
        const newBlocks = [
            ...nonTextBlocks,
            ...(cleanText ? [{ type: 'text', content: cleanText }] : []),
            ...toolCalls.map((tc) => ({ type: 'tool_use', content: '', id: generateToolUseId(), name: tc.name, input: tc.input })),
        ];
        return { action: 'fix1', newBlocks, stopReason: 'tool_use' };
    }
    // Only text with visible content can have been "all tool-call XML".
    if (allText.trim() && !cleanText) {
        return { action: 'fix3', skippedNames };
    }
    // fix4: text + malformed tool calls — emit text and empty tool_use blocks for error feedback
    if (cleanText && skippedNames.length > 0) {
        const newBlocks = [
            ...nonTextBlocks,
            { type: 'text', content: cleanText },
            ...skippedNames.map((name) => ({ type: 'tool_use', content: '', id: generateToolUseId(), name, input: {} })),
        ];
        return { action: 'fix4', newBlocks, stopReason: 'tool_use', skippedNames };
    }
    const hasVisibleText = textBlocks.some((b) => b.content?.trim());
    if (stopReason === 'end_turn' && thinkingBlocks.length > 0 && !hasVisibleText) {
        const thinkingText = thinkingBlocks.map((b) => b.content).join('\n');
        const newBlocks = [...thinkingBlocks, { type: 'text', content: thinkingText }];
        return { action: 'fix2', newBlocks, stopReason: 'end_turn' };
    }
    return { action: 'raw' };
}
45
255
  /**
46
256
  * Router HTTP server - proxies requests to backend llama.cpp servers
47
257
  */
@@ -53,8 +263,8 @@ class RouterServer {
53
263
  throw new Error('Router configuration not found');
54
264
  }
55
265
  this.config = await (0, file_utils_1.readJson)(configPath);
56
- // Initialize logger with verbose setting
57
- this.logger = new router_logger_1.RouterLogger(this.config.verbose);
266
+ // Initialize logger with logging setting
267
+ this.logger = new router_logger_1.RouterLogger(this.config.logging);
58
268
  // Rotate log file if needed
59
269
  await this.logger.rotateIfNeeded();
60
270
  // Create HTTP server
@@ -110,6 +320,9 @@ class RouterServer {
110
320
  else if (url === '/health' && method === 'GET') {
111
321
  await this.handleHealth(req, res);
112
322
  }
323
+ else if (url.startsWith('/props') && method === 'GET') {
324
+ await this.handleProps(req, res, url);
325
+ }
113
326
  else if (url === '/v1/models' && method === 'GET') {
114
327
  await this.handleModels(req, res);
115
328
  }
@@ -149,18 +362,65 @@ class RouterServer {
149
362
  timestamp: new Date().toISOString(),
150
363
  }));
151
364
  }
365
+ /**
366
+ * Proxy llama.cpp's /props to a backend server. Pass ?model=<name> to
367
+ * select which backend; otherwise picks the first running server.
368
+ * Used by clients (e.g. lcode) to discover the loaded n_ctx.
369
+ */
370
+ async handleProps(req, res, url) {
371
+ const query = new url_1.URL(url, 'http://localhost').searchParams;
372
+ const requestedModel = query.get('model');
373
+ const servers = await this.getAllServers();
374
+ const running = servers.filter((s) => s.status === 'running');
375
+ const target = requestedModel
376
+ ? await this.findServerForModel(requestedModel)
377
+ : running[0] ?? null;
378
+ if (!target || target.status !== 'running') {
379
+ this.sendError(res, 404, 'Not Found', requestedModel
380
+ ? `No running server for model: ${requestedModel}`
381
+ : 'No running servers');
382
+ return;
383
+ }
384
+ const host = target.host === '0.0.0.0' ? '127.0.0.1' : target.host;
385
+ const backendReq = http.request({
386
+ hostname: host,
387
+ port: target.port,
388
+ path: '/props',
389
+ method: 'GET',
390
+ timeout: this.config.requestTimeout,
391
+ }, (backendRes) => {
392
+ res.writeHead(backendRes.statusCode || 200, {
393
+ 'Content-Type': backendRes.headers['content-type'] ?? 'application/json',
394
+ });
395
+ backendRes.pipe(res);
396
+ });
397
+ backendReq.on('error', (err) => {
398
+ if (!res.headersSent) {
399
+ this.sendError(res, 502, 'Bad Gateway', `Backend /props failed: ${err.message}`);
400
+ }
401
+ });
402
+ backendReq.on('timeout', () => {
403
+ backendReq.destroy();
404
+ if (!res.headersSent) {
405
+ this.sendError(res, 504, 'Gateway Timeout', 'Backend /props did not respond in time');
406
+ }
407
+ });
408
+ backendReq.end();
409
+ }
152
410
  /**
153
411
  * List models endpoint - aggregate from all running servers
154
412
  */
155
413
  async handleModels(req, res) {
156
414
  const servers = await this.getAllServers();
157
415
  const runningServers = servers.filter(s => s.status === 'running');
158
- const models = runningServers.map(server => ({
159
- id: server.modelName,
160
- object: 'model',
161
- created: Math.floor(new Date(server.createdAt).getTime() / 1000),
162
- owned_by: 'llamacpp',
163
- }));
416
+ const models = runningServers.flatMap(server => {
417
+ const created = Math.floor(new Date(server.createdAt).getTime() / 1000);
418
+ const entries = [{ id: server.modelName, object: 'model', created, owned_by: 'llamacpp' }];
419
+ if (server.alias) {
420
+ entries.push({ id: server.alias, object: 'model', created, owned_by: 'llamacpp' });
421
+ }
422
+ return entries;
423
+ });
164
424
  const response = {
165
425
  object: 'list',
166
426
  data: models,
@@ -276,6 +536,20 @@ class RouterServer {
276
536
  await this.logRequest(modelName, '/v1/messages', statusCode, timer.elapsed(), errorMsg, undefined, promptPreview);
277
537
  return;
278
538
  }
539
+ // Inject tool call guidance when tools are present (Qwen3-Coder workaround:
540
+ // the model sometimes generates tool calls with no parameters when context is long)
541
+ if (anthropicRequest.tools && anthropicRequest.tools.length > 0) {
542
+ const guidance = 'When using tools, always include ALL required parameters with their complete values. Never omit parameters from tool calls.';
543
+ if (typeof anthropicRequest.system === 'string' && anthropicRequest.system) {
544
+ anthropicRequest.system = guidance + '\n\n' + anthropicRequest.system;
545
+ }
546
+ else if (Array.isArray(anthropicRequest.system)) {
547
+ anthropicRequest.system = [{ type: 'text', text: guidance }, ...anthropicRequest.system];
548
+ }
549
+ else {
550
+ anthropicRequest.system = guidance;
551
+ }
552
+ }
279
553
  // Find server for model
280
554
  const server = await this.findServerForModel(modelName);
281
555
  if (!server) {
@@ -333,30 +607,262 @@ class RouterServer {
333
607
  const backendReq = http.request(options, (backendRes) => {
334
608
  // Handle streaming vs non-streaming
335
609
  if (anthropicRequest.stream) {
336
- // For streaming, set SSE headers and pipe response
337
610
  res.writeHead(backendRes.statusCode || 200, {
338
611
  'Content-Type': 'text/event-stream',
339
612
  'Cache-Control': 'no-cache',
340
613
  'Connection': 'keep-alive',
341
614
  });
342
- // Pipe response directly (llama.cpp sends correct Anthropic SSE format)
343
- backendRes.pipe(res);
615
+ // Buffer the full SSE stream so we can detect and fix Qwen3 model quirks before
616
+ // forwarding to the client. Headers are sent above but NO events are emitted until
617
+ // we've finished processing (enabling transparent retry for Fix 3).
618
+ let sseBuffer = '';
619
+ const rawEvents = [];
620
+ let messageStartData = null;
621
+ let messageDeltaData = null;
622
+ const parsedBlocks = {};
623
+ let outputTokens = 0;
624
+ backendRes.on('data', (chunk) => {
625
+ sseBuffer += chunk.toString();
626
+ const parts = sseBuffer.split('\n\n');
627
+ sseBuffer = parts.pop() ?? '';
628
+ for (const part of parts) {
629
+ if (!part.trim())
630
+ continue;
631
+ rawEvents.push(part);
632
+ let dataStr = '';
633
+ for (const line of part.split('\n')) {
634
+ if (line.startsWith('data: '))
635
+ dataStr = line.slice(6);
636
+ }
637
+ try {
638
+ const data = JSON.parse(dataStr);
639
+ if (data.type === 'message_start') {
640
+ messageStartData = data;
641
+ }
642
+ else if (data.type === 'content_block_start') {
643
+ const idx = data.index ?? 0;
644
+ parsedBlocks[idx] = {
645
+ type: data.content_block?.type ?? 'text',
646
+ content: '',
647
+ name: data.content_block?.name,
648
+ id: data.content_block?.id,
649
+ };
650
+ }
651
+ else if (data.type === 'content_block_delta') {
652
+ const block = parsedBlocks[data.index];
653
+ if (block) {
654
+ if (data.delta?.type === 'text_delta')
655
+ block.content += data.delta.text ?? '';
656
+ else if (data.delta?.type === 'thinking_delta')
657
+ block.content += data.delta.thinking ?? '';
658
+ }
659
+ }
660
+ else if (data.type === 'message_delta') {
661
+ messageDeltaData = data;
662
+ outputTokens = data.usage?.output_tokens ?? 0;
663
+ }
664
+ }
665
+ catch {
666
+ // Non-JSON SSE data (e.g. ping) — still buffered in rawEvents
667
+ }
668
+ }
669
+ });
344
670
  backendRes.on('end', async () => {
671
+ if (sseBuffer.trim())
672
+ rawEvents.push(sseBuffer);
673
+ const firstResult = {
674
+ rawEvents,
675
+ messageStartData,
676
+ stopReason: messageDeltaData?.delta?.stop_reason ?? 'end_turn',
677
+ blocks: Object.values(parsedBlocks),
678
+ outputTokens,
679
+ };
680
+ let classified = classifyBufferedResult(firstResult);
681
+ let finalResult = firstResult;
682
+ if (classified.action === 'fix3') {
683
+ const skipped = classified.skippedNames ?? [];
684
+ // Only retry for single empty call glitches (random sampling failure).
685
+ // If 2+ empty calls were generated the model is in a stuck pattern — retry
686
+ // would just double the wait time with the same degenerate result.
687
+ if (skipped.length === 1) {
688
+ console.error(`[Router] Retrying single malformed XML call (attempted: ${skipped.join(', ')})`);
689
+ try {
690
+ const retryResult = await bufferSseRequest(options, requestBody);
691
+ const retryClassified = classifyBufferedResult(retryResult);
692
+ if (retryClassified.action !== 'fix3') {
693
+ classified = retryClassified;
694
+ finalResult = retryResult;
695
+ console.error(`[Router] Retry succeeded (action: ${retryClassified.action})`);
696
+ }
697
+ else {
698
+ console.error(`[Router] Retry also malformed, giving up`);
699
+ }
700
+ }
701
+ catch (err) {
702
+ console.error('[Router] Retry request failed:', err);
703
+ }
704
+ }
705
+ else {
706
+ console.error(`[Router] Skipping retry — model stuck generating ${skipped.length} malformed calls (${skipped.join(', ')})`);
707
+ }
708
+ }
709
+ if (classified.action === 'fix1') {
710
+ console.error(`[Router] Converting ${classified.newBlocks.filter(b => b.type === 'tool_use').length} XML tool call(s) to tool_use blocks`);
711
+ emitReconstructedSseStream(res, finalResult.messageStartData, classified.newBlocks, classified.stopReason, finalResult.outputTokens);
712
+ }
713
+ else if (classified.action === 'fix2') {
714
+ console.error('[Router] Injecting fallback text block (thinking-only response detected)');
715
+ emitReconstructedSseStream(res, finalResult.messageStartData, classified.newBlocks, classified.stopReason, finalResult.outputTokens);
716
+ }
717
+ else if (classified.action === 'fix3') {
718
+ const skipped = classified.skippedNames ?? [];
719
+ const errorCycles = countConsecutiveErrorCycles(requestBody);
720
+ if (errorCycles >= 2) {
721
+ // Already tried error feedback twice — model is stuck, strip to avoid infinite loop
722
+ console.error(`[Router] Stripping fix3 after ${errorCycles} error cycles (${skipped.join(', ')})`);
723
+ const newBlocks = finalResult.blocks.filter(b => b.type === 'thinking');
724
+ emitReconstructedSseStream(res, finalResult.messageStartData, newBlocks, finalResult.stopReason, finalResult.outputTokens);
725
+ }
726
+ else {
727
+ // Send empty tool_use blocks so Claude Code returns parameter errors for model self-correction
728
+ console.error(`[Router] Forwarding ${skipped.length} empty tool_use block(s) for error feedback [cycle ${errorCycles + 1}] (${skipped.join(', ')})`);
729
+ const emptyToolBlocks = skipped.map(name => ({
730
+ type: 'tool_use',
731
+ content: '',
732
+ name,
733
+ id: generateToolUseId(),
734
+ input: {},
735
+ }));
736
+ const newBlocks = [
737
+ ...finalResult.blocks.filter(b => b.type === 'thinking'),
738
+ ...emptyToolBlocks,
739
+ ];
740
+ emitReconstructedSseStream(res, finalResult.messageStartData, newBlocks, 'tool_use', finalResult.outputTokens);
741
+ }
742
+ }
743
+ else if (classified.action === 'fix4') {
744
+ // Text + malformed tool calls
745
+ const skipped = classified.skippedNames ?? [];
746
+ const errorCycles = countConsecutiveErrorCycles(requestBody);
747
+ if (errorCycles >= 2) {
748
+ // Already tried error feedback twice — strip malformed calls, return just the text
749
+ console.error(`[Router] Stripping fix4 malformed call(s) after ${errorCycles} error cycles (${skipped.join(', ')})`);
750
+ const textOnlyBlocks = classified.newBlocks.filter(b => b.type !== 'tool_use');
751
+ emitReconstructedSseStream(res, finalResult.messageStartData, textOnlyBlocks, 'end_turn', finalResult.outputTokens);
752
+ }
753
+ else {
754
+ console.error(`[Router] Text + ${skipped.length} malformed tool call(s), forwarding empty tool_use for error feedback [cycle ${errorCycles + 1}] (${skipped.join(', ')})`);
755
+ emitReconstructedSseStream(res, finalResult.messageStartData, classified.newBlocks, 'tool_use', finalResult.outputTokens);
756
+ }
757
+ }
758
+ else {
759
+ // Raw passthrough
760
+ for (const event of finalResult.rawEvents) {
761
+ res.write(event + '\n\n');
762
+ }
763
+ }
764
+ res.end();
345
765
  await this.logRequest(modelName, '/v1/messages', backendRes.statusCode || 200, timer.elapsed(), undefined, `${server.host}:${server.port}`, promptPreview);
346
766
  resolve();
347
767
  });
348
768
  }
349
769
  else {
350
- // For non-streaming, collect response and forward
770
+ // Non-streaming: collect full response then apply fixes
351
771
  let responseData = '';
352
772
  backendRes.on('data', (chunk) => {
353
773
  responseData += chunk.toString();
354
774
  });
355
775
  backendRes.on('end', async () => {
356
- res.writeHead(backendRes.statusCode || 200, {
357
- 'Content-Type': 'application/json',
358
- });
359
- res.end(responseData);
776
+ let finalResponse = responseData;
777
+ try {
778
+ const responseObj = JSON.parse(responseData);
779
+ if (Array.isArray(responseObj.content)) {
780
+ const textBlocks = responseObj.content.filter((c) => c.type === 'text');
781
+ const allText = textBlocks.map((c) => c.text ?? '').join('');
782
+ const { toolCalls, cleanText, skippedNames } = parseXmlToolCalls(allText);
783
+ if (toolCalls.length > 0) {
784
+ // Fix 1: XML tool calls
785
+ console.error(`[Router] Converting ${toolCalls.length} XML tool call(s) to tool_use blocks`);
786
+ const newContent = responseObj.content.filter((c) => c.type !== 'text');
787
+ if (cleanText)
788
+ newContent.push({ type: 'text', text: cleanText });
789
+ for (const tc of toolCalls) {
790
+ newContent.push({ type: 'tool_use', id: generateToolUseId(), name: tc.name, input: tc.input });
791
+ }
792
+ responseObj.content = newContent;
793
+ responseObj.stop_reason = 'tool_use';
794
+ finalResponse = JSON.stringify(responseObj);
795
+ }
796
+ else if (allText && !cleanText) {
797
+ const errorCycles = countConsecutiveErrorCycles(requestBody);
798
+ // Fix 3: error feedback with loop detection
799
+ if (errorCycles >= 2) {
800
+ console.error(`[Router] Stripping fix3 after ${errorCycles} error cycles (${skippedNames.join(', ')})`);
801
+ responseObj.content = responseObj.content.filter((c) => c.type !== 'text');
802
+ finalResponse = JSON.stringify(responseObj);
803
+ }
804
+ else {
805
+ console.error(`[Router] Forwarding ${skippedNames.length} empty tool_use block(s) for error feedback [cycle ${errorCycles + 1}] (${skippedNames.join(', ')})`);
806
+ const emptyToolUseBlocks = skippedNames.map(name => ({
807
+ type: 'tool_use',
808
+ id: generateToolUseId(),
809
+ name,
810
+ input: {},
811
+ }));
812
+ responseObj.content = [
813
+ ...responseObj.content.filter((c) => c.type !== 'text'),
814
+ ...emptyToolUseBlocks,
815
+ ];
816
+ responseObj.stop_reason = 'tool_use';
817
+ finalResponse = JSON.stringify(responseObj);
818
+ }
819
+ }
820
+ else if (cleanText && skippedNames.length > 0) {
821
+ const errorCycles = countConsecutiveErrorCycles(requestBody);
822
+ // Fix 4: text + malformed tool calls with loop detection
823
+ if (errorCycles >= 2) {
824
+ console.error(`[Router] Stripping fix4 malformed call(s) after ${errorCycles} error cycles (${skippedNames.join(', ')})`);
825
+ responseObj.content = [
826
+ ...responseObj.content.filter((c) => c.type !== 'text'),
827
+ { type: 'text', text: cleanText },
828
+ ];
829
+ finalResponse = JSON.stringify(responseObj);
830
+ }
831
+ else {
832
+ console.error(`[Router] Text + ${skippedNames.length} malformed tool call(s), forwarding empty tool_use for error feedback [cycle ${errorCycles + 1}] (${skippedNames.join(', ')})`);
833
+ const emptyToolUseBlocks = skippedNames.map(name => ({
834
+ type: 'tool_use',
835
+ id: generateToolUseId(),
836
+ name,
837
+ input: {},
838
+ }));
839
+ responseObj.content = [
840
+ ...responseObj.content.filter((c) => c.type !== 'text'),
841
+ { type: 'text', text: cleanText },
842
+ ...emptyToolUseBlocks,
843
+ ];
844
+ responseObj.stop_reason = 'tool_use';
845
+ finalResponse = JSON.stringify(responseObj);
846
+ }
847
+ }
848
+ else {
849
+ // Fix 2: Thinking-only
850
+ const hasText = responseObj.content.some((c) => c.type === 'text' && c.text);
851
+ const thinkingBlocks = responseObj.content.filter((c) => c.type === 'thinking');
852
+ if (!hasText && thinkingBlocks.length > 0) {
853
+ console.error('[Router] Injecting fallback text block (thinking-only response detected)');
854
+ const thinkingText = thinkingBlocks.map((c) => c.thinking ?? '').join('\n');
855
+ responseObj.content.push({ type: 'text', text: thinkingText });
856
+ finalResponse = JSON.stringify(responseObj);
857
+ }
858
+ }
859
+ }
860
+ }
861
+ catch {
862
+ // Not valid JSON or unexpected shape — forward original
863
+ }
864
+ res.writeHead(backendRes.statusCode || 200, { 'Content-Type': 'application/json' });
865
+ res.end(finalResponse);
360
866
  await this.logRequest(modelName, '/v1/messages', backendRes.statusCode || 200, timer.elapsed(), undefined, `${server.host}:${server.port}`, promptPreview);
361
867
  resolve();
362
868
  });
@@ -678,6 +1184,14 @@ class RouterServer {
678
1184
  */
679
1185
  async findServerForModel(modelName) {
680
1186
  const servers = await this.getAllServers();
1187
+ // 1. Check aliases first (exact match, case-sensitive)
1188
+ const aliasMatch = servers.find(s => s.alias === modelName);
1189
+ if (aliasMatch)
1190
+ return aliasMatch;
1191
+ // 2. Check aliases with case-insensitive matching
1192
+ const aliasMatchCaseInsensitive = servers.find(s => s.alias && s.alias.toLowerCase() === modelName.toLowerCase());
1193
+ if (aliasMatchCaseInsensitive)
1194
+ return aliasMatchCaseInsensitive;
681
1195
  // Normalize a model name for flexible matching (lowercase, no extension, normalize separators)
682
1196
  const normalize = (name) => {
683
1197
  return name
@@ -686,22 +1200,22 @@ class RouterServer {
686
1200
  .replace(/[_-]/g, '-'); // Normalize underscores and hyphens to hyphens
687
1201
  };
688
1202
  const normalizedRequest = normalize(modelName);
689
- // Try exact match first
1203
+ // 3. Try exact model name match
690
1204
  const exactMatch = servers.find(s => s.modelName === modelName);
691
1205
  if (exactMatch)
692
1206
  return exactMatch;
693
- // Try case-insensitive match
1207
+ // 4. Try case-insensitive model name match
694
1208
  const caseInsensitiveMatch = servers.find(s => s.modelName.toLowerCase() === modelName.toLowerCase());
695
1209
  if (caseInsensitiveMatch)
696
1210
  return caseInsensitiveMatch;
697
- // Try adding .gguf extension if not present
1211
+ // 5. Try adding .gguf extension if not present
698
1212
  if (!modelName.endsWith('.gguf')) {
699
1213
  const withExtension = modelName + '.gguf';
700
1214
  const extensionMatch = servers.find(s => s.modelName.toLowerCase() === withExtension.toLowerCase());
701
1215
  if (extensionMatch)
702
1216
  return extensionMatch;
703
1217
  }
704
- // Try normalized matching (handles case, extension, and underscore/hyphen variations)
1218
+ // 6. Try normalized matching (handles case, extension, and underscore/hyphen variations)
705
1219
  const normalizedMatch = servers.find(s => normalize(s.modelName) === normalizedRequest);
706
1220
  if (normalizedMatch)
707
1221
  return normalizedMatch;