@appkit/llamacpp-cli 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +271 -277
- package/dist/cli.js +133 -23
- package/dist/cli.js.map +1 -1
- package/dist/commands/admin/config.d.ts +1 -1
- package/dist/commands/admin/config.js +5 -5
- package/dist/commands/admin/config.js.map +1 -1
- package/dist/commands/admin/log-config.d.ts +11 -0
- package/dist/commands/admin/log-config.d.ts.map +1 -0
- package/dist/commands/admin/log-config.js +159 -0
- package/dist/commands/admin/log-config.js.map +1 -0
- package/dist/commands/admin/logs.d.ts +2 -3
- package/dist/commands/admin/logs.d.ts.map +1 -1
- package/dist/commands/admin/logs.js +6 -48
- package/dist/commands/admin/logs.js.map +1 -1
- package/dist/commands/admin/status.d.ts.map +1 -1
- package/dist/commands/admin/status.js +1 -0
- package/dist/commands/admin/status.js.map +1 -1
- package/dist/commands/config.d.ts +1 -0
- package/dist/commands/config.d.ts.map +1 -1
- package/dist/commands/config.js +74 -196
- package/dist/commands/config.js.map +1 -1
- package/dist/commands/create.d.ts +3 -2
- package/dist/commands/create.d.ts.map +1 -1
- package/dist/commands/create.js +36 -98
- package/dist/commands/create.js.map +1 -1
- package/dist/commands/delete.d.ts.map +1 -1
- package/dist/commands/delete.js +7 -24
- package/dist/commands/delete.js.map +1 -1
- package/dist/commands/internal/server-wrapper.d.ts +15 -0
- package/dist/commands/internal/server-wrapper.d.ts.map +1 -0
- package/dist/commands/internal/server-wrapper.js +126 -0
- package/dist/commands/internal/server-wrapper.js.map +1 -0
- package/dist/commands/logs-all.d.ts +0 -2
- package/dist/commands/logs-all.d.ts.map +1 -1
- package/dist/commands/logs-all.js +1 -61
- package/dist/commands/logs-all.js.map +1 -1
- package/dist/commands/logs.d.ts +2 -5
- package/dist/commands/logs.d.ts.map +1 -1
- package/dist/commands/logs.js +104 -120
- package/dist/commands/logs.js.map +1 -1
- package/dist/commands/migrate-labels.d.ts +12 -0
- package/dist/commands/migrate-labels.d.ts.map +1 -0
- package/dist/commands/migrate-labels.js +160 -0
- package/dist/commands/migrate-labels.js.map +1 -0
- package/dist/commands/ps.d.ts.map +1 -1
- package/dist/commands/ps.js +2 -1
- package/dist/commands/ps.js.map +1 -1
- package/dist/commands/rm.d.ts.map +1 -1
- package/dist/commands/rm.js +22 -48
- package/dist/commands/rm.js.map +1 -1
- package/dist/commands/router/config.d.ts +1 -1
- package/dist/commands/router/config.js +6 -6
- package/dist/commands/router/config.js.map +1 -1
- package/dist/commands/router/logs.d.ts +2 -4
- package/dist/commands/router/logs.d.ts.map +1 -1
- package/dist/commands/router/logs.js +34 -189
- package/dist/commands/router/logs.js.map +1 -1
- package/dist/commands/router/status.d.ts.map +1 -1
- package/dist/commands/router/status.js +1 -0
- package/dist/commands/router/status.js.map +1 -1
- package/dist/commands/server-show.d.ts.map +1 -1
- package/dist/commands/server-show.js +3 -0
- package/dist/commands/server-show.js.map +1 -1
- package/dist/commands/start.d.ts.map +1 -1
- package/dist/commands/start.js +21 -72
- package/dist/commands/start.js.map +1 -1
- package/dist/commands/stop.d.ts.map +1 -1
- package/dist/commands/stop.js +10 -26
- package/dist/commands/stop.js.map +1 -1
- package/dist/launchers/llamacpp-admin +8 -0
- package/dist/launchers/llamacpp-router +8 -0
- package/dist/launchers/llamacpp-server +8 -0
- package/dist/lib/admin-manager.d.ts +4 -0
- package/dist/lib/admin-manager.d.ts.map +1 -1
- package/dist/lib/admin-manager.js +42 -18
- package/dist/lib/admin-manager.js.map +1 -1
- package/dist/lib/admin-server.d.ts +48 -1
- package/dist/lib/admin-server.d.ts.map +1 -1
- package/dist/lib/admin-server.js +632 -238
- package/dist/lib/admin-server.js.map +1 -1
- package/dist/lib/config-generator.d.ts +1 -0
- package/dist/lib/config-generator.d.ts.map +1 -1
- package/dist/lib/config-generator.js +12 -5
- package/dist/lib/config-generator.js.map +1 -1
- package/dist/lib/keyboard-manager.d.ts +162 -0
- package/dist/lib/keyboard-manager.d.ts.map +1 -0
- package/dist/lib/keyboard-manager.js +247 -0
- package/dist/lib/keyboard-manager.js.map +1 -0
- package/dist/lib/label-migration.d.ts +65 -0
- package/dist/lib/label-migration.d.ts.map +1 -0
- package/dist/lib/label-migration.js +458 -0
- package/dist/lib/label-migration.js.map +1 -0
- package/dist/lib/launchctl-manager.d.ts +9 -0
- package/dist/lib/launchctl-manager.d.ts.map +1 -1
- package/dist/lib/launchctl-manager.js +65 -19
- package/dist/lib/launchctl-manager.js.map +1 -1
- package/dist/lib/log-management-service.d.ts +51 -0
- package/dist/lib/log-management-service.d.ts.map +1 -0
- package/dist/lib/log-management-service.js +124 -0
- package/dist/lib/log-management-service.js.map +1 -0
- package/dist/lib/log-workers.d.ts +70 -0
- package/dist/lib/log-workers.d.ts.map +1 -0
- package/dist/lib/log-workers.js +217 -0
- package/dist/lib/log-workers.js.map +1 -0
- package/dist/lib/model-downloader.d.ts +9 -1
- package/dist/lib/model-downloader.d.ts.map +1 -1
- package/dist/lib/model-downloader.js +98 -1
- package/dist/lib/model-downloader.js.map +1 -1
- package/dist/lib/model-management-service.d.ts +60 -0
- package/dist/lib/model-management-service.d.ts.map +1 -0
- package/dist/lib/model-management-service.js +246 -0
- package/dist/lib/model-management-service.js.map +1 -0
- package/dist/lib/model-management-service.test.d.ts +2 -0
- package/dist/lib/model-management-service.test.d.ts.map +1 -0
- package/dist/lib/model-management-service.test.js.map +1 -0
- package/dist/lib/model-scanner.d.ts +15 -3
- package/dist/lib/model-scanner.d.ts.map +1 -1
- package/dist/lib/model-scanner.js +174 -17
- package/dist/lib/model-scanner.js.map +1 -1
- package/dist/lib/openapi-spec.d.ts +1335 -0
- package/dist/lib/openapi-spec.d.ts.map +1 -0
- package/dist/lib/openapi-spec.js +1017 -0
- package/dist/lib/openapi-spec.js.map +1 -0
- package/dist/lib/router-logger.d.ts +1 -1
- package/dist/lib/router-logger.d.ts.map +1 -1
- package/dist/lib/router-logger.js +13 -11
- package/dist/lib/router-logger.js.map +1 -1
- package/dist/lib/router-manager.d.ts +4 -0
- package/dist/lib/router-manager.d.ts.map +1 -1
- package/dist/lib/router-manager.js +30 -18
- package/dist/lib/router-manager.js.map +1 -1
- package/dist/lib/router-server.d.ts +6 -0
- package/dist/lib/router-server.d.ts.map +1 -1
- package/dist/lib/router-server.js +534 -20
- package/dist/lib/router-server.js.map +1 -1
- package/dist/lib/server-config-service.d.ts +51 -0
- package/dist/lib/server-config-service.d.ts.map +1 -0
- package/dist/lib/server-config-service.js +328 -0
- package/dist/lib/server-config-service.js.map +1 -0
- package/dist/lib/server-config-service.test.d.ts +2 -0
- package/dist/lib/server-config-service.test.d.ts.map +1 -0
- package/dist/lib/server-config-service.test.js.map +1 -0
- package/dist/lib/server-lifecycle-service.d.ts +172 -0
- package/dist/lib/server-lifecycle-service.d.ts.map +1 -0
- package/dist/lib/server-lifecycle-service.js +619 -0
- package/dist/lib/server-lifecycle-service.js.map +1 -0
- package/dist/lib/state-manager.d.ts +18 -1
- package/dist/lib/state-manager.d.ts.map +1 -1
- package/dist/lib/state-manager.js +51 -2
- package/dist/lib/state-manager.js.map +1 -1
- package/dist/lib/status-checker.d.ts +11 -4
- package/dist/lib/status-checker.d.ts.map +1 -1
- package/dist/lib/status-checker.js +34 -1
- package/dist/lib/status-checker.js.map +1 -1
- package/dist/lib/validation-service.d.ts +43 -0
- package/dist/lib/validation-service.d.ts.map +1 -0
- package/dist/lib/validation-service.js +112 -0
- package/dist/lib/validation-service.js.map +1 -0
- package/dist/lib/validation-service.test.d.ts +2 -0
- package/dist/lib/validation-service.test.d.ts.map +1 -0
- package/dist/lib/validation-service.test.js.map +1 -0
- package/dist/scripts/http-log-filter.sh +8 -0
- package/dist/tui/ConfigApp.d.ts.map +1 -1
- package/dist/tui/ConfigApp.js +222 -184
- package/dist/tui/ConfigApp.js.map +1 -1
- package/dist/tui/HistoricalMonitorApp.d.ts.map +1 -1
- package/dist/tui/HistoricalMonitorApp.js +12 -0
- package/dist/tui/HistoricalMonitorApp.js.map +1 -1
- package/dist/tui/ModelsApp.d.ts.map +1 -1
- package/dist/tui/ModelsApp.js +93 -17
- package/dist/tui/ModelsApp.js.map +1 -1
- package/dist/tui/MonitorApp.d.ts.map +1 -1
- package/dist/tui/MonitorApp.js +1 -3
- package/dist/tui/MonitorApp.js.map +1 -1
- package/dist/tui/MultiServerMonitorApp.d.ts +3 -3
- package/dist/tui/MultiServerMonitorApp.d.ts.map +1 -1
- package/dist/tui/MultiServerMonitorApp.js +724 -508
- package/dist/tui/MultiServerMonitorApp.js.map +1 -1
- package/dist/tui/RootNavigator.d.ts.map +1 -1
- package/dist/tui/RootNavigator.js +17 -1
- package/dist/tui/RootNavigator.js.map +1 -1
- package/dist/tui/RouterApp.d.ts +6 -0
- package/dist/tui/RouterApp.d.ts.map +1 -0
- package/dist/tui/RouterApp.js +928 -0
- package/dist/tui/RouterApp.js.map +1 -0
- package/dist/tui/SearchApp.d.ts.map +1 -1
- package/dist/tui/SearchApp.js +27 -6
- package/dist/tui/SearchApp.js.map +1 -1
- package/dist/tui/shared/modal-controller.d.ts +65 -0
- package/dist/tui/shared/modal-controller.d.ts.map +1 -0
- package/dist/tui/shared/modal-controller.js +625 -0
- package/dist/tui/shared/modal-controller.js.map +1 -0
- package/dist/tui/shared/overlay-utils.d.ts +7 -0
- package/dist/tui/shared/overlay-utils.d.ts.map +1 -0
- package/dist/tui/shared/overlay-utils.js +54 -0
- package/dist/tui/shared/overlay-utils.js.map +1 -0
- package/dist/types/admin-config.d.ts +15 -2
- package/dist/types/admin-config.d.ts.map +1 -1
- package/dist/types/model-info.d.ts +5 -0
- package/dist/types/model-info.d.ts.map +1 -1
- package/dist/types/router-config.d.ts +2 -2
- package/dist/types/router-config.d.ts.map +1 -1
- package/dist/types/server-config.d.ts +8 -0
- package/dist/types/server-config.d.ts.map +1 -1
- package/dist/types/server-config.js +25 -0
- package/dist/types/server-config.js.map +1 -1
- package/dist/utils/http-log-filter.d.ts +10 -0
- package/dist/utils/http-log-filter.d.ts.map +1 -0
- package/dist/utils/http-log-filter.js +84 -0
- package/dist/utils/http-log-filter.js.map +1 -0
- package/dist/utils/log-parser.d.ts.map +1 -1
- package/dist/utils/log-parser.js +7 -4
- package/dist/utils/log-parser.js.map +1 -1
- package/dist/utils/log-utils.d.ts +59 -4
- package/dist/utils/log-utils.d.ts.map +1 -1
- package/dist/utils/log-utils.js +150 -11
- package/dist/utils/log-utils.js.map +1 -1
- package/dist/utils/shard-utils.d.ts +72 -0
- package/dist/utils/shard-utils.d.ts.map +1 -0
- package/dist/utils/shard-utils.js +168 -0
- package/dist/utils/shard-utils.js.map +1 -0
- package/package.json +18 -4
- package/src/launchers/llamacpp-admin +8 -0
- package/src/launchers/llamacpp-router +8 -0
- package/src/launchers/llamacpp-server +8 -0
- package/web/dist/assets/index-C7zA1ach.js +50 -0
- package/web/dist/assets/index-aLnMmRR8.css +1 -0
- package/web/dist/index.html +2 -2
- package/web/dist/assets/index-Bin89Lwr.css +0 -1
- package/web/dist/assets/index-CVmonw3T.js +0 -17
|
@@ -42,6 +42,216 @@ const fs = __importStar(require("fs/promises"));
|
|
|
42
42
|
const path = __importStar(require("path"));
|
|
43
43
|
const file_utils_1 = require("../utils/file-utils");
|
|
44
44
|
const router_logger_1 = require("./router-logger");
|
|
45
|
+
/**
 * Parse Qwen3-Coder XML tool calls from text content.
 * Handles: <tool_call><function=NAME>\n<parameter=P>V</parameter></function></tool_call>
 * (the closing `>` after NAME is not required).
 *
 * Tool and parameter names may contain letters, digits, underscores and
 * hyphens, so names such as `mcp__my-server__read` are captured in full
 * instead of being cut at the first hyphen.
 *
 * @param {string} text - Concatenated text content produced by the model.
 * @returns {{toolCalls: Array<{name: string, input: Object<string, string>}>, cleanText: string, skippedNames: string[]}}
 *   `toolCalls` holds every call that had at least one parameter (values are
 *   trimmed strings); `skippedNames` holds the names of calls that had no
 *   parameters; `cleanText` is `text` with every <tool_call> element removed
 *   and surrounding whitespace trimmed.
 */
function parseXmlToolCalls(text) {
    const toolCalls = [];
    const skippedNames = [];
    for (const [, inner] of text.matchAll(/<tool_call>([\s\S]*?)<\/tool_call>/g)) {
        const funcMatch = /<function=([\w-]+)/.exec(inner);
        if (!funcMatch) {
            // No function tag at all: nothing to report, the element is only stripped below
            continue;
        }
        const name = funcMatch[1];
        const input = {};
        for (const [, paramName, paramValue] of inner.matchAll(/<parameter=([\w-]+)>([\s\S]*?)<\/parameter>/g)) {
            input[paramName] = paramValue.trim();
        }
        // Skip malformed tool calls with no parameters (model generation failure)
        if (Object.keys(input).length === 0) {
            skippedNames.push(name);
            continue;
        }
        toolCalls.push({ name, input });
    }
    const cleanText = text.replace(/<tool_call>[\s\S]*?<\/tool_call>/g, '').trim();
    return { toolCalls, cleanText, skippedNames };
}
|
|
77
|
+
/**
 * Build a tool_use identifier: the prefix `toolu_` followed by 16 lowercase
 * hexadecimal digits, each drawn from Math.random (not cryptographically strong).
 *
 * @returns {string} The generated identifier.
 */
function generateToolUseId() {
    let hexDigits = '';
    for (let position = 0; position < 16; position += 1) {
        const nibble = Math.floor(Math.random() * 16);
        hexDigits += nibble.toString(16);
    }
    return 'toolu_' + hexDigits;
}
|
|
80
|
+
/**
 * Count the trailing run of user messages whose tool_result entries are all errors.
 *
 * The serialized request is parsed and its `messages` walked from newest to
 * oldest. Assistant messages are stepped over. The walk ends at the first
 * message that is neither assistant nor user, at a user message carrying no
 * tool_result entries, or at a user message with at least one tool_result that
 * is not flagged `is_error`.
 * Used to detect infinite error-feedback loops: if >= 2, stop sending error feedback and strip.
 *
 * @param {string} requestBody - JSON text of the request.
 * @returns {number} Number of consecutive all-error user turns; 0 when the body
 *   cannot be parsed or any unexpected shape makes the walk throw.
 */
function countConsecutiveErrorCycles(requestBody) {
    try {
        const history = JSON.parse(requestBody).messages ?? [];
        let cycles = 0;
        let position = history.length;
        while (position-- > 0) {
            const { role, content } = history[position];
            if (role === 'assistant') {
                // Assistant turns sit between the user turns being counted
                continue;
            }
            if (role !== 'user') {
                break;
            }
            const entries = Array.isArray(content) ? content : [];
            const results = entries.filter((entry) => entry.type === 'tool_result');
            const onlyErrors = results.length > 0 && results.every((entry) => entry.is_error);
            if (!onlyErrors) {
                // Plain user message, or mixed / successful tool results: the run is over
                break;
            }
            cycles += 1;
        }
        return cycles;
    }
    catch {
        return 0;
    }
}
|
|
112
|
+
/**
 * Write one server-sent event to the response.
 *
 * The frame is an `event:` line, a `data:` line holding the JSON-serialized
 * payload, and a blank line that terminates the event.
 *
 * @param {{write: function(string): *}} res - Response to write to.
 * @param {string} eventType - Value for the `event:` field.
 * @param {*} data - Payload passed through JSON.stringify.
 */
function emitSseEvent(res, eventType, data) {
    const eventLine = `event: ${eventType}`;
    const dataLine = `data: ${JSON.stringify(data)}`;
    res.write([eventLine, dataLine, '', ''].join('\n'));
}
|
|
115
|
+
/**
 * Emit a fully reconstructed SSE stream from parsed block state.
 * Used when the original stream needs modification (XML tool calls or thinking-only).
 *
 * Event order: optional `message_start`, then for every emitted block a
 * `content_block_start` / `content_block_delta` / `content_block_stop` triple,
 * then `message_delta` and `message_stop`.
 *
 * Block indices are assigned only to blocks that are actually emitted, so the
 * indices sent to the client are always contiguous from 0. Text and thinking
 * blocks with empty content, and blocks of any type other than `text`,
 * `thinking` or `tool_use`, are skipped without consuming an index.
 *
 * @param {object} res - Response passed to emitSseEvent for every event.
 * @param {?object} messageStartData - Payload re-sent as `message_start`; omitted when falsy.
 * @param {Array<object>} blocks - Blocks with `type` plus `content` (text/thinking)
 *   or `id`, `name`, `input` (tool_use).
 * @param {string} stopReason - Value reported as `delta.stop_reason`.
 * @param {number} outputTokens - Value reported as `usage.output_tokens`.
 */
function emitReconstructedSseStream(res, messageStartData, blocks, stopReason, outputTokens) {
    if (messageStartData) {
        emitSseEvent(res, 'message_start', messageStartData);
    }
    let idx = 0;
    // Emits the start/delta/stop triple for one block and then claims its index
    const emitBlock = (contentBlock, delta) => {
        emitSseEvent(res, 'content_block_start', { type: 'content_block_start', index: idx, content_block: contentBlock });
        emitSseEvent(res, 'content_block_delta', { type: 'content_block_delta', index: idx, delta });
        emitSseEvent(res, 'content_block_stop', { type: 'content_block_stop', index: idx });
        idx++;
    };
    for (const block of blocks) {
        switch (block.type) {
            case 'text':
                if (block.content) {
                    emitBlock({ type: 'text', text: '' }, { type: 'text_delta', text: block.content });
                }
                break;
            case 'thinking':
                if (block.content) {
                    emitBlock({ type: 'thinking', thinking: '' }, { type: 'thinking_delta', thinking: block.content });
                }
                break;
            case 'tool_use':
                // The whole input is sent as a single JSON fragment
                emitBlock({ type: 'tool_use', id: block.id, name: block.name, input: {} }, { type: 'input_json_delta', partial_json: JSON.stringify(block.input ?? {}) });
                break;
            default:
                // Unrecognised block type: nothing can be emitted, so no index is used
                break;
        }
    }
    emitSseEvent(res, 'message_delta', { type: 'message_delta', delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: outputTokens } });
    emitSseEvent(res, 'message_stop', { type: 'message_stop' });
}
|
|
149
|
+
/**
 * Make a backend HTTP request, buffer the full SSE stream, and return parsed state.
 * Used for the initial request AND for retries when the model generates malformed output.
 *
 * The response body is decoded as UTF-8 with a streaming decoder, so a
 * multi-byte character split across two network chunks is reassembled instead
 * of being replaced by U+FFFD. Events are separated on blank lines ("\n\n").
 * For each event the last `data: ` line is parsed as JSON, and events whose
 * data is not JSON (pings etc.) are kept in `rawEvents` only. A final event
 * that is not followed by a blank line is parsed as well.
 *
 * Text and thinking deltas are accumulated into the block's `content`.
 * `input_json_delta` fragments are accumulated per block and, once the stream
 * ends, parsed into the block's `input`; if the accumulated JSON is empty or
 * invalid, `input` is left unset.
 *
 * @param {object} options - Passed unchanged to http.request.
 * @param {string|Buffer} requestBody - Written as the request body.
 * @returns {Promise<{rawEvents: string[], messageStartData: ?object, stopReason: string, blocks: object[], outputTokens: number}>}
 *   `stopReason` falls back to 'end_turn' when no message_delta supplied one.
 *   The promise rejects when the request or the response emits 'error'.
 */
function bufferSseRequest(options, requestBody) {
    return new Promise((resolve, reject) => {
        const req = http.request(options, (res) => {
            const decoder = new TextDecoder('utf-8');
            let sseBuffer = '';
            const rawEvents = [];
            let messageStartData = null;
            let messageDeltaData = null;
            const parsedBlocks = {};
            // Accumulated input_json_delta text, keyed like parsedBlocks
            const toolInputJson = {};
            let outputTokens = 0;
            // Records one raw SSE event and folds its JSON payload into the state above
            const consumeEvent = (part) => {
                if (!part.trim())
                    return;
                rawEvents.push(part);
                let dataStr = '';
                for (const line of part.split('\n')) {
                    if (line.startsWith('data: '))
                        dataStr = line.slice(6);
                }
                try {
                    const data = JSON.parse(dataStr);
                    if (data.type === 'message_start') {
                        messageStartData = data;
                    }
                    else if (data.type === 'content_block_start') {
                        const idx = data.index ?? 0;
                        parsedBlocks[idx] = { type: data.content_block?.type ?? 'text', content: '', name: data.content_block?.name, id: data.content_block?.id };
                        // A restarted index must not inherit input fragments from its predecessor
                        delete toolInputJson[idx];
                    }
                    else if (data.type === 'content_block_delta') {
                        const block = parsedBlocks[data.index];
                        if (block) {
                            if (data.delta?.type === 'text_delta')
                                block.content += data.delta.text ?? '';
                            else if (data.delta?.type === 'thinking_delta')
                                block.content += data.delta.thinking ?? '';
                            else if (data.delta?.type === 'input_json_delta')
                                toolInputJson[data.index] = (toolInputJson[data.index] ?? '') + (data.delta.partial_json ?? '');
                        }
                    }
                    else if (data.type === 'message_delta') {
                        messageDeltaData = data;
                        outputTokens = data.usage?.output_tokens ?? 0;
                    }
                }
                catch { /* non-JSON SSE (ping etc.) */ }
            };
            res.on('data', (chunk) => {
                // stream: true makes the decoder hold back an incomplete trailing character
                sseBuffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
                const parts = sseBuffer.split('\n\n');
                sseBuffer = parts.pop() ?? '';
                for (const part of parts) {
                    consumeEvent(part);
                }
            });
            res.on('end', () => {
                // Flush whatever the decoder was still holding, then handle an unterminated last event
                sseBuffer += decoder.decode();
                consumeEvent(sseBuffer);
                for (const [idx, json] of Object.entries(toolInputJson)) {
                    if (!json)
                        continue;
                    try {
                        parsedBlocks[idx].input = JSON.parse(json);
                    }
                    catch { /* truncated or invalid tool input: leave input unset */ }
                }
                resolve({
                    rawEvents,
                    messageStartData,
                    stopReason: messageDeltaData?.delta?.stop_reason ?? 'end_turn',
                    blocks: Object.values(parsedBlocks),
                    outputTokens,
                });
            });
            res.on('error', reject);
        });
        req.on('error', reject);
        req.write(requestBody);
        req.end();
    });
}
|
|
218
|
+
/**
 * Apply Qwen3 model quirk fixes to a buffered SSE result.
 * Returns the action to take (what to emit) without actually emitting anything.
 *
 * The text of all text blocks is joined and scanned with parseXmlToolCalls.
 * The first matching rule wins:
 *  - `fix1`: at least one well-formed XML tool call. Non-text blocks are kept,
 *    followed by the remaining text (if any) and one tool_use block per call.
 *  - `fix3`: there was text, but nothing remains once tool-call XML is removed.
 *  - `fix4`: text remains and some calls were skipped as malformed. The text is
 *    kept and an empty-input tool_use block is added per skipped name.
 *  - `fix2`: stop reason `end_turn`, thinking present, no text block with
 *    content. The joined thinking is repeated as a text block.
 *  - `raw`: none of the above.
 *
 * @param {{blocks: Array<object>, stopReason: string}} result - Buffered stream state.
 * @returns {{action: string, newBlocks?: Array<object>, stopReason?: string, skippedNames?: string[]}}
 */
function classifyBufferedResult(result) {
    const { blocks, stopReason } = result;
    const textContents = [];
    const thinkingBlocks = [];
    const nonTextBlocks = [];
    let hasNonEmptyText = false;
    for (const block of blocks) {
        if (block.type === 'text') {
            textContents.push(block.content);
            if (block.content) {
                hasNonEmptyText = true;
            }
            continue;
        }
        nonTextBlocks.push(block);
        if (block.type === 'thinking') {
            thinkingBlocks.push(block);
        }
    }
    const allText = textContents.join('');
    const parsed = parseXmlToolCalls(allText);
    const { toolCalls, cleanText, skippedNames } = parsed;
    const buildToolUse = (name, input) => ({ type: 'tool_use', content: '', id: generateToolUseId(), name, input });
    if (toolCalls.length > 0) {
        const newBlocks = nonTextBlocks.slice();
        if (cleanText) {
            newBlocks.push({ type: 'text', content: cleanText });
        }
        for (const call of toolCalls) {
            newBlocks.push(buildToolUse(call.name, call.input));
        }
        return { action: 'fix1', newBlocks, stopReason: 'tool_use' };
    }
    if (allText && !cleanText) {
        return { action: 'fix3', skippedNames };
    }
    // fix4: text + malformed tool calls — emit text and empty tool_use blocks for error feedback
    if (cleanText && skippedNames.length > 0) {
        const newBlocks = nonTextBlocks.slice();
        newBlocks.push({ type: 'text', content: cleanText });
        for (const name of skippedNames) {
            newBlocks.push(buildToolUse(name, {}));
        }
        return { action: 'fix4', newBlocks, stopReason: 'tool_use', skippedNames };
    }
    const thinkingOnly = stopReason === 'end_turn' && thinkingBlocks.length > 0 && !hasNonEmptyText;
    if (thinkingOnly) {
        const thinkingText = thinkingBlocks.map((block) => block.content).join('\n');
        return {
            action: 'fix2',
            newBlocks: thinkingBlocks.concat([{ type: 'text', content: thinkingText }]),
            stopReason: 'end_turn',
        };
    }
    return { action: 'raw' };
}
|
|
45
255
|
/**
|
|
46
256
|
* Router HTTP server - proxies requests to backend llama.cpp servers
|
|
47
257
|
*/
|
|
@@ -53,8 +263,8 @@ class RouterServer {
|
|
|
53
263
|
throw new Error('Router configuration not found');
|
|
54
264
|
}
|
|
55
265
|
this.config = await (0, file_utils_1.readJson)(configPath);
|
|
56
|
-
// Initialize logger with
|
|
57
|
-
this.logger = new router_logger_1.RouterLogger(this.config.
|
|
266
|
+
// Initialize logger with logging setting
|
|
267
|
+
this.logger = new router_logger_1.RouterLogger(this.config.logging);
|
|
58
268
|
// Rotate log file if needed
|
|
59
269
|
await this.logger.rotateIfNeeded();
|
|
60
270
|
// Create HTTP server
|
|
@@ -110,6 +320,9 @@ class RouterServer {
|
|
|
110
320
|
else if (url === '/health' && method === 'GET') {
|
|
111
321
|
await this.handleHealth(req, res);
|
|
112
322
|
}
|
|
323
|
+
else if (url.startsWith('/props') && method === 'GET') {
|
|
324
|
+
await this.handleProps(req, res, url);
|
|
325
|
+
}
|
|
113
326
|
else if (url === '/v1/models' && method === 'GET') {
|
|
114
327
|
await this.handleModels(req, res);
|
|
115
328
|
}
|
|
@@ -149,18 +362,65 @@ class RouterServer {
|
|
|
149
362
|
timestamp: new Date().toISOString(),
|
|
150
363
|
}));
|
|
151
364
|
}
|
|
365
|
+
/**
|
|
366
|
+
* Proxy llama.cpp's /props to a backend server. Pass ?model=<name> to
|
|
367
|
+
* select which backend; otherwise picks the first running server.
|
|
368
|
+
* Used by clients (e.g. lcode) to discover the loaded n_ctx.
|
|
369
|
+
*/
|
|
370
|
+
async handleProps(req, res, url) {
|
|
371
|
+
const query = new url_1.URL(url, 'http://localhost').searchParams;
|
|
372
|
+
const requestedModel = query.get('model');
|
|
373
|
+
const servers = await this.getAllServers();
|
|
374
|
+
const running = servers.filter((s) => s.status === 'running');
|
|
375
|
+
const target = requestedModel
|
|
376
|
+
? await this.findServerForModel(requestedModel)
|
|
377
|
+
: running[0] ?? null;
|
|
378
|
+
if (!target || target.status !== 'running') {
|
|
379
|
+
this.sendError(res, 404, 'Not Found', requestedModel
|
|
380
|
+
? `No running server for model: ${requestedModel}`
|
|
381
|
+
: 'No running servers');
|
|
382
|
+
return;
|
|
383
|
+
}
|
|
384
|
+
const host = target.host === '0.0.0.0' ? '127.0.0.1' : target.host;
|
|
385
|
+
const backendReq = http.request({
|
|
386
|
+
hostname: host,
|
|
387
|
+
port: target.port,
|
|
388
|
+
path: '/props',
|
|
389
|
+
method: 'GET',
|
|
390
|
+
timeout: this.config.requestTimeout,
|
|
391
|
+
}, (backendRes) => {
|
|
392
|
+
res.writeHead(backendRes.statusCode || 200, {
|
|
393
|
+
'Content-Type': backendRes.headers['content-type'] ?? 'application/json',
|
|
394
|
+
});
|
|
395
|
+
backendRes.pipe(res);
|
|
396
|
+
});
|
|
397
|
+
backendReq.on('error', (err) => {
|
|
398
|
+
if (!res.headersSent) {
|
|
399
|
+
this.sendError(res, 502, 'Bad Gateway', `Backend /props failed: ${err.message}`);
|
|
400
|
+
}
|
|
401
|
+
});
|
|
402
|
+
backendReq.on('timeout', () => {
|
|
403
|
+
backendReq.destroy();
|
|
404
|
+
if (!res.headersSent) {
|
|
405
|
+
this.sendError(res, 504, 'Gateway Timeout', 'Backend /props did not respond in time');
|
|
406
|
+
}
|
|
407
|
+
});
|
|
408
|
+
backendReq.end();
|
|
409
|
+
}
|
|
152
410
|
/**
|
|
153
411
|
* List models endpoint - aggregate from all running servers
|
|
154
412
|
*/
|
|
155
413
|
async handleModels(req, res) {
|
|
156
414
|
const servers = await this.getAllServers();
|
|
157
415
|
const runningServers = servers.filter(s => s.status === 'running');
|
|
158
|
-
const models = runningServers.
|
|
159
|
-
|
|
160
|
-
object: 'model',
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
416
|
+
const models = runningServers.flatMap(server => {
|
|
417
|
+
const created = Math.floor(new Date(server.createdAt).getTime() / 1000);
|
|
418
|
+
const entries = [{ id: server.modelName, object: 'model', created, owned_by: 'llamacpp' }];
|
|
419
|
+
if (server.alias) {
|
|
420
|
+
entries.push({ id: server.alias, object: 'model', created, owned_by: 'llamacpp' });
|
|
421
|
+
}
|
|
422
|
+
return entries;
|
|
423
|
+
});
|
|
164
424
|
const response = {
|
|
165
425
|
object: 'list',
|
|
166
426
|
data: models,
|
|
@@ -276,6 +536,20 @@ class RouterServer {
|
|
|
276
536
|
await this.logRequest(modelName, '/v1/messages', statusCode, timer.elapsed(), errorMsg, undefined, promptPreview);
|
|
277
537
|
return;
|
|
278
538
|
}
|
|
539
|
+
// Inject tool call guidance when tools are present (Qwen3-Coder workaround:
|
|
540
|
+
// the model sometimes generates tool calls with no parameters when context is long)
|
|
541
|
+
if (anthropicRequest.tools && anthropicRequest.tools.length > 0) {
|
|
542
|
+
const guidance = 'When using tools, always include ALL required parameters with their complete values. Never omit parameters from tool calls.';
|
|
543
|
+
if (typeof anthropicRequest.system === 'string' && anthropicRequest.system) {
|
|
544
|
+
anthropicRequest.system = guidance + '\n\n' + anthropicRequest.system;
|
|
545
|
+
}
|
|
546
|
+
else if (Array.isArray(anthropicRequest.system)) {
|
|
547
|
+
anthropicRequest.system = [{ type: 'text', text: guidance }, ...anthropicRequest.system];
|
|
548
|
+
}
|
|
549
|
+
else {
|
|
550
|
+
anthropicRequest.system = guidance;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
279
553
|
// Find server for model
|
|
280
554
|
const server = await this.findServerForModel(modelName);
|
|
281
555
|
if (!server) {
|
|
@@ -333,30 +607,262 @@ class RouterServer {
|
|
|
333
607
|
const backendReq = http.request(options, (backendRes) => {
|
|
334
608
|
// Handle streaming vs non-streaming
|
|
335
609
|
if (anthropicRequest.stream) {
|
|
336
|
-
// For streaming, set SSE headers and pipe response
|
|
337
610
|
res.writeHead(backendRes.statusCode || 200, {
|
|
338
611
|
'Content-Type': 'text/event-stream',
|
|
339
612
|
'Cache-Control': 'no-cache',
|
|
340
613
|
'Connection': 'keep-alive',
|
|
341
614
|
});
|
|
342
|
-
//
|
|
343
|
-
|
|
615
|
+
// Buffer the full SSE stream so we can detect and fix Qwen3 model quirks before
|
|
616
|
+
// forwarding to the client. Headers are sent above but NO events are emitted until
|
|
617
|
+
// we've finished processing (enabling transparent retry for Fix 3).
|
|
618
|
+
let sseBuffer = '';
|
|
619
|
+
const rawEvents = [];
|
|
620
|
+
let messageStartData = null;
|
|
621
|
+
let messageDeltaData = null;
|
|
622
|
+
const parsedBlocks = {};
|
|
623
|
+
let outputTokens = 0;
|
|
624
|
+
backendRes.on('data', (chunk) => {
|
|
625
|
+
sseBuffer += chunk.toString();
|
|
626
|
+
const parts = sseBuffer.split('\n\n');
|
|
627
|
+
sseBuffer = parts.pop() ?? '';
|
|
628
|
+
for (const part of parts) {
|
|
629
|
+
if (!part.trim())
|
|
630
|
+
continue;
|
|
631
|
+
rawEvents.push(part);
|
|
632
|
+
let dataStr = '';
|
|
633
|
+
for (const line of part.split('\n')) {
|
|
634
|
+
if (line.startsWith('data: '))
|
|
635
|
+
dataStr = line.slice(6);
|
|
636
|
+
}
|
|
637
|
+
try {
|
|
638
|
+
const data = JSON.parse(dataStr);
|
|
639
|
+
if (data.type === 'message_start') {
|
|
640
|
+
messageStartData = data;
|
|
641
|
+
}
|
|
642
|
+
else if (data.type === 'content_block_start') {
|
|
643
|
+
const idx = data.index ?? 0;
|
|
644
|
+
parsedBlocks[idx] = {
|
|
645
|
+
type: data.content_block?.type ?? 'text',
|
|
646
|
+
content: '',
|
|
647
|
+
name: data.content_block?.name,
|
|
648
|
+
id: data.content_block?.id,
|
|
649
|
+
};
|
|
650
|
+
}
|
|
651
|
+
else if (data.type === 'content_block_delta') {
|
|
652
|
+
const block = parsedBlocks[data.index];
|
|
653
|
+
if (block) {
|
|
654
|
+
if (data.delta?.type === 'text_delta')
|
|
655
|
+
block.content += data.delta.text ?? '';
|
|
656
|
+
else if (data.delta?.type === 'thinking_delta')
|
|
657
|
+
block.content += data.delta.thinking ?? '';
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
else if (data.type === 'message_delta') {
|
|
661
|
+
messageDeltaData = data;
|
|
662
|
+
outputTokens = data.usage?.output_tokens ?? 0;
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
catch {
|
|
666
|
+
// Non-JSON SSE data (e.g. ping) — still buffered in rawEvents
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
});
|
|
344
670
|
backendRes.on('end', async () => {
|
|
671
|
+
if (sseBuffer.trim())
|
|
672
|
+
rawEvents.push(sseBuffer);
|
|
673
|
+
const firstResult = {
|
|
674
|
+
rawEvents,
|
|
675
|
+
messageStartData,
|
|
676
|
+
stopReason: messageDeltaData?.delta?.stop_reason ?? 'end_turn',
|
|
677
|
+
blocks: Object.values(parsedBlocks),
|
|
678
|
+
outputTokens,
|
|
679
|
+
};
|
|
680
|
+
let classified = classifyBufferedResult(firstResult);
|
|
681
|
+
let finalResult = firstResult;
|
|
682
|
+
if (classified.action === 'fix3') {
|
|
683
|
+
const skipped = classified.skippedNames ?? [];
|
|
684
|
+
// Only retry for single empty call glitches (random sampling failure).
|
|
685
|
+
// If 2+ empty calls were generated the model is in a stuck pattern — retry
|
|
686
|
+
// would just double the wait time with the same degenerate result.
|
|
687
|
+
if (skipped.length === 1) {
|
|
688
|
+
console.error(`[Router] Retrying single malformed XML call (attempted: ${skipped.join(', ')})`);
|
|
689
|
+
try {
|
|
690
|
+
const retryResult = await bufferSseRequest(options, requestBody);
|
|
691
|
+
const retryClassified = classifyBufferedResult(retryResult);
|
|
692
|
+
if (retryClassified.action !== 'fix3') {
|
|
693
|
+
classified = retryClassified;
|
|
694
|
+
finalResult = retryResult;
|
|
695
|
+
console.error(`[Router] Retry succeeded (action: ${retryClassified.action})`);
|
|
696
|
+
}
|
|
697
|
+
else {
|
|
698
|
+
console.error(`[Router] Retry also malformed, giving up`);
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
catch (err) {
|
|
702
|
+
console.error('[Router] Retry request failed:', err);
|
|
703
|
+
}
|
|
704
|
+
}
|
|
705
|
+
else {
|
|
706
|
+
console.error(`[Router] Skipping retry — model stuck generating ${skipped.length} malformed calls (${skipped.join(', ')})`);
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
if (classified.action === 'fix1') {
|
|
710
|
+
console.error(`[Router] Converting ${classified.newBlocks.filter(b => b.type === 'tool_use').length} XML tool call(s) to tool_use blocks`);
|
|
711
|
+
emitReconstructedSseStream(res, finalResult.messageStartData, classified.newBlocks, classified.stopReason, finalResult.outputTokens);
|
|
712
|
+
}
|
|
713
|
+
else if (classified.action === 'fix2') {
|
|
714
|
+
console.error('[Router] Injecting fallback text block (thinking-only response detected)');
|
|
715
|
+
emitReconstructedSseStream(res, finalResult.messageStartData, classified.newBlocks, classified.stopReason, finalResult.outputTokens);
|
|
716
|
+
}
|
|
717
|
+
else if (classified.action === 'fix3') {
|
|
718
|
+
const skipped = classified.skippedNames ?? [];
|
|
719
|
+
const errorCycles = countConsecutiveErrorCycles(requestBody);
|
|
720
|
+
if (errorCycles >= 2) {
|
|
721
|
+
// Already tried error feedback twice — model is stuck, strip to avoid infinite loop
|
|
722
|
+
console.error(`[Router] Stripping fix3 after ${errorCycles} error cycles (${skipped.join(', ')})`);
|
|
723
|
+
const newBlocks = finalResult.blocks.filter(b => b.type === 'thinking');
|
|
724
|
+
emitReconstructedSseStream(res, finalResult.messageStartData, newBlocks, finalResult.stopReason, finalResult.outputTokens);
|
|
725
|
+
}
|
|
726
|
+
else {
|
|
727
|
+
// Send empty tool_use blocks so Claude Code returns parameter errors for model self-correction
|
|
728
|
+
console.error(`[Router] Forwarding ${skipped.length} empty tool_use block(s) for error feedback [cycle ${errorCycles + 1}] (${skipped.join(', ')})`);
|
|
729
|
+
const emptyToolBlocks = skipped.map(name => ({
|
|
730
|
+
type: 'tool_use',
|
|
731
|
+
content: '',
|
|
732
|
+
name,
|
|
733
|
+
id: generateToolUseId(),
|
|
734
|
+
input: {},
|
|
735
|
+
}));
|
|
736
|
+
const newBlocks = [
|
|
737
|
+
...finalResult.blocks.filter(b => b.type === 'thinking'),
|
|
738
|
+
...emptyToolBlocks,
|
|
739
|
+
];
|
|
740
|
+
emitReconstructedSseStream(res, finalResult.messageStartData, newBlocks, 'tool_use', finalResult.outputTokens);
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
else if (classified.action === 'fix4') {
|
|
744
|
+
// Text + malformed tool calls
|
|
745
|
+
const skipped = classified.skippedNames ?? [];
|
|
746
|
+
const errorCycles = countConsecutiveErrorCycles(requestBody);
|
|
747
|
+
if (errorCycles >= 2) {
|
|
748
|
+
// Already tried error feedback twice — strip malformed calls, return just the text
|
|
749
|
+
console.error(`[Router] Stripping fix4 malformed call(s) after ${errorCycles} error cycles (${skipped.join(', ')})`);
|
|
750
|
+
const textOnlyBlocks = classified.newBlocks.filter(b => b.type !== 'tool_use');
|
|
751
|
+
emitReconstructedSseStream(res, finalResult.messageStartData, textOnlyBlocks, 'end_turn', finalResult.outputTokens);
|
|
752
|
+
}
|
|
753
|
+
else {
|
|
754
|
+
console.error(`[Router] Text + ${skipped.length} malformed tool call(s), forwarding empty tool_use for error feedback [cycle ${errorCycles + 1}] (${skipped.join(', ')})`);
|
|
755
|
+
emitReconstructedSseStream(res, finalResult.messageStartData, classified.newBlocks, 'tool_use', finalResult.outputTokens);
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
else {
|
|
759
|
+
// Raw passthrough
|
|
760
|
+
for (const event of finalResult.rawEvents) {
|
|
761
|
+
res.write(event + '\n\n');
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
res.end();
|
|
345
765
|
await this.logRequest(modelName, '/v1/messages', backendRes.statusCode || 200, timer.elapsed(), undefined, `${server.host}:${server.port}`, promptPreview);
|
|
346
766
|
resolve();
|
|
347
767
|
});
|
|
348
768
|
}
|
|
349
769
|
else {
|
|
350
|
-
//
|
|
770
|
+
// Non-streaming: collect full response then apply fixes
|
|
351
771
|
let responseData = '';
|
|
352
772
|
// Decode the backend body as UTF-8 at the stream level. Calling
// Buffer#toString() on each chunk separately corrupts any multi-byte
// character that happens to straddle a chunk boundary (each half decodes to
// U+FFFD), which would then break the JSON parsed in the 'end' handler.
// NOTE(review): assumes backendRes is a Node.js Readable (http.IncomingMessage)
// that is consumed only by the listeners registered in this branch.
backendRes.setEncoding('utf8');
backendRes.on('data', (chunk) => {
    // With an encoding set, every chunk is delivered as an already-decoded string.
    responseData += chunk;
});
|
|
355
775
|
backendRes.on('end', async () => {
    // Non-streaming path: the complete backend body is buffered in
    // `responseData`. Parse it, rewrite the `content` array where one of the
    // fix-ups below applies, and send a single JSON response. A body that is
    // not JSON, or whose `content` is not an array, is forwarded untouched.
    let finalResponse = responseData;
    // Builds one argument-less tool_use block per malformed call name. These
    // are forwarded so the caller can answer with an error the model can use
    // to correct itself on the next turn.
    const buildEmptyToolUseBlocks = (names) => names.map((name) => ({
        type: 'tool_use',
        id: generateToolUseId(),
        name,
        input: {},
    }));
    try {
        const responseObj = JSON.parse(responseData);
        if (Array.isArray(responseObj.content)) {
            const textBlocks = responseObj.content.filter((c) => c.type === 'text');
            const allText = textBlocks.map((c) => c.text ?? '').join('');
            // Default to [] so a parser result without skippedNames cannot throw below.
            const { toolCalls, cleanText, skippedNames = [] } = parseXmlToolCalls(allText);
            // Blocks other than text (e.g. thinking) are carried over unchanged by every fix.
            const nonTextBlocks = responseObj.content.filter((c) => c.type !== 'text');
            if (toolCalls.length > 0) {
                // Fix 1: XML tool calls
                console.error(`[Router] Converting ${toolCalls.length} XML tool call(s) to tool_use blocks`);
                const newContent = [...nonTextBlocks];
                if (cleanText) {
                    newContent.push({ type: 'text', text: cleanText });
                }
                for (const tc of toolCalls) {
                    newContent.push({ type: 'tool_use', id: generateToolUseId(), name: tc.name, input: tc.input });
                }
                responseObj.content = newContent;
                responseObj.stop_reason = 'tool_use';
                finalResponse = JSON.stringify(responseObj);
            }
            else if (allText && !cleanText && skippedNames.length > 0) {
                // Fix 3: error feedback with loop detection.
                // The skippedNames guard matters: without any captured name this
                // branch used to strip the text and set stop_reason to 'tool_use'
                // while emitting zero tool_use blocks.
                const errorCycles = countConsecutiveErrorCycles(requestBody);
                if (errorCycles >= 2) {
                    // Error feedback was already tried twice; drop the malformed
                    // text instead of looping again.
                    console.error(`[Router] Stripping fix3 after ${errorCycles} error cycles (${skippedNames.join(', ')})`);
                    responseObj.content = nonTextBlocks;
                    finalResponse = JSON.stringify(responseObj);
                }
                else {
                    console.error(`[Router] Forwarding ${skippedNames.length} empty tool_use block(s) for error feedback [cycle ${errorCycles + 1}] (${skippedNames.join(', ')})`);
                    responseObj.content = [
                        ...nonTextBlocks,
                        ...buildEmptyToolUseBlocks(skippedNames),
                    ];
                    responseObj.stop_reason = 'tool_use';
                    finalResponse = JSON.stringify(responseObj);
                }
            }
            else if (cleanText && skippedNames.length > 0) {
                // Fix 4: text + malformed tool calls with loop detection
                const errorCycles = countConsecutiveErrorCycles(requestBody);
                if (errorCycles >= 2) {
                    // Keep only the readable text; the malformed calls are dropped.
                    console.error(`[Router] Stripping fix4 malformed call(s) after ${errorCycles} error cycles (${skippedNames.join(', ')})`);
                    responseObj.content = [
                        ...nonTextBlocks,
                        { type: 'text', text: cleanText },
                    ];
                    finalResponse = JSON.stringify(responseObj);
                }
                else {
                    console.error(`[Router] Text + ${skippedNames.length} malformed tool call(s), forwarding empty tool_use for error feedback [cycle ${errorCycles + 1}] (${skippedNames.join(', ')})`);
                    responseObj.content = [
                        ...nonTextBlocks,
                        { type: 'text', text: cleanText },
                        ...buildEmptyToolUseBlocks(skippedNames),
                    ];
                    responseObj.stop_reason = 'tool_use';
                    finalResponse = JSON.stringify(responseObj);
                }
            }
            else {
                // Fix 2: Thinking-only. If no text block carries any text but
                // thinking blocks exist, surface the thinking as a text block.
                const hasText = responseObj.content.some((c) => c.type === 'text' && c.text);
                const thinkingBlocks = responseObj.content.filter((c) => c.type === 'thinking');
                if (!hasText && thinkingBlocks.length > 0) {
                    console.error('[Router] Injecting fallback text block (thinking-only response detected)');
                    const thinkingText = thinkingBlocks.map((c) => c.thinking ?? '').join('\n');
                    responseObj.content.push({ type: 'text', text: thinkingText });
                    finalResponse = JSON.stringify(responseObj);
                }
            }
        }
    }
    catch {
        // Not valid JSON or unexpected shape — forward original
    }
    res.writeHead(backendRes.statusCode || 200, { 'Content-Type': 'application/json' });
    res.end(finalResponse);
    try {
        await this.logRequest(modelName, '/v1/messages', backendRes.statusCode || 200, timer.elapsed(), undefined, `${server.host}:${server.port}`, promptPreview);
    }
    finally {
        // Always settle the surrounding promise, even if logging rejects.
        resolve();
    }
});
|
|
@@ -678,6 +1184,14 @@ class RouterServer {
|
|
|
678
1184
|
*/
|
|
679
1185
|
async findServerForModel(modelName) {
|
|
680
1186
|
const servers = await this.getAllServers();
|
|
1187
|
+
// 1. Check aliases first (exact match, case-sensitive)
|
|
1188
|
+
const aliasMatch = servers.find(s => s.alias === modelName);
|
|
1189
|
+
if (aliasMatch)
|
|
1190
|
+
return aliasMatch;
|
|
1191
|
+
// 2. Check aliases with case-insensitive matching
|
|
1192
|
+
const aliasMatchCaseInsensitive = servers.find(s => s.alias && s.alias.toLowerCase() === modelName.toLowerCase());
|
|
1193
|
+
if (aliasMatchCaseInsensitive)
|
|
1194
|
+
return aliasMatchCaseInsensitive;
|
|
681
1195
|
// Normalize a model name for flexible matching (lowercase, no extension, normalize separators)
|
|
682
1196
|
const normalize = (name) => {
|
|
683
1197
|
return name
|
|
@@ -686,22 +1200,22 @@ class RouterServer {
|
|
|
686
1200
|
.replace(/[_-]/g, '-'); // Normalize underscores and hyphens to hyphens
|
|
687
1201
|
};
|
|
688
1202
|
const normalizedRequest = normalize(modelName);
|
|
689
|
-
// Try exact match
|
|
1203
|
+
// 3. Try exact model name match
|
|
690
1204
|
const exactMatch = servers.find(s => s.modelName === modelName);
|
|
691
1205
|
if (exactMatch)
|
|
692
1206
|
return exactMatch;
|
|
693
|
-
// Try case-insensitive match
|
|
1207
|
+
// 4. Try case-insensitive model name match
|
|
694
1208
|
const caseInsensitiveMatch = servers.find(s => s.modelName.toLowerCase() === modelName.toLowerCase());
|
|
695
1209
|
if (caseInsensitiveMatch)
|
|
696
1210
|
return caseInsensitiveMatch;
|
|
697
|
-
// Try adding .gguf extension if not present
|
|
1211
|
+
// 5. Try adding .gguf extension if not present
|
|
698
1212
|
if (!modelName.endsWith('.gguf')) {
|
|
699
1213
|
const withExtension = modelName + '.gguf';
|
|
700
1214
|
const extensionMatch = servers.find(s => s.modelName.toLowerCase() === withExtension.toLowerCase());
|
|
701
1215
|
if (extensionMatch)
|
|
702
1216
|
return extensionMatch;
|
|
703
1217
|
}
|
|
704
|
-
// Try normalized matching (handles case, extension, and underscore/hyphen variations)
|
|
1218
|
+
// 6. Try normalized matching (handles case, extension, and underscore/hyphen variations)
|
|
705
1219
|
const normalizedMatch = servers.find(s => normalize(s.modelName) === normalizedRequest);
|
|
706
1220
|
if (normalizedMatch)
|
|
707
1221
|
return normalizedMatch;
|