@appkit/llamacpp-cli 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,6 +42,216 @@ const fs = __importStar(require("fs/promises"));
42
42
  const path = __importStar(require("path"));
43
43
  const file_utils_1 = require("../utils/file-utils");
44
44
  const router_logger_1 = require("./router-logger");
45
/**
 * Parse Qwen3-Coder XML tool calls from text content.
 *
 * Recognised shape:
 *   <tool_call><function=NAME>
 *   <parameter=P>V</parameter>
 *   </function></tool_call>
 *
 * Function names may contain word characters and hyphens; parameter names may
 * additionally contain dots. (A plain `\w+` would truncate a name such as
 * `read-file` to `read` and would fail to match a parameter such as
 * `file-path`, turning a valid call into a "malformed" one.)
 *
 * Parameter values are always returned as trimmed strings.
 *
 * @param {string} text - Raw text produced by the model.
 * @returns {{toolCalls: Array<{name: string, input: Object}>, cleanText: string, skippedNames: string[]}}
 *   `toolCalls`   - calls that carried at least one parameter;
 *   `cleanText`   - `text` with every <tool_call>…</tool_call> span removed, trimmed;
 *   `skippedNames`- function names of calls that carried no parameters.
 */
function parseXmlToolCalls(text) {
    const toolCalls = [];
    const skippedNames = [];
    for (const [, inner] of text.matchAll(/<tool_call>([\s\S]*?)<\/tool_call>/g)) {
        const funcMatch = /<function=([\w-]+)/.exec(inner);
        if (!funcMatch) {
            continue;
        }
        const name = funcMatch[1];
        const input = {};
        for (const [, paramName, rawValue] of inner.matchAll(/<parameter=([\w.-]+)>([\s\S]*?)<\/parameter>/g)) {
            input[paramName] = rawValue.trim();
        }
        // Skip malformed tool calls with no parameters (model generation failure)
        if (Object.keys(input).length === 0) {
            skippedNames.push(name);
            continue;
        }
        toolCalls.push({ name, input });
    }
    const cleanText = text.replace(/<tool_call>[\s\S]*?<\/tool_call>/g, '').trim();
    return { toolCalls, cleanText, skippedNames };
}
77
/**
 * Build a tool-use identifier: the literal prefix `toolu_` followed by
 * 16 random lowercase hexadecimal digits.
 *
 * @returns {string}
 */
function generateToolUseId() {
    let hexDigits = '';
    for (let i = 0; i < 16; i++) {
        const nibble = Math.floor(Math.random() * 16);
        hexDigits += nibble.toString(16);
    }
    return 'toolu_' + hexDigits;
}
80
/**
 * Count how many consecutive recent user messages contained only error tool_results.
 * Used to detect infinite error-feedback loops: if >= 2, stop sending error feedback and strip.
 *
 * The scan walks `messages` from newest to oldest, skipping assistant turns,
 * and stops at the first user message that has no tool_result entries or has
 * at least one non-error tool_result.
 *
 * A malformed entry (e.g. `null`) ends the scan but keeps the count gathered
 * so far; it no longer resets the result to 0.
 *
 * @param {string} requestBody - JSON-encoded request body with a `messages` array.
 * @returns {number} Number of consecutive error-only cycles; 0 when the body
 *   is not valid JSON or has no `messages` array.
 */
function countConsecutiveErrorCycles(requestBody) {
    let messages;
    try {
        messages = JSON.parse(requestBody)?.messages;
    }
    catch {
        // Unparseable body: nothing to count.
        return 0;
    }
    if (!Array.isArray(messages)) {
        return 0;
    }
    let count = 0;
    for (let i = messages.length - 1; i >= 0; i--) {
        const msg = messages[i];
        if (msg?.role === 'assistant') {
            continue; // skip assistant turns
        }
        if (msg?.role !== 'user') {
            break; // unknown role or malformed entry ends the streak
        }
        const content = Array.isArray(msg.content) ? msg.content : [];
        const toolResults = content.filter((c) => c?.type === 'tool_result');
        if (toolResults.length === 0) {
            break; // non-tool user message, stop
        }
        if (!toolResults.every((c) => c.is_error)) {
            break; // mixed or all-success results, stop counting
        }
        count++;
    }
    return count;
}
112
/**
 * Write a single Server-Sent Event frame to `res`.
 *
 * The frame consists of an `event:` line, a `data:` line holding the
 * JSON-serialised payload, and a terminating blank line.
 *
 * @param {{write: Function}} res - Writable response.
 * @param {string} eventType - Value for the `event:` field.
 * @param {*} data - Payload; serialised with JSON.stringify.
 */
function emitSseEvent(res, eventType, data) {
    const payload = JSON.stringify(data);
    const frame = ['event: ' + eventType, 'data: ' + payload, '', ''].join('\n');
    res.write(frame);
}
115
/**
 * Emit a fully reconstructed SSE stream from parsed block state.
 * Used when the original stream needs modification (XML tool calls or thinking-only).
 *
 * Emission order: optional `message_start`, then for every emitted block a
 * `content_block_start` / `content_block_delta` / `content_block_stop`
 * triple, then `message_delta` and `message_stop`.
 *
 * Block indices are assigned contiguously starting at 0. A block that
 * produces no events (empty text/thinking content, or a type this function
 * does not know how to render) does not consume an index, so the emitted
 * `index` sequence never has gaps.
 *
 * @param {{write: Function}} res - Writable response the events are written to.
 * @param {Object|null} messageStartData - Payload for `message_start`; skipped when falsy.
 * @param {Array<Object>} blocks - Blocks with `type` of 'text', 'thinking' or 'tool_use'.
 *   Text/thinking blocks carry `content`; tool_use blocks carry `id`, `name`, `input`.
 * @param {string} stopReason - Value for `delta.stop_reason` in `message_delta`.
 * @param {number} outputTokens - Value for `usage.output_tokens` in `message_delta`.
 */
function emitReconstructedSseStream(res, messageStartData, blocks, stopReason, outputTokens) {
    if (messageStartData) {
        emitSseEvent(res, 'message_start', messageStartData);
    }
    let idx = 0;
    for (const block of blocks) {
        let contentBlock;
        let delta;
        if (block.type === 'text') {
            if (!block.content) {
                continue;
            }
            contentBlock = { type: 'text', text: '' };
            delta = { type: 'text_delta', text: block.content };
        }
        else if (block.type === 'thinking') {
            if (!block.content) {
                continue;
            }
            contentBlock = { type: 'thinking', thinking: '' };
            delta = { type: 'thinking_delta', thinking: block.content };
        }
        else if (block.type === 'tool_use') {
            contentBlock = { type: 'tool_use', id: block.id, name: block.name, input: {} };
            delta = { type: 'input_json_delta', partial_json: JSON.stringify(block.input ?? {}) };
        }
        else {
            // Unrenderable block type: emit nothing and keep the index for the next block.
            continue;
        }
        emitSseEvent(res, 'content_block_start', { type: 'content_block_start', index: idx, content_block: contentBlock });
        emitSseEvent(res, 'content_block_delta', { type: 'content_block_delta', index: idx, delta });
        emitSseEvent(res, 'content_block_stop', { type: 'content_block_stop', index: idx });
        idx++;
    }
    emitSseEvent(res, 'message_delta', { type: 'message_delta', delta: { stop_reason: stopReason, stop_sequence: null }, usage: { output_tokens: outputTokens } });
    emitSseEvent(res, 'message_stop', { type: 'message_stop' });
}
149
/**
 * Make a backend HTTP request, buffer the full SSE stream, and return parsed state.
 * Used for the initial request AND for retries when the model generates malformed output.
 *
 * Differences from a naive buffer-and-parse:
 *  - The response is decoded as UTF-8 at the stream level, so a multi-byte
 *    character split across two network chunks is not corrupted.
 *  - `input_json_delta` fragments of tool_use blocks are accumulated and
 *    parsed into the block's `input`, so natively emitted tool calls keep
 *    their arguments when the stream is later reconstructed.
 *  - A request `timeout` event destroys the request and rejects the promise
 *    instead of leaving it pending forever.
 *
 * @param {Object} options - Options passed straight to `http.request`.
 * @param {string|Buffer} requestBody - Body written to the request.
 * @returns {Promise<{rawEvents: string[], messageStartData: Object|null, stopReason: string, blocks: Array<Object>, outputTokens: number}>}
 *   `rawEvents` are the SSE frames as received (without the blank-line
 *   separator); `stopReason` defaults to 'end_turn' when no message_delta
 *   carried one; `blocks` are ordered by content-block index.
 *   Rejects on request error, response error, or timeout.
 */
function bufferSseRequest(options, requestBody) {
    return new Promise((resolve, reject) => {
        const req = http.request(options, (res) => {
            // Let the stream's decoder handle characters that straddle chunk boundaries.
            res.setEncoding('utf8');
            let sseBuffer = '';
            const rawEvents = [];
            let messageStartData = null;
            let messageDeltaData = null;
            const parsedBlocks = {};
            // Content-block index -> concatenated input_json_delta fragments.
            const toolInputJson = new Map();
            let outputTokens = 0;
            res.on('data', (chunk) => {
                sseBuffer += chunk;
                const parts = sseBuffer.split('\n\n');
                // The last piece is an incomplete frame (or ''); keep it for the next chunk.
                sseBuffer = parts.pop() ?? '';
                for (const part of parts) {
                    if (!part.trim()) {
                        continue;
                    }
                    rawEvents.push(part);
                    let dataStr = '';
                    for (const line of part.split('\n')) {
                        if (line.startsWith('data: ')) {
                            dataStr = line.slice(6);
                        }
                    }
                    try {
                        const data = JSON.parse(dataStr);
                        if (data.type === 'message_start') {
                            messageStartData = data;
                        }
                        else if (data.type === 'content_block_start') {
                            const idx = data.index ?? 0;
                            parsedBlocks[idx] = { type: data.content_block?.type ?? 'text', content: '', name: data.content_block?.name, id: data.content_block?.id };
                        }
                        else if (data.type === 'content_block_delta') {
                            const block = parsedBlocks[data.index];
                            if (block) {
                                if (data.delta?.type === 'text_delta') {
                                    block.content += data.delta.text ?? '';
                                }
                                else if (data.delta?.type === 'thinking_delta') {
                                    block.content += data.delta.thinking ?? '';
                                }
                                else if (data.delta?.type === 'input_json_delta') {
                                    toolInputJson.set(data.index, (toolInputJson.get(data.index) ?? '') + (data.delta.partial_json ?? ''));
                                }
                            }
                        }
                        else if (data.type === 'message_delta') {
                            messageDeltaData = data;
                            outputTokens = data.usage?.output_tokens ?? 0;
                        }
                    }
                    catch { /* non-JSON SSE (ping etc.) */ }
                }
            });
            res.on('end', () => {
                if (sseBuffer.trim()) {
                    rawEvents.push(sseBuffer);
                }
                for (const [idx, json] of toolInputJson) {
                    try {
                        parsedBlocks[idx].input = JSON.parse(json);
                    }
                    catch {
                        // Truncated or invalid tool input: leave `input` unset so
                        // consumers fall back to their own default.
                    }
                }
                resolve({
                    rawEvents,
                    messageStartData,
                    stopReason: messageDeltaData?.delta?.stop_reason ?? 'end_turn',
                    blocks: Object.values(parsedBlocks),
                    outputTokens,
                });
            });
            res.on('error', reject);
        });
        req.on('error', reject);
        // http.request only signals the timeout; the request must be torn down
        // explicitly. destroy(err) surfaces as an 'error' event, which rejects.
        req.on('timeout', () => {
            req.destroy(new Error('Backend SSE request timed out'));
        });
        req.write(requestBody);
        req.end();
    });
}
218
/**
 * Apply Qwen3 model quirk fixes to a buffered SSE result.
 * Returns the action to take (what to emit) without actually emitting anything.
 *
 * Actions, checked in this order:
 *  - 'fix1': at least one well-formed XML tool call was found in the text.
 *            `newBlocks` = non-text blocks, remaining text (if any), then one
 *            tool_use block per call; `stopReason` = 'tool_use'.
 *  - 'fix3': the text consisted solely of malformed (parameter-less) tool
 *            calls. Carries `skippedNames` only.
 *  - 'fix4': text plus malformed tool calls. `newBlocks` = non-text blocks,
 *            the cleaned text, then one empty tool_use block per skipped name.
 *  - 'fix2': stop reason is 'end_turn', thinking exists and there is no
 *            non-blank text. `newBlocks` = thinking blocks plus a text block
 *            repeating the thinking content.
 *  - 'raw' : nothing to fix.
 *
 * 'fix3' is only returned when there is at least one skipped name. Text that
 * is merely whitespace, or a <tool_call> span without a function name, has
 * nothing to report back to the model; classifying it as 'fix3' would make
 * the caller announce `stop_reason: 'tool_use'` with zero tool_use blocks.
 * Whitespace-only text is likewise treated as "no text" for 'fix2'.
 *
 * @param {{blocks: Array<Object>, stopReason: string}} result - Buffered stream state.
 * @returns {{action: string, newBlocks?: Array<Object>, stopReason?: string, skippedNames?: string[]}}
 */
function classifyBufferedResult(result) {
    const { blocks, stopReason } = result;
    const textBlocks = blocks.filter((b) => b.type === 'text');
    const thinkingBlocks = blocks.filter((b) => b.type === 'thinking');
    const nonTextBlocks = blocks.filter((b) => b.type !== 'text');
    const allText = textBlocks.map((b) => b.content).join('');
    const { toolCalls, cleanText, skippedNames } = parseXmlToolCalls(allText);
    const makeToolUse = (name, input) => ({ type: 'tool_use', content: '', id: generateToolUseId(), name, input });
    if (toolCalls.length > 0) {
        const newBlocks = [
            ...nonTextBlocks,
            ...(cleanText ? [{ type: 'text', content: cleanText }] : []),
            ...toolCalls.map((tc) => makeToolUse(tc.name, tc.input)),
        ];
        return { action: 'fix1', newBlocks, stopReason: 'tool_use' };
    }
    if (skippedNames.length > 0) {
        if (!cleanText) {
            return { action: 'fix3', skippedNames };
        }
        // fix4: text + malformed tool calls — emit text and empty tool_use blocks for error feedback
        const newBlocks = [
            ...nonTextBlocks,
            { type: 'text', content: cleanText },
            ...skippedNames.map((name) => makeToolUse(name, {})),
        ];
        return { action: 'fix4', newBlocks, stopReason: 'tool_use', skippedNames };
    }
    const hasVisibleText = textBlocks.some((b) => (b.content ?? '').trim());
    if (stopReason === 'end_turn' && thinkingBlocks.length > 0 && !hasVisibleText) {
        const thinkingText = thinkingBlocks.map((b) => b.content).join('\n');
        const newBlocks = [...thinkingBlocks, { type: 'text', content: thinkingText }];
        return { action: 'fix2', newBlocks, stopReason: 'end_turn' };
    }
    return { action: 'raw' };
}
45
255
  /**
46
256
  * Router HTTP server - proxies requests to backend llama.cpp servers
47
257
  */
@@ -110,6 +320,9 @@ class RouterServer {
110
320
  else if (url === '/health' && method === 'GET') {
111
321
  await this.handleHealth(req, res);
112
322
  }
323
+ else if (url.startsWith('/props') && method === 'GET') {
324
+ await this.handleProps(req, res, url);
325
+ }
113
326
  else if (url === '/v1/models' && method === 'GET') {
114
327
  await this.handleModels(req, res);
115
328
  }
@@ -149,6 +362,51 @@ class RouterServer {
149
362
  timestamp: new Date().toISOString(),
150
363
  }));
151
364
  }
365
+ /**
366
+ * Proxy llama.cpp's /props to a backend server. Pass ?model=<name> to
367
+ * select which backend; otherwise picks the first running server.
368
+ * Used by clients (e.g. lcode) to discover the loaded n_ctx.
369
+ */
370
+ async handleProps(req, res, url) {
371
+ const query = new url_1.URL(url, 'http://localhost').searchParams;
372
+ const requestedModel = query.get('model');
373
+ const servers = await this.getAllServers();
374
+ const running = servers.filter((s) => s.status === 'running');
375
+ const target = requestedModel
376
+ ? await this.findServerForModel(requestedModel)
377
+ : running[0] ?? null;
378
+ if (!target || target.status !== 'running') {
379
+ this.sendError(res, 404, 'Not Found', requestedModel
380
+ ? `No running server for model: ${requestedModel}`
381
+ : 'No running servers');
382
+ return;
383
+ }
384
+ const host = target.host === '0.0.0.0' ? '127.0.0.1' : target.host;
385
+ const backendReq = http.request({
386
+ hostname: host,
387
+ port: target.port,
388
+ path: '/props',
389
+ method: 'GET',
390
+ timeout: this.config.requestTimeout,
391
+ }, (backendRes) => {
392
+ res.writeHead(backendRes.statusCode || 200, {
393
+ 'Content-Type': backendRes.headers['content-type'] ?? 'application/json',
394
+ });
395
+ backendRes.pipe(res);
396
+ });
397
+ backendReq.on('error', (err) => {
398
+ if (!res.headersSent) {
399
+ this.sendError(res, 502, 'Bad Gateway', `Backend /props failed: ${err.message}`);
400
+ }
401
+ });
402
+ backendReq.on('timeout', () => {
403
+ backendReq.destroy();
404
+ if (!res.headersSent) {
405
+ this.sendError(res, 504, 'Gateway Timeout', 'Backend /props did not respond in time');
406
+ }
407
+ });
408
+ backendReq.end();
409
+ }
152
410
  /**
153
411
  * List models endpoint - aggregate from all running servers
154
412
  */
@@ -278,6 +536,20 @@ class RouterServer {
278
536
  await this.logRequest(modelName, '/v1/messages', statusCode, timer.elapsed(), errorMsg, undefined, promptPreview);
279
537
  return;
280
538
  }
539
+ // Inject tool call guidance when tools are present (Qwen3-Coder workaround:
540
+ // the model sometimes generates tool calls with no parameters when context is long)
541
+ if (anthropicRequest.tools && anthropicRequest.tools.length > 0) {
542
+ const guidance = 'When using tools, always include ALL required parameters with their complete values. Never omit parameters from tool calls.';
543
+ if (typeof anthropicRequest.system === 'string' && anthropicRequest.system) {
544
+ anthropicRequest.system = guidance + '\n\n' + anthropicRequest.system;
545
+ }
546
+ else if (Array.isArray(anthropicRequest.system)) {
547
+ anthropicRequest.system = [{ type: 'text', text: guidance }, ...anthropicRequest.system];
548
+ }
549
+ else {
550
+ anthropicRequest.system = guidance;
551
+ }
552
+ }
281
553
  // Find server for model
282
554
  const server = await this.findServerForModel(modelName);
283
555
  if (!server) {
@@ -335,30 +607,262 @@ class RouterServer {
335
607
  const backendReq = http.request(options, (backendRes) => {
336
608
  // Handle streaming vs non-streaming
337
609
  if (anthropicRequest.stream) {
338
- // For streaming, set SSE headers and pipe response
339
610
  res.writeHead(backendRes.statusCode || 200, {
340
611
  'Content-Type': 'text/event-stream',
341
612
  'Cache-Control': 'no-cache',
342
613
  'Connection': 'keep-alive',
343
614
  });
344
- // Pipe response directly (llama.cpp sends correct Anthropic SSE format)
345
- backendRes.pipe(res);
615
+ // Buffer the full SSE stream so we can detect and fix Qwen3 model quirks before
616
+ // forwarding to the client. Headers are sent above but NO events are emitted until
617
+ // we've finished processing (enabling transparent retry for Fix 3).
618
+ let sseBuffer = '';
619
+ const rawEvents = [];
620
+ let messageStartData = null;
621
+ let messageDeltaData = null;
622
+ const parsedBlocks = {};
623
+ let outputTokens = 0;
624
+ backendRes.on('data', (chunk) => {
625
+ sseBuffer += chunk.toString();
626
+ const parts = sseBuffer.split('\n\n');
627
+ sseBuffer = parts.pop() ?? '';
628
+ for (const part of parts) {
629
+ if (!part.trim())
630
+ continue;
631
+ rawEvents.push(part);
632
+ let dataStr = '';
633
+ for (const line of part.split('\n')) {
634
+ if (line.startsWith('data: '))
635
+ dataStr = line.slice(6);
636
+ }
637
+ try {
638
+ const data = JSON.parse(dataStr);
639
+ if (data.type === 'message_start') {
640
+ messageStartData = data;
641
+ }
642
+ else if (data.type === 'content_block_start') {
643
+ const idx = data.index ?? 0;
644
+ parsedBlocks[idx] = {
645
+ type: data.content_block?.type ?? 'text',
646
+ content: '',
647
+ name: data.content_block?.name,
648
+ id: data.content_block?.id,
649
+ };
650
+ }
651
+ else if (data.type === 'content_block_delta') {
652
+ const block = parsedBlocks[data.index];
653
+ if (block) {
654
+ if (data.delta?.type === 'text_delta')
655
+ block.content += data.delta.text ?? '';
656
+ else if (data.delta?.type === 'thinking_delta')
657
+ block.content += data.delta.thinking ?? '';
658
+ }
659
+ }
660
+ else if (data.type === 'message_delta') {
661
+ messageDeltaData = data;
662
+ outputTokens = data.usage?.output_tokens ?? 0;
663
+ }
664
+ }
665
+ catch {
666
+ // Non-JSON SSE data (e.g. ping) — still buffered in rawEvents
667
+ }
668
+ }
669
+ });
346
670
  backendRes.on('end', async () => {
671
+ if (sseBuffer.trim())
672
+ rawEvents.push(sseBuffer);
673
+ const firstResult = {
674
+ rawEvents,
675
+ messageStartData,
676
+ stopReason: messageDeltaData?.delta?.stop_reason ?? 'end_turn',
677
+ blocks: Object.values(parsedBlocks),
678
+ outputTokens,
679
+ };
680
+ let classified = classifyBufferedResult(firstResult);
681
+ let finalResult = firstResult;
682
+ if (classified.action === 'fix3') {
683
+ const skipped = classified.skippedNames ?? [];
684
+ // Only retry for single empty call glitches (random sampling failure).
685
+ // If 2+ empty calls were generated the model is in a stuck pattern — retry
686
+ // would just double the wait time with the same degenerate result.
687
+ if (skipped.length === 1) {
688
+ console.error(`[Router] Retrying single malformed XML call (attempted: ${skipped.join(', ')})`);
689
+ try {
690
+ const retryResult = await bufferSseRequest(options, requestBody);
691
+ const retryClassified = classifyBufferedResult(retryResult);
692
+ if (retryClassified.action !== 'fix3') {
693
+ classified = retryClassified;
694
+ finalResult = retryResult;
695
+ console.error(`[Router] Retry succeeded (action: ${retryClassified.action})`);
696
+ }
697
+ else {
698
+ console.error(`[Router] Retry also malformed, giving up`);
699
+ }
700
+ }
701
+ catch (err) {
702
+ console.error('[Router] Retry request failed:', err);
703
+ }
704
+ }
705
+ else {
706
+ console.error(`[Router] Skipping retry — model stuck generating ${skipped.length} malformed calls (${skipped.join(', ')})`);
707
+ }
708
+ }
709
+ if (classified.action === 'fix1') {
710
+ console.error(`[Router] Converting ${classified.newBlocks.filter(b => b.type === 'tool_use').length} XML tool call(s) to tool_use blocks`);
711
+ emitReconstructedSseStream(res, finalResult.messageStartData, classified.newBlocks, classified.stopReason, finalResult.outputTokens);
712
+ }
713
+ else if (classified.action === 'fix2') {
714
+ console.error('[Router] Injecting fallback text block (thinking-only response detected)');
715
+ emitReconstructedSseStream(res, finalResult.messageStartData, classified.newBlocks, classified.stopReason, finalResult.outputTokens);
716
+ }
717
+ else if (classified.action === 'fix3') {
718
+ const skipped = classified.skippedNames ?? [];
719
+ const errorCycles = countConsecutiveErrorCycles(requestBody);
720
+ if (errorCycles >= 2) {
721
+ // Already tried error feedback twice — model is stuck, strip to avoid infinite loop
722
+ console.error(`[Router] Stripping fix3 after ${errorCycles} error cycles (${skipped.join(', ')})`);
723
+ const newBlocks = finalResult.blocks.filter(b => b.type === 'thinking');
724
+ emitReconstructedSseStream(res, finalResult.messageStartData, newBlocks, finalResult.stopReason, finalResult.outputTokens);
725
+ }
726
+ else {
727
+ // Send empty tool_use blocks so Claude Code returns parameter errors for model self-correction
728
+ console.error(`[Router] Forwarding ${skipped.length} empty tool_use block(s) for error feedback [cycle ${errorCycles + 1}] (${skipped.join(', ')})`);
729
+ const emptyToolBlocks = skipped.map(name => ({
730
+ type: 'tool_use',
731
+ content: '',
732
+ name,
733
+ id: generateToolUseId(),
734
+ input: {},
735
+ }));
736
+ const newBlocks = [
737
+ ...finalResult.blocks.filter(b => b.type === 'thinking'),
738
+ ...emptyToolBlocks,
739
+ ];
740
+ emitReconstructedSseStream(res, finalResult.messageStartData, newBlocks, 'tool_use', finalResult.outputTokens);
741
+ }
742
+ }
743
+ else if (classified.action === 'fix4') {
744
+ // Text + malformed tool calls
745
+ const skipped = classified.skippedNames ?? [];
746
+ const errorCycles = countConsecutiveErrorCycles(requestBody);
747
+ if (errorCycles >= 2) {
748
+ // Already tried error feedback twice — strip malformed calls, return just the text
749
+ console.error(`[Router] Stripping fix4 malformed call(s) after ${errorCycles} error cycles (${skipped.join(', ')})`);
750
+ const textOnlyBlocks = classified.newBlocks.filter(b => b.type !== 'tool_use');
751
+ emitReconstructedSseStream(res, finalResult.messageStartData, textOnlyBlocks, 'end_turn', finalResult.outputTokens);
752
+ }
753
+ else {
754
+ console.error(`[Router] Text + ${skipped.length} malformed tool call(s), forwarding empty tool_use for error feedback [cycle ${errorCycles + 1}] (${skipped.join(', ')})`);
755
+ emitReconstructedSseStream(res, finalResult.messageStartData, classified.newBlocks, 'tool_use', finalResult.outputTokens);
756
+ }
757
+ }
758
+ else {
759
+ // Raw passthrough
760
+ for (const event of finalResult.rawEvents) {
761
+ res.write(event + '\n\n');
762
+ }
763
+ }
764
+ res.end();
347
765
  await this.logRequest(modelName, '/v1/messages', backendRes.statusCode || 200, timer.elapsed(), undefined, `${server.host}:${server.port}`, promptPreview);
348
766
  resolve();
349
767
  });
350
768
  }
351
769
  else {
352
- // For non-streaming, collect response and forward
770
+ // Non-streaming: collect full response then apply fixes
353
771
  let responseData = '';
354
772
  backendRes.on('data', (chunk) => {
355
773
  responseData += chunk.toString();
356
774
  });
357
775
  backendRes.on('end', async () => {
358
- res.writeHead(backendRes.statusCode || 200, {
359
- 'Content-Type': 'application/json',
360
- });
361
- res.end(responseData);
776
+ let finalResponse = responseData;
777
+ try {
778
+ const responseObj = JSON.parse(responseData);
779
+ if (Array.isArray(responseObj.content)) {
780
+ const textBlocks = responseObj.content.filter((c) => c.type === 'text');
781
+ const allText = textBlocks.map((c) => c.text ?? '').join('');
782
+ const { toolCalls, cleanText, skippedNames } = parseXmlToolCalls(allText);
783
+ if (toolCalls.length > 0) {
784
+ // Fix 1: XML tool calls
785
+ console.error(`[Router] Converting ${toolCalls.length} XML tool call(s) to tool_use blocks`);
786
+ const newContent = responseObj.content.filter((c) => c.type !== 'text');
787
+ if (cleanText)
788
+ newContent.push({ type: 'text', text: cleanText });
789
+ for (const tc of toolCalls) {
790
+ newContent.push({ type: 'tool_use', id: generateToolUseId(), name: tc.name, input: tc.input });
791
+ }
792
+ responseObj.content = newContent;
793
+ responseObj.stop_reason = 'tool_use';
794
+ finalResponse = JSON.stringify(responseObj);
795
+ }
796
+ else if (allText && !cleanText) {
797
+ const errorCycles = countConsecutiveErrorCycles(requestBody);
798
+ // Fix 3: error feedback with loop detection
799
+ if (errorCycles >= 2) {
800
+ console.error(`[Router] Stripping fix3 after ${errorCycles} error cycles (${skippedNames.join(', ')})`);
801
+ responseObj.content = responseObj.content.filter((c) => c.type !== 'text');
802
+ finalResponse = JSON.stringify(responseObj);
803
+ }
804
+ else {
805
+ console.error(`[Router] Forwarding ${skippedNames.length} empty tool_use block(s) for error feedback [cycle ${errorCycles + 1}] (${skippedNames.join(', ')})`);
806
+ const emptyToolUseBlocks = skippedNames.map(name => ({
807
+ type: 'tool_use',
808
+ id: generateToolUseId(),
809
+ name,
810
+ input: {},
811
+ }));
812
+ responseObj.content = [
813
+ ...responseObj.content.filter((c) => c.type !== 'text'),
814
+ ...emptyToolUseBlocks,
815
+ ];
816
+ responseObj.stop_reason = 'tool_use';
817
+ finalResponse = JSON.stringify(responseObj);
818
+ }
819
+ }
820
+ else if (cleanText && skippedNames.length > 0) {
821
+ const errorCycles = countConsecutiveErrorCycles(requestBody);
822
+ // Fix 4: text + malformed tool calls with loop detection
823
+ if (errorCycles >= 2) {
824
+ console.error(`[Router] Stripping fix4 malformed call(s) after ${errorCycles} error cycles (${skippedNames.join(', ')})`);
825
+ responseObj.content = [
826
+ ...responseObj.content.filter((c) => c.type !== 'text'),
827
+ { type: 'text', text: cleanText },
828
+ ];
829
+ finalResponse = JSON.stringify(responseObj);
830
+ }
831
+ else {
832
+ console.error(`[Router] Text + ${skippedNames.length} malformed tool call(s), forwarding empty tool_use for error feedback [cycle ${errorCycles + 1}] (${skippedNames.join(', ')})`);
833
+ const emptyToolUseBlocks = skippedNames.map(name => ({
834
+ type: 'tool_use',
835
+ id: generateToolUseId(),
836
+ name,
837
+ input: {},
838
+ }));
839
+ responseObj.content = [
840
+ ...responseObj.content.filter((c) => c.type !== 'text'),
841
+ { type: 'text', text: cleanText },
842
+ ...emptyToolUseBlocks,
843
+ ];
844
+ responseObj.stop_reason = 'tool_use';
845
+ finalResponse = JSON.stringify(responseObj);
846
+ }
847
+ }
848
+ else {
849
+ // Fix 2: Thinking-only
850
+ const hasText = responseObj.content.some((c) => c.type === 'text' && c.text);
851
+ const thinkingBlocks = responseObj.content.filter((c) => c.type === 'thinking');
852
+ if (!hasText && thinkingBlocks.length > 0) {
853
+ console.error('[Router] Injecting fallback text block (thinking-only response detected)');
854
+ const thinkingText = thinkingBlocks.map((c) => c.thinking ?? '').join('\n');
855
+ responseObj.content.push({ type: 'text', text: thinkingText });
856
+ finalResponse = JSON.stringify(responseObj);
857
+ }
858
+ }
859
+ }
860
+ }
861
+ catch {
862
+ // Not valid JSON or unexpected shape — forward original
863
+ }
864
+ res.writeHead(backendRes.statusCode || 200, { 'Content-Type': 'application/json' });
865
+ res.end(finalResponse);
362
866
  await this.logRequest(modelName, '/v1/messages', backendRes.statusCode || 200, timer.elapsed(), undefined, `${server.host}:${server.port}`, promptPreview);
363
867
  resolve();
364
868
  });