lynkr 4.1.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -116,9 +116,9 @@ Lynkr reduces AI costs by **60-80%** through intelligent token optimization:
116
116
 
117
117
  ### IDE Integration
118
118
  - ✅ **Claude Code CLI** - Drop-in replacement for Anthropic backend
119
- - ✅ **Cursor IDE** - Full OpenAI API compatibility
119
+ - ✅ **Cursor IDE** - Full OpenAI API compatibility (Requires Cursor Pro)
120
120
  - ✅ **Continue.dev** - Works with any OpenAI-compatible client
121
- - ✅ **All Features Work** - Chat, file operations, tool calling, streaming
121
+ - ✅ **Cline + VSCode** - Configure it similar to Cursor in the OpenAI-compatible section
122
122
 
123
123
  ### Advanced Capabilities
124
124
  - 🧠 **Long-Term Memory** - Titans-inspired memory system with surprise-based filtering
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lynkr",
3
- "version": "4.1.0",
3
+ "version": "4.2.0",
4
4
  "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -900,6 +900,193 @@ router.post("/embeddings", async (req, res) => {
900
900
  }
901
901
  });
902
902
 
903
+ /**
904
+ * POST /v1/responses
905
+ *
906
+ * OpenAI Responses API endpoint (used by GPT-5-Codex and newer models).
907
+ * Converts Responses API format to Chat Completions → processes → converts back.
908
+ */
909
+ router.post("/responses", async (req, res) => {
910
+ const startTime = Date.now();
911
+ const sessionId = req.headers["x-session-id"] || req.headers["authorization"]?.split(" ")[1] || "responses-session";
912
+
913
+ try {
914
+ const { convertResponsesToChat, convertChatToResponses } = require("../clients/responses-format");
915
+
916
+ // Comprehensive debug logging
917
+ logger.info({
918
+ endpoint: "/v1/responses",
919
+ inputType: typeof req.body.input,
920
+ inputIsArray: Array.isArray(req.body.input),
921
+ inputLength: Array.isArray(req.body.input) ? req.body.input.length : req.body.input?.length,
922
+ inputPreview: typeof req.body.input === 'string'
923
+ ? req.body.input.substring(0, 100)
924
+ : Array.isArray(req.body.input)
925
+ ? req.body.input.map(m => ({role: m?.role, hasContent: !!m?.content, hasTool: !!m?.tool_calls}))
926
+ : 'unknown',
927
+ model: req.body.model,
928
+ hasTools: !!req.body.tools,
929
+ stream: req.body.stream || false,
930
+ fullRequestBodyKeys: Object.keys(req.body)
931
+ }, "=== RESPONSES API REQUEST ===");
932
+
933
+ // Convert Responses API to Chat Completions format
934
+ const chatRequest = convertResponsesToChat(req.body);
935
+
936
+ logger.info({
937
+ chatRequestMessageCount: chatRequest.messages?.length,
938
+ chatRequestMessages: chatRequest.messages?.map(m => ({
939
+ role: m.role,
940
+ hasContent: !!m.content,
941
+ contentPreview: typeof m.content === 'string' ? m.content.substring(0, 50) : m.content
942
+ }))
943
+ }, "After Responses→Chat conversion");
944
+
945
+ // Convert to Anthropic format
946
+ const anthropicRequest = convertOpenAIToAnthropic(chatRequest);
947
+
948
+ logger.info({
949
+ anthropicMessageCount: anthropicRequest.messages?.length,
950
+ anthropicMessages: anthropicRequest.messages?.map(m => ({
951
+ role: m.role,
952
+ hasContent: !!m.content
953
+ }))
954
+ }, "After Chat→Anthropic conversion");
955
+
956
+ // Get session
957
+ const session = getSession(sessionId);
958
+
959
+ // Handle streaming vs non-streaming
960
+ if (req.body.stream) {
961
+ // Set up SSE headers for streaming
962
+ res.setHeader("Content-Type", "text/event-stream");
963
+ res.setHeader("Cache-Control", "no-cache");
964
+ res.setHeader("Connection", "keep-alive");
965
+
966
+ try {
967
+ // Force non-streaming from orchestrator
968
+ anthropicRequest.stream = false;
969
+
970
+ const result = await orchestrator.processMessage({
971
+ payload: anthropicRequest,
972
+ headers: req.headers,
973
+ session: session,
974
+ options: {
975
+ maxSteps: req.body?.max_steps
976
+ }
977
+ });
978
+
979
+ // Convert back: Anthropic → OpenAI → Responses
980
+ const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
981
+ const responsesResponse = convertChatToResponses(chatResponse);
982
+
983
+ // Simulate streaming using OpenAI Responses API SSE format
984
+ const content = responsesResponse.content || "";
985
+ const words = content.split(" ");
986
+
987
+ // Send response.created event
988
+ const createdEvent = {
989
+ id: responsesResponse.id,
990
+ object: "response.created",
991
+ created: responsesResponse.created,
992
+ model: req.body.model
993
+ };
994
+ res.write(`event: response.created\n`);
995
+ res.write(`data: ${JSON.stringify(createdEvent)}\n\n`);
996
+
997
+ // Send content in word chunks using response.output_text.delta
998
+ for (let i = 0; i < words.length; i++) {
999
+ const word = words[i] + (i < words.length - 1 ? " " : "");
1000
+ const deltaEvent = {
1001
+ id: responsesResponse.id,
1002
+ object: "response.output_text.delta",
1003
+ delta: word,
1004
+ created: responsesResponse.created
1005
+ };
1006
+ res.write(`event: response.output_text.delta\n`);
1007
+ res.write(`data: ${JSON.stringify(deltaEvent)}\n\n`);
1008
+ }
1009
+
1010
+ // Send response.completed event
1011
+ const completedEvent = {
1012
+ id: responsesResponse.id,
1013
+ object: "response.completed",
1014
+ created: responsesResponse.created,
1015
+ model: req.body.model,
1016
+ content: content,
1017
+ stop_reason: responsesResponse.stop_reason,
1018
+ usage: responsesResponse.usage
1019
+ };
1020
+ res.write(`event: response.completed\n`);
1021
+ res.write(`data: ${JSON.stringify(completedEvent)}\n\n`);
1022
+
1023
+ // Optional: Send [DONE] marker
1024
+ res.write("data: [DONE]\n\n");
1025
+ res.end();
1026
+
1027
+ logger.info({
1028
+ duration: Date.now() - startTime,
1029
+ mode: "streaming",
1030
+ contentLength: content.length
1031
+ }, "=== RESPONSES API STREAMING COMPLETE ===");
1032
+
1033
+ } catch (streamError) {
1034
+ logger.error({ error: streamError.message, stack: streamError.stack }, "Responses API streaming error");
1035
+
1036
+ // Send error via SSE
1037
+ res.write(`data: ${JSON.stringify({
1038
+ error: {
1039
+ message: streamError.message || "Internal server error",
1040
+ type: "server_error",
1041
+ code: "internal_error"
1042
+ }
1043
+ })}\n\n`);
1044
+ res.end();
1045
+ }
1046
+
1047
+ } else {
1048
+ // Non-streaming response
1049
+ anthropicRequest.stream = false;
1050
+
1051
+ const result = await orchestrator.processMessage({
1052
+ payload: anthropicRequest,
1053
+ headers: req.headers,
1054
+ session: session,
1055
+ options: {
1056
+ maxSteps: req.body?.max_steps
1057
+ }
1058
+ });
1059
+
1060
+ // Convert back: Anthropic → OpenAI → Responses
1061
+ const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
1062
+ const responsesResponse = convertChatToResponses(chatResponse);
1063
+
1064
+ logger.info({
1065
+ duration: Date.now() - startTime,
1066
+ contentLength: responsesResponse.content?.length || 0,
1067
+ stopReason: responsesResponse.stop_reason
1068
+ }, "=== RESPONSES API RESPONSE ===");
1069
+
1070
+ res.json(responsesResponse);
1071
+ }
1072
+
1073
+ } catch (error) {
1074
+ logger.error({
1075
+ error: error.message,
1076
+ stack: error.stack,
1077
+ duration: Date.now() - startTime
1078
+ }, "Responses API error");
1079
+
1080
+ res.status(500).json({
1081
+ error: {
1082
+ message: error.message || "Internal server error",
1083
+ type: "server_error",
1084
+ code: "internal_error"
1085
+ }
1086
+ });
1087
+ }
1088
+ });
1089
+
903
1090
  /**
904
1091
  * GET /v1/health
905
1092
  *
package/src/api/router.js CHANGED
@@ -180,17 +180,93 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
180
180
  }
181
181
  }
182
182
 
183
- // Fallback: if no stream, wrap buffered response in SSE (old behavior)
184
- const eventPayload = {
185
- type: "message",
186
- message: result.body,
187
- };
188
- res.write(`event: message\n`);
189
- res.write(`data: ${JSON.stringify(eventPayload)}\n\n`);
190
- res.write(`event: end\n`);
191
- res.write(
192
- `data: ${JSON.stringify({ termination: result.terminationReason ?? "completion" })}\n\n`,
193
- );
183
+ // Fallback: if no stream, wrap buffered response in proper Anthropic SSE format
184
+ // Check if result.body exists
185
+ if (!result || !result.body) {
186
+ res.write(`event: error\n`);
187
+ res.write(`data: ${JSON.stringify({ type: "error", error: { message: "Empty response from provider" } })}\n\n`);
188
+ res.end();
189
+ return;
190
+ }
191
+
192
+ const msg = result.body;
193
+
194
+ // 1. message_start
195
+ res.write(`event: message_start\n`);
196
+ res.write(`data: ${JSON.stringify({
197
+ type: "message_start",
198
+ message: {
199
+ id: msg.id,
200
+ type: "message",
201
+ role: "assistant",
202
+ content: [],
203
+ model: msg.model,
204
+ stop_reason: null,
205
+ stop_sequence: null,
206
+ usage: { input_tokens: msg.usage?.input_tokens || 0, output_tokens: 1 }
207
+ }
208
+ })}\n\n`);
209
+
210
+ // 2. content_block_start and content_block_delta for each content block
211
+ const contentBlocks = msg.content || [];
212
+ for (let i = 0; i < contentBlocks.length; i++) {
213
+ const block = contentBlocks[i];
214
+
215
+ if (block.type === "text") {
216
+ res.write(`event: content_block_start\n`);
217
+ res.write(`data: ${JSON.stringify({
218
+ type: "content_block_start",
219
+ index: i,
220
+ content_block: { type: "text", text: "" }
221
+ })}\n\n`);
222
+
223
+ // Send text in chunks
224
+ const text = block.text || "";
225
+ const chunkSize = 20;
226
+ for (let j = 0; j < text.length; j += chunkSize) {
227
+ const chunk = text.slice(j, j + chunkSize);
228
+ res.write(`event: content_block_delta\n`);
229
+ res.write(`data: ${JSON.stringify({
230
+ type: "content_block_delta",
231
+ index: i,
232
+ delta: { type: "text_delta", text: chunk }
233
+ })}\n\n`);
234
+ }
235
+
236
+ res.write(`event: content_block_stop\n`);
237
+ res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
238
+ } else if (block.type === "tool_use") {
239
+ res.write(`event: content_block_start\n`);
240
+ res.write(`data: ${JSON.stringify({
241
+ type: "content_block_start",
242
+ index: i,
243
+ content_block: { type: "tool_use", id: block.id, name: block.name, input: {} }
244
+ })}\n\n`);
245
+
246
+ res.write(`event: content_block_delta\n`);
247
+ res.write(`data: ${JSON.stringify({
248
+ type: "content_block_delta",
249
+ index: i,
250
+ delta: { type: "input_json_delta", partial_json: JSON.stringify(block.input) }
251
+ })}\n\n`);
252
+
253
+ res.write(`event: content_block_stop\n`);
254
+ res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
255
+ }
256
+ }
257
+
258
+ // 3. message_delta with stop_reason
259
+ res.write(`event: message_delta\n`);
260
+ res.write(`data: ${JSON.stringify({
261
+ type: "message_delta",
262
+ delta: { stop_reason: msg.stop_reason || "end_turn", stop_sequence: null },
263
+ usage: { output_tokens: msg.usage?.output_tokens || 0 }
264
+ })}\n\n`);
265
+
266
+ // 4. message_stop
267
+ res.write(`event: message_stop\n`);
268
+ res.write(`data: ${JSON.stringify({ type: "message_stop" })}\n\n`);
269
+
194
270
  metrics.recordResponse(result.status);
195
271
  res.end();
196
272
  return;
@@ -219,17 +295,91 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
219
295
  res.flushHeaders();
220
296
  }
221
297
 
222
- const eventPayload = {
223
- type: "message",
224
- message: result.body,
225
- };
226
- res.write(`event: message\n`);
227
- res.write(`data: ${JSON.stringify(eventPayload)}\n\n`);
228
-
229
- res.write(`event: end\n`);
230
- res.write(
231
- `data: ${JSON.stringify({ termination: result.terminationReason ?? "completion" })}\n\n`,
232
- );
298
+ // Check if result.body exists
299
+ if (!result || !result.body) {
300
+ res.write(`event: error\n`);
301
+ res.write(`data: ${JSON.stringify({ type: "error", error: { message: "Empty response from provider" } })}\n\n`);
302
+ res.end();
303
+ return;
304
+ }
305
+
306
+ // Use proper Anthropic SSE format
307
+ const msg = result.body;
308
+
309
+ // 1. message_start
310
+ res.write(`event: message_start\n`);
311
+ res.write(`data: ${JSON.stringify({
312
+ type: "message_start",
313
+ message: {
314
+ id: msg.id,
315
+ type: "message",
316
+ role: "assistant",
317
+ content: [],
318
+ model: msg.model,
319
+ stop_reason: null,
320
+ stop_sequence: null,
321
+ usage: { input_tokens: msg.usage?.input_tokens || 0, output_tokens: 1 }
322
+ }
323
+ })}\n\n`);
324
+
325
+ // 2. content_block_start and content_block_delta for each content block
326
+ const contentBlocks = msg.content || [];
327
+ for (let i = 0; i < contentBlocks.length; i++) {
328
+ const block = contentBlocks[i];
329
+
330
+ if (block.type === "text") {
331
+ res.write(`event: content_block_start\n`);
332
+ res.write(`data: ${JSON.stringify({
333
+ type: "content_block_start",
334
+ index: i,
335
+ content_block: { type: "text", text: "" }
336
+ })}\n\n`);
337
+
338
+ const text = block.text || "";
339
+ const chunkSize = 20;
340
+ for (let j = 0; j < text.length; j += chunkSize) {
341
+ const chunk = text.slice(j, j + chunkSize);
342
+ res.write(`event: content_block_delta\n`);
343
+ res.write(`data: ${JSON.stringify({
344
+ type: "content_block_delta",
345
+ index: i,
346
+ delta: { type: "text_delta", text: chunk }
347
+ })}\n\n`);
348
+ }
349
+
350
+ res.write(`event: content_block_stop\n`);
351
+ res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
352
+ } else if (block.type === "tool_use") {
353
+ res.write(`event: content_block_start\n`);
354
+ res.write(`data: ${JSON.stringify({
355
+ type: "content_block_start",
356
+ index: i,
357
+ content_block: { type: "tool_use", id: block.id, name: block.name, input: {} }
358
+ })}\n\n`);
359
+
360
+ res.write(`event: content_block_delta\n`);
361
+ res.write(`data: ${JSON.stringify({
362
+ type: "content_block_delta",
363
+ index: i,
364
+ delta: { type: "input_json_delta", partial_json: JSON.stringify(block.input) }
365
+ })}\n\n`);
366
+
367
+ res.write(`event: content_block_stop\n`);
368
+ res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
369
+ }
370
+ }
371
+
372
+ // 3. message_delta with stop_reason
373
+ res.write(`event: message_delta\n`);
374
+ res.write(`data: ${JSON.stringify({
375
+ type: "message_delta",
376
+ delta: { stop_reason: msg.stop_reason || "end_turn", stop_sequence: null },
377
+ usage: { output_tokens: msg.usage?.output_tokens || 0 }
378
+ })}\n\n`);
379
+
380
+ // 4. message_stop
381
+ res.write(`event: message_stop\n`);
382
+ res.write(`data: ${JSON.stringify({ type: "message_stop" })}\n\n`);
233
383
 
234
384
  metrics.recordResponse(result.status);
235
385
  res.end();
@@ -225,10 +225,35 @@ async function invokeOllama(body) {
225
225
  };
226
226
  });
227
227
 
228
+ // FIX: Deduplicate consecutive messages with same role (Ollama may reject this)
229
+ const deduplicated = [];
230
+ let lastRole = null;
231
+ for (const msg of convertedMessages) {
232
+ if (msg.role === lastRole) {
233
+ logger.debug({
234
+ skippedRole: msg.role,
235
+ contentPreview: msg.content.substring(0, 50)
236
+ }, 'Ollama: Skipping duplicate consecutive message with same role');
237
+ continue;
238
+ }
239
+ deduplicated.push(msg);
240
+ lastRole = msg.role;
241
+ }
242
+
243
+ if (deduplicated.length !== convertedMessages.length) {
244
+ logger.info({
245
+ originalCount: convertedMessages.length,
246
+ deduplicatedCount: deduplicated.length,
247
+ removed: convertedMessages.length - deduplicated.length,
248
+ messageRoles: convertedMessages.map(m => m.role).join(' → '),
249
+ deduplicatedRoles: deduplicated.map(m => m.role).join(' → ')
250
+ }, 'Ollama: Removed consecutive duplicate roles from message sequence');
251
+ }
252
+
228
253
  const ollamaBody = {
229
254
  model: config.ollama.model,
230
- messages: convertedMessages,
231
- stream: body.stream ?? false,
255
+ messages: deduplicated,
256
+ stream: false, // Force non-streaming for Ollama - streaming format conversion not yet implemented
232
257
  options: {
233
258
  temperature: body.temperature ?? 0.7,
234
259
  num_predict: body.max_tokens ?? 4096,
@@ -240,7 +265,8 @@ async function invokeOllama(body) {
240
265
  let toolsToSend = body.tools;
241
266
  let toolsInjected = false;
242
267
 
243
- if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
268
+ const injectToolsOllama = process.env.INJECT_TOOLS_OLLAMA !== "false";
269
+ if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) {
244
270
  toolsToSend = STANDARD_TOOLS;
245
271
  toolsInjected = true;
246
272
  logger.info({
@@ -248,6 +274,8 @@ async function invokeOllama(body) {
248
274
  injectedToolNames: STANDARD_TOOLS.map(t => t.name),
249
275
  reason: "Client did not send tools (passthrough mode)"
250
276
  }, "=== INJECTING STANDARD TOOLS (Ollama) ===");
277
+ } else if (!injectToolsOllama) {
278
+ logger.info({}, "Tool injection disabled for Ollama (INJECT_TOOLS_OLLAMA=false)");
251
279
  }
252
280
 
253
281
  // Add tools if present (for tool-capable models)
@@ -351,10 +379,17 @@ async function invokeAzureOpenAI(body) {
351
379
  const format = detectAzureFormat(endpoint);
352
380
 
353
381
  const headers = {
354
- "api-key": config.azureOpenAI.apiKey, // Azure uses "api-key" not "Authorization"
355
382
  "Content-Type": "application/json"
356
383
  };
357
384
 
385
+ // Azure AI Foundry (services.ai.azure.com) uses Bearer auth
386
+ // Standard Azure OpenAI (openai.azure.com) uses api-key header
387
+ if (endpoint.includes("services.ai.azure.com")) {
388
+ headers["Authorization"] = `Bearer ${config.azureOpenAI.apiKey}`;
389
+ } else {
390
+ headers["api-key"] = config.azureOpenAI.apiKey;
391
+ }
392
+
358
393
  // Convert messages and handle system message
359
394
  const messages = convertAnthropicMessagesToOpenRouter(body.messages || []);
360
395
 
@@ -371,7 +406,7 @@ async function invokeAzureOpenAI(body) {
371
406
  temperature: body.temperature ?? 0.3, // Lower temperature for more deterministic, action-oriented behavior
372
407
  max_tokens: Math.min(body.max_tokens ?? 4096, 16384), // Cap at Azure OpenAI's limit
373
408
  top_p: body.top_p ?? 1.0,
374
- stream: body.stream ?? false,
409
+ stream: false, // Force non-streaming for Azure OpenAI - streaming format conversion not yet implemented
375
410
  model: config.azureOpenAI.deployment
376
411
  };
377
412
 
@@ -536,8 +571,35 @@ async function invokeLlamaCpp(body) {
536
571
  messages.unshift({ role: "system", content: body.system });
537
572
  }
538
573
 
574
+ // FIX: Deduplicate consecutive messages with same role (llama.cpp rejects this)
575
+ const deduplicated = [];
576
+ let lastRole = null;
577
+ for (const msg of messages) {
578
+ if (msg.role === lastRole) {
579
+ logger.debug({
580
+ skippedRole: msg.role,
581
+ contentPreview: typeof msg.content === 'string'
582
+ ? msg.content.substring(0, 50)
583
+ : JSON.stringify(msg.content).substring(0, 50)
584
+ }, 'llama.cpp: Skipping duplicate consecutive message with same role');
585
+ continue;
586
+ }
587
+ deduplicated.push(msg);
588
+ lastRole = msg.role;
589
+ }
590
+
591
+ if (deduplicated.length !== messages.length) {
592
+ logger.info({
593
+ originalCount: messages.length,
594
+ deduplicatedCount: deduplicated.length,
595
+ removed: messages.length - deduplicated.length,
596
+ messageRoles: messages.map(m => m.role).join(' → '),
597
+ deduplicatedRoles: deduplicated.map(m => m.role).join(' → ')
598
+ }, 'llama.cpp: Removed consecutive duplicate roles from message sequence');
599
+ }
600
+
539
601
  const llamacppBody = {
540
- messages,
602
+ messages: deduplicated,
541
603
  temperature: body.temperature ?? 0.7,
542
604
  max_tokens: body.max_tokens ?? 4096,
543
605
  top_p: body.top_p ?? 1.0,
@@ -548,7 +610,8 @@ async function invokeLlamaCpp(body) {
548
610
  let toolsToSend = body.tools;
549
611
  let toolsInjected = false;
550
612
 
551
- if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
613
+ const injectToolsLlamacpp = process.env.INJECT_TOOLS_LLAMACPP !== "false";
614
+ if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) {
552
615
  toolsToSend = STANDARD_TOOLS;
553
616
  toolsInjected = true;
554
617
  logger.info({
@@ -556,6 +619,8 @@ async function invokeLlamaCpp(body) {
556
619
  injectedToolNames: STANDARD_TOOLS.map(t => t.name),
557
620
  reason: "Client did not send tools (passthrough mode)"
558
621
  }, "=== INJECTING STANDARD TOOLS (llama.cpp) ===");
622
+ } else if (!injectToolsLlamacpp) {
623
+ logger.info({}, "Tool injection disabled for llama.cpp (INJECT_TOOLS_LLAMACPP=false)");
559
624
  }
560
625
 
561
626
  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
@@ -574,6 +639,16 @@ async function invokeLlamaCpp(body) {
574
639
  toolCount: llamacppBody.tools?.length || 0,
575
640
  temperature: llamacppBody.temperature,
576
641
  max_tokens: llamacppBody.max_tokens,
642
+ messageCount: llamacppBody.messages?.length || 0,
643
+ messageRoles: llamacppBody.messages?.map(m => m.role).join(' → '),
644
+ messages: llamacppBody.messages?.map((m, i) => ({
645
+ index: i,
646
+ role: m.role,
647
+ hasContent: !!m.content,
648
+ contentPreview: typeof m.content === 'string' ? m.content.substring(0, 100) : JSON.stringify(m.content).substring(0, 100),
649
+ hasToolCalls: !!m.tool_calls,
650
+ toolCallCount: m.tool_calls?.length || 0,
651
+ }))
577
652
  }, "=== LLAMA.CPP REQUEST ===");
578
653
 
579
654
  return performJsonRequest(endpoint, { headers, body: llamacppBody }, "llama.cpp");
@@ -124,15 +124,17 @@ function convertOpenAIToAnthropic(openaiRequest) {
124
124
  // Convert tools format (OpenAI → Anthropic)
125
125
  let anthropicTools = null;
126
126
  if (tools && tools.length > 0) {
127
- anthropicTools = tools.map(tool => ({
128
- name: tool.function.name,
129
- description: tool.function.description || "",
130
- input_schema: tool.function.parameters || {
131
- type: "object",
132
- properties: {},
133
- required: []
134
- }
135
- }));
127
+ anthropicTools = tools
128
+ .filter(tool => tool && tool.function && tool.function.name) // Filter out invalid tools
129
+ .map(tool => ({
130
+ name: tool.function.name,
131
+ description: tool.function.description || "",
132
+ input_schema: tool.function.parameters || {
133
+ type: "object",
134
+ properties: {},
135
+ required: []
136
+ }
137
+ }));
136
138
  }
137
139
 
138
140
  // Build Anthropic request
@@ -264,9 +264,19 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
264
264
  trimmed.includes('"arguments"'));
265
265
  };
266
266
 
267
+ // Handle reasoning_content from thinking models (e.g., Kimi, o1)
268
+ let textContent = message.content || "";
269
+ if (!textContent.trim() && message.reasoning_content) {
270
+ logger.info({
271
+ hasReasoningContent: true,
272
+ reasoningLength: message.reasoning_content.length
273
+ }, "Using reasoning_content as primary content (thinking model detected)");
274
+ textContent = message.reasoning_content;
275
+ }
276
+
267
277
  // Add text content if present, but skip if it's a duplicate/malformed tool call JSON
268
- if (message.content && message.content.trim()) {
269
- const looksLikeToolJson = isToolCallJson(message.content);
278
+ if (textContent && textContent.trim()) {
279
+ const looksLikeToolJson = isToolCallJson(textContent);
270
280
 
271
281
  // Skip content in two cases:
272
282
  // 1. We have proper tool_calls AND content duplicates them (original fix)
@@ -276,14 +286,14 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
276
286
  if (hasToolCalls) {
277
287
  // Case 1: Duplicate - model provided both content and tool_calls
278
288
  logger.debug({
279
- contentPreview: message.content.substring(0, 100),
289
+ contentPreview: textContent.substring(0, 100),
280
290
  toolCallCount: message.tool_calls.length
281
291
  }, "Skipping text content that duplicates tool_calls (llama.cpp quirk)");
282
292
  } else {
283
293
  // Case 2: Malformed - model only provided JSON in content, not structured tool_calls
284
294
  // This is a model error - it should have used tool_calls, not raw JSON
285
295
  logger.warn({
286
- contentPreview: message.content.substring(0, 200)
296
+ contentPreview: textContent.substring(0, 200)
287
297
  }, "Model output tool call as JSON text instead of structured tool_calls - filtering out malformed output");
288
298
  }
289
299
  // Skip this content block in both cases
@@ -291,7 +301,7 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
291
301
  // Normal text content - include it
292
302
  contentBlocks.push({
293
303
  type: "text",
294
- text: message.content
304
+ text: textContent
295
305
  });
296
306
  }
297
307
  }