lynkr 4.1.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/api/openai-router.js +187 -0
- package/src/api/router.js +172 -22
- package/src/clients/databricks.js +82 -7
- package/src/clients/openai-format.js +11 -9
- package/src/clients/openrouter-utils.js +15 -5
- package/src/clients/responses-format.js +214 -0
- package/src/clients/standard-tools.js +4 -4
- package/src/orchestrator/index.js +32 -0
- package/README.md.backup +0 -2996
package/README.md
CHANGED
|
@@ -116,9 +116,9 @@ Lynkr reduces AI costs by **60-80%** through intelligent token optimization:
|
|
|
116
116
|
|
|
117
117
|
### IDE Integration
|
|
118
118
|
- ✅ **Claude Code CLI** - Drop-in replacement for Anthropic backend
|
|
119
|
-
- ✅ **Cursor IDE** - Full OpenAI API compatibility
|
|
119
|
+
- ✅ **Cursor IDE** - Full OpenAI API compatibility (Requires Cursor Pro)
|
|
120
120
|
- ✅ **Continue.dev** - Works with any OpenAI-compatible client
|
|
121
|
-
- ✅ **
|
|
121
|
+
- ✅ **Cline + VS Code** - Configure it the same way as Cursor, in the OpenAI-compatible section
|
|
122
122
|
|
|
123
123
|
### Advanced Capabilities
|
|
124
124
|
- 🧠 **Long-Term Memory** - Titans-inspired memory system with surprise-based filtering
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "lynkr",
|
|
3
|
-
"version": "4.
|
|
3
|
+
"version": "4.2.0",
|
|
4
4
|
"description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
package/src/api/openai-router.js
CHANGED
|
@@ -900,6 +900,193 @@ router.post("/embeddings", async (req, res) => {
|
|
|
900
900
|
}
|
|
901
901
|
});
|
|
902
902
|
|
|
903
|
+
/**
 * POST /v1/responses
 *
 * OpenAI Responses API endpoint (used by GPT-5-Codex and newer models).
 * Converts the Responses API request to Chat Completions format, then to
 * Anthropic format, runs it through the orchestrator, and converts the result
 * back (Anthropic → OpenAI → Responses).
 *
 * Streaming (`stream: true`) is simulated: the orchestrator is always invoked
 * with `stream = false`, and the finished text is replayed as SSE events
 * (`response.created`, one `response.output_text.delta` per space-separated
 * word, `response.completed`, then a `data: [DONE]` marker).
 *
 * Error reporting:
 * - failures before any output is written → HTTP 500 with a JSON error body;
 * - failures inside the streaming branch → an SSE `data:` frame carrying an
 *   `error` object, after which the stream is closed.
 */
router.post("/responses", async (req, res) => {
  const startTime = Date.now();

  // NOTE(review): when no x-session-id header is present this falls back to the
  // bearer token itself as the session id. Confirm that getSession() never
  // logs or persists the id in clear text; consider hashing the token instead.
  const sessionId =
    req.headers["x-session-id"] ||
    req.headers["authorization"]?.split(" ")[1] ||
    "responses-session";

  // Writes one named SSE frame (event line + JSON data line).
  const writeSseEvent = (eventName, payload) => {
    res.write(`event: ${eventName}\n`);
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  // Error payload shared by the JSON (non-streaming) and SSE (streaming) paths.
  const buildErrorBody = (err) => ({
    error: {
      message: err.message || "Internal server error",
      type: "server_error",
      code: "internal_error",
    },
  });

  try {
    const { convertResponsesToChat, convertChatToResponses } = require("../clients/responses-format");

    // Comprehensive debug logging
    const { input } = req.body;
    const inputIsArray = Array.isArray(input);
    let inputPreview = "unknown";
    if (typeof input === "string") {
      inputPreview = input.substring(0, 100);
    } else if (inputIsArray) {
      inputPreview = input.map((m) => ({
        role: m?.role,
        hasContent: !!m?.content,
        hasTool: !!m?.tool_calls,
      }));
    }

    logger.info({
      endpoint: "/v1/responses",
      inputType: typeof input,
      inputIsArray,
      inputLength: input?.length,
      inputPreview,
      model: req.body.model,
      hasTools: !!req.body.tools,
      stream: req.body.stream || false,
      fullRequestBodyKeys: Object.keys(req.body),
    }, "=== RESPONSES API REQUEST ===");

    // Convert Responses API to Chat Completions format
    const chatRequest = convertResponsesToChat(req.body);

    logger.info({
      chatRequestMessageCount: chatRequest.messages?.length,
      chatRequestMessages: chatRequest.messages?.map((m) => ({
        role: m.role,
        hasContent: !!m.content,
        // Non-string content is logged as-is (not truncated).
        contentPreview: typeof m.content === "string" ? m.content.substring(0, 50) : m.content,
      })),
    }, "After Responses→Chat conversion");

    // Convert to Anthropic format
    const anthropicRequest = convertOpenAIToAnthropic(chatRequest);

    logger.info({
      anthropicMessageCount: anthropicRequest.messages?.length,
      anthropicMessages: anthropicRequest.messages?.map((m) => ({
        role: m.role,
        hasContent: !!m.content,
      })),
    }, "After Chat→Anthropic conversion");

    // Get session
    const session = getSession(sessionId);

    // Runs the request through the orchestrator (always non-streaming) and
    // converts the result back: Anthropic → OpenAI → Responses.
    // Shared by the streaming and non-streaming branches below.
    const runOrchestrator = async () => {
      anthropicRequest.stream = false;

      const result = await orchestrator.processMessage({
        payload: anthropicRequest,
        headers: req.headers,
        session,
        options: {
          maxSteps: req.body?.max_steps,
        },
      });

      const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
      return convertChatToResponses(chatResponse);
    };

    if (!req.body.stream) {
      // Non-streaming response
      const responsesResponse = await runOrchestrator();

      logger.info({
        duration: Date.now() - startTime,
        contentLength: responsesResponse.content?.length || 0,
        stopReason: responsesResponse.stop_reason,
      }, "=== RESPONSES API RESPONSE ===");

      res.json(responsesResponse);
      return;
    }

    // Set up SSE headers for streaming
    res.setHeader("Content-Type", "text/event-stream");
    res.setHeader("Cache-Control", "no-cache");
    res.setHeader("Connection", "keep-alive");

    try {
      const responsesResponse = await runOrchestrator();

      // Simulate streaming using OpenAI Responses API SSE format.
      // `type` is added alongside the pre-existing `object` field because
      // Responses API stream events are discriminated by `type`; the change is
      // purely additive for clients that already read `object`.
      const content = responsesResponse.content || "";

      writeSseEvent("response.created", {
        type: "response.created",
        id: responsesResponse.id,
        object: "response.created",
        created: responsesResponse.created,
        model: req.body.model,
      });

      // Send content in word chunks using response.output_text.delta.
      // Guarded so that empty content does not produce a single empty delta
      // ("".split(" ") yields [""]).
      if (content) {
        const words = content.split(" ");
        const lastIndex = words.length - 1;
        words.forEach((word, index) => {
          writeSseEvent("response.output_text.delta", {
            type: "response.output_text.delta",
            id: responsesResponse.id,
            object: "response.output_text.delta",
            // Re-attach the separator that split() removed (except after the last word).
            delta: index < lastIndex ? `${word} ` : word,
            created: responsesResponse.created,
          });
        });
      }

      writeSseEvent("response.completed", {
        type: "response.completed",
        id: responsesResponse.id,
        object: "response.completed",
        created: responsesResponse.created,
        model: req.body.model,
        content,
        stop_reason: responsesResponse.stop_reason,
        usage: responsesResponse.usage,
      });

      // Optional: Send [DONE] marker
      res.write("data: [DONE]\n\n");
      res.end();

      logger.info({
        duration: Date.now() - startTime,
        mode: "streaming",
        contentLength: content.length,
      }, "=== RESPONSES API STREAMING COMPLETE ===");
    } catch (streamError) {
      logger.error({ error: streamError.message, stack: streamError.stack }, "Responses API streaming error");

      // Send error via SSE, unless the stream has already been closed
      // (writing after end() would raise a write-after-end error).
      if (!res.writableEnded) {
        res.write(`data: ${JSON.stringify(buildErrorBody(streamError))}\n\n`);
        res.end();
      }
    }
  } catch (error) {
    logger.error({
      error: error.message,
      stack: error.stack,
      duration: Date.now() - startTime,
    }, "Responses API error");

    // A status line can no longer be sent once the response has started;
    // just make sure the connection is closed instead of throwing again.
    if (res.headersSent) {
      if (!res.writableEnded) {
        res.end();
      }
      return;
    }

    res.status(500).json(buildErrorBody(error));
  }
});
|
|
1089
|
+
|
|
903
1090
|
/**
|
|
904
1091
|
* GET /v1/health
|
|
905
1092
|
*
|
package/src/api/router.js
CHANGED
|
@@ -180,17 +180,93 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
180
180
|
}
|
|
181
181
|
}
|
|
182
182
|
|
|
183
|
-
// Fallback: if no stream, wrap buffered response in SSE
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
183
|
+
// Fallback: if no stream, wrap buffered response in proper Anthropic SSE format
|
|
184
|
+
// Check if result.body exists
|
|
185
|
+
if (!result || !result.body) {
|
|
186
|
+
res.write(`event: error\n`);
|
|
187
|
+
res.write(`data: ${JSON.stringify({ type: "error", error: { message: "Empty response from provider" } })}\n\n`);
|
|
188
|
+
res.end();
|
|
189
|
+
return;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
const msg = result.body;
|
|
193
|
+
|
|
194
|
+
// 1. message_start
|
|
195
|
+
res.write(`event: message_start\n`);
|
|
196
|
+
res.write(`data: ${JSON.stringify({
|
|
197
|
+
type: "message_start",
|
|
198
|
+
message: {
|
|
199
|
+
id: msg.id,
|
|
200
|
+
type: "message",
|
|
201
|
+
role: "assistant",
|
|
202
|
+
content: [],
|
|
203
|
+
model: msg.model,
|
|
204
|
+
stop_reason: null,
|
|
205
|
+
stop_sequence: null,
|
|
206
|
+
usage: { input_tokens: msg.usage?.input_tokens || 0, output_tokens: 1 }
|
|
207
|
+
}
|
|
208
|
+
})}\n\n`);
|
|
209
|
+
|
|
210
|
+
// 2. content_block_start and content_block_delta for each content block
|
|
211
|
+
const contentBlocks = msg.content || [];
|
|
212
|
+
for (let i = 0; i < contentBlocks.length; i++) {
|
|
213
|
+
const block = contentBlocks[i];
|
|
214
|
+
|
|
215
|
+
if (block.type === "text") {
|
|
216
|
+
res.write(`event: content_block_start\n`);
|
|
217
|
+
res.write(`data: ${JSON.stringify({
|
|
218
|
+
type: "content_block_start",
|
|
219
|
+
index: i,
|
|
220
|
+
content_block: { type: "text", text: "" }
|
|
221
|
+
})}\n\n`);
|
|
222
|
+
|
|
223
|
+
// Send text in chunks
|
|
224
|
+
const text = block.text || "";
|
|
225
|
+
const chunkSize = 20;
|
|
226
|
+
for (let j = 0; j < text.length; j += chunkSize) {
|
|
227
|
+
const chunk = text.slice(j, j + chunkSize);
|
|
228
|
+
res.write(`event: content_block_delta\n`);
|
|
229
|
+
res.write(`data: ${JSON.stringify({
|
|
230
|
+
type: "content_block_delta",
|
|
231
|
+
index: i,
|
|
232
|
+
delta: { type: "text_delta", text: chunk }
|
|
233
|
+
})}\n\n`);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
res.write(`event: content_block_stop\n`);
|
|
237
|
+
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
|
|
238
|
+
} else if (block.type === "tool_use") {
|
|
239
|
+
res.write(`event: content_block_start\n`);
|
|
240
|
+
res.write(`data: ${JSON.stringify({
|
|
241
|
+
type: "content_block_start",
|
|
242
|
+
index: i,
|
|
243
|
+
content_block: { type: "tool_use", id: block.id, name: block.name, input: {} }
|
|
244
|
+
})}\n\n`);
|
|
245
|
+
|
|
246
|
+
res.write(`event: content_block_delta\n`);
|
|
247
|
+
res.write(`data: ${JSON.stringify({
|
|
248
|
+
type: "content_block_delta",
|
|
249
|
+
index: i,
|
|
250
|
+
delta: { type: "input_json_delta", partial_json: JSON.stringify(block.input) }
|
|
251
|
+
})}\n\n`);
|
|
252
|
+
|
|
253
|
+
res.write(`event: content_block_stop\n`);
|
|
254
|
+
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
// 3. message_delta with stop_reason
|
|
259
|
+
res.write(`event: message_delta\n`);
|
|
260
|
+
res.write(`data: ${JSON.stringify({
|
|
261
|
+
type: "message_delta",
|
|
262
|
+
delta: { stop_reason: msg.stop_reason || "end_turn", stop_sequence: null },
|
|
263
|
+
usage: { output_tokens: msg.usage?.output_tokens || 0 }
|
|
264
|
+
})}\n\n`);
|
|
265
|
+
|
|
266
|
+
// 4. message_stop
|
|
267
|
+
res.write(`event: message_stop\n`);
|
|
268
|
+
res.write(`data: ${JSON.stringify({ type: "message_stop" })}\n\n`);
|
|
269
|
+
|
|
194
270
|
metrics.recordResponse(result.status);
|
|
195
271
|
res.end();
|
|
196
272
|
return;
|
|
@@ -219,17 +295,91 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
219
295
|
res.flushHeaders();
|
|
220
296
|
}
|
|
221
297
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
298
|
+
// Check if result.body exists
|
|
299
|
+
if (!result || !result.body) {
|
|
300
|
+
res.write(`event: error\n`);
|
|
301
|
+
res.write(`data: ${JSON.stringify({ type: "error", error: { message: "Empty response from provider" } })}\n\n`);
|
|
302
|
+
res.end();
|
|
303
|
+
return;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
// Use proper Anthropic SSE format
|
|
307
|
+
const msg = result.body;
|
|
308
|
+
|
|
309
|
+
// 1. message_start
|
|
310
|
+
res.write(`event: message_start\n`);
|
|
311
|
+
res.write(`data: ${JSON.stringify({
|
|
312
|
+
type: "message_start",
|
|
313
|
+
message: {
|
|
314
|
+
id: msg.id,
|
|
315
|
+
type: "message",
|
|
316
|
+
role: "assistant",
|
|
317
|
+
content: [],
|
|
318
|
+
model: msg.model,
|
|
319
|
+
stop_reason: null,
|
|
320
|
+
stop_sequence: null,
|
|
321
|
+
usage: { input_tokens: msg.usage?.input_tokens || 0, output_tokens: 1 }
|
|
322
|
+
}
|
|
323
|
+
})}\n\n`);
|
|
324
|
+
|
|
325
|
+
// 2. content_block_start and content_block_delta for each content block
|
|
326
|
+
const contentBlocks = msg.content || [];
|
|
327
|
+
for (let i = 0; i < contentBlocks.length; i++) {
|
|
328
|
+
const block = contentBlocks[i];
|
|
329
|
+
|
|
330
|
+
if (block.type === "text") {
|
|
331
|
+
res.write(`event: content_block_start\n`);
|
|
332
|
+
res.write(`data: ${JSON.stringify({
|
|
333
|
+
type: "content_block_start",
|
|
334
|
+
index: i,
|
|
335
|
+
content_block: { type: "text", text: "" }
|
|
336
|
+
})}\n\n`);
|
|
337
|
+
|
|
338
|
+
const text = block.text || "";
|
|
339
|
+
const chunkSize = 20;
|
|
340
|
+
for (let j = 0; j < text.length; j += chunkSize) {
|
|
341
|
+
const chunk = text.slice(j, j + chunkSize);
|
|
342
|
+
res.write(`event: content_block_delta\n`);
|
|
343
|
+
res.write(`data: ${JSON.stringify({
|
|
344
|
+
type: "content_block_delta",
|
|
345
|
+
index: i,
|
|
346
|
+
delta: { type: "text_delta", text: chunk }
|
|
347
|
+
})}\n\n`);
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
res.write(`event: content_block_stop\n`);
|
|
351
|
+
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
|
|
352
|
+
} else if (block.type === "tool_use") {
|
|
353
|
+
res.write(`event: content_block_start\n`);
|
|
354
|
+
res.write(`data: ${JSON.stringify({
|
|
355
|
+
type: "content_block_start",
|
|
356
|
+
index: i,
|
|
357
|
+
content_block: { type: "tool_use", id: block.id, name: block.name, input: {} }
|
|
358
|
+
})}\n\n`);
|
|
359
|
+
|
|
360
|
+
res.write(`event: content_block_delta\n`);
|
|
361
|
+
res.write(`data: ${JSON.stringify({
|
|
362
|
+
type: "content_block_delta",
|
|
363
|
+
index: i,
|
|
364
|
+
delta: { type: "input_json_delta", partial_json: JSON.stringify(block.input) }
|
|
365
|
+
})}\n\n`);
|
|
366
|
+
|
|
367
|
+
res.write(`event: content_block_stop\n`);
|
|
368
|
+
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
// 3. message_delta with stop_reason
|
|
373
|
+
res.write(`event: message_delta\n`);
|
|
374
|
+
res.write(`data: ${JSON.stringify({
|
|
375
|
+
type: "message_delta",
|
|
376
|
+
delta: { stop_reason: msg.stop_reason || "end_turn", stop_sequence: null },
|
|
377
|
+
usage: { output_tokens: msg.usage?.output_tokens || 0 }
|
|
378
|
+
})}\n\n`);
|
|
379
|
+
|
|
380
|
+
// 4. message_stop
|
|
381
|
+
res.write(`event: message_stop\n`);
|
|
382
|
+
res.write(`data: ${JSON.stringify({ type: "message_stop" })}\n\n`);
|
|
233
383
|
|
|
234
384
|
metrics.recordResponse(result.status);
|
|
235
385
|
res.end();
|
|
@@ -225,10 +225,35 @@ async function invokeOllama(body) {
|
|
|
225
225
|
};
|
|
226
226
|
});
|
|
227
227
|
|
|
228
|
+
// FIX: Deduplicate consecutive messages with same role (Ollama may reject this)
|
|
229
|
+
const deduplicated = [];
|
|
230
|
+
let lastRole = null;
|
|
231
|
+
for (const msg of convertedMessages) {
|
|
232
|
+
if (msg.role === lastRole) {
|
|
233
|
+
logger.debug({
|
|
234
|
+
skippedRole: msg.role,
|
|
235
|
+
contentPreview: msg.content.substring(0, 50)
|
|
236
|
+
}, 'Ollama: Skipping duplicate consecutive message with same role');
|
|
237
|
+
continue;
|
|
238
|
+
}
|
|
239
|
+
deduplicated.push(msg);
|
|
240
|
+
lastRole = msg.role;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
if (deduplicated.length !== convertedMessages.length) {
|
|
244
|
+
logger.info({
|
|
245
|
+
originalCount: convertedMessages.length,
|
|
246
|
+
deduplicatedCount: deduplicated.length,
|
|
247
|
+
removed: convertedMessages.length - deduplicated.length,
|
|
248
|
+
messageRoles: convertedMessages.map(m => m.role).join(' → '),
|
|
249
|
+
deduplicatedRoles: deduplicated.map(m => m.role).join(' → ')
|
|
250
|
+
}, 'Ollama: Removed consecutive duplicate roles from message sequence');
|
|
251
|
+
}
|
|
252
|
+
|
|
228
253
|
const ollamaBody = {
|
|
229
254
|
model: config.ollama.model,
|
|
230
|
-
messages:
|
|
231
|
-
stream:
|
|
255
|
+
messages: deduplicated,
|
|
256
|
+
stream: false, // Force non-streaming for Ollama - streaming format conversion not yet implemented
|
|
232
257
|
options: {
|
|
233
258
|
temperature: body.temperature ?? 0.7,
|
|
234
259
|
num_predict: body.max_tokens ?? 4096,
|
|
@@ -240,7 +265,8 @@ async function invokeOllama(body) {
|
|
|
240
265
|
let toolsToSend = body.tools;
|
|
241
266
|
let toolsInjected = false;
|
|
242
267
|
|
|
243
|
-
|
|
268
|
+
const injectToolsOllama = process.env.INJECT_TOOLS_OLLAMA !== "false";
|
|
269
|
+
if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) {
|
|
244
270
|
toolsToSend = STANDARD_TOOLS;
|
|
245
271
|
toolsInjected = true;
|
|
246
272
|
logger.info({
|
|
@@ -248,6 +274,8 @@ async function invokeOllama(body) {
|
|
|
248
274
|
injectedToolNames: STANDARD_TOOLS.map(t => t.name),
|
|
249
275
|
reason: "Client did not send tools (passthrough mode)"
|
|
250
276
|
}, "=== INJECTING STANDARD TOOLS (Ollama) ===");
|
|
277
|
+
} else if (!injectToolsOllama) {
|
|
278
|
+
logger.info({}, "Tool injection disabled for Ollama (INJECT_TOOLS_OLLAMA=false)");
|
|
251
279
|
}
|
|
252
280
|
|
|
253
281
|
// Add tools if present (for tool-capable models)
|
|
@@ -351,10 +379,17 @@ async function invokeAzureOpenAI(body) {
|
|
|
351
379
|
const format = detectAzureFormat(endpoint);
|
|
352
380
|
|
|
353
381
|
const headers = {
|
|
354
|
-
"api-key": config.azureOpenAI.apiKey, // Azure uses "api-key" not "Authorization"
|
|
355
382
|
"Content-Type": "application/json"
|
|
356
383
|
};
|
|
357
384
|
|
|
385
|
+
// Azure AI Foundry (services.ai.azure.com) uses Bearer auth
|
|
386
|
+
// Standard Azure OpenAI (openai.azure.com) uses api-key header
|
|
387
|
+
if (endpoint.includes("services.ai.azure.com")) {
|
|
388
|
+
headers["Authorization"] = `Bearer ${config.azureOpenAI.apiKey}`;
|
|
389
|
+
} else {
|
|
390
|
+
headers["api-key"] = config.azureOpenAI.apiKey;
|
|
391
|
+
}
|
|
392
|
+
|
|
358
393
|
// Convert messages and handle system message
|
|
359
394
|
const messages = convertAnthropicMessagesToOpenRouter(body.messages || []);
|
|
360
395
|
|
|
@@ -371,7 +406,7 @@ async function invokeAzureOpenAI(body) {
|
|
|
371
406
|
temperature: body.temperature ?? 0.3, // Lower temperature for more deterministic, action-oriented behavior
|
|
372
407
|
max_tokens: Math.min(body.max_tokens ?? 4096, 16384), // Cap at Azure OpenAI's limit
|
|
373
408
|
top_p: body.top_p ?? 1.0,
|
|
374
|
-
stream:
|
|
409
|
+
stream: false, // Force non-streaming for Azure OpenAI - streaming format conversion not yet implemented
|
|
375
410
|
model: config.azureOpenAI.deployment
|
|
376
411
|
};
|
|
377
412
|
|
|
@@ -536,8 +571,35 @@ async function invokeLlamaCpp(body) {
|
|
|
536
571
|
messages.unshift({ role: "system", content: body.system });
|
|
537
572
|
}
|
|
538
573
|
|
|
574
|
+
// FIX: Deduplicate consecutive messages with same role (llama.cpp rejects this)
|
|
575
|
+
const deduplicated = [];
|
|
576
|
+
let lastRole = null;
|
|
577
|
+
for (const msg of messages) {
|
|
578
|
+
if (msg.role === lastRole) {
|
|
579
|
+
logger.debug({
|
|
580
|
+
skippedRole: msg.role,
|
|
581
|
+
contentPreview: typeof msg.content === 'string'
|
|
582
|
+
? msg.content.substring(0, 50)
|
|
583
|
+
: JSON.stringify(msg.content).substring(0, 50)
|
|
584
|
+
}, 'llama.cpp: Skipping duplicate consecutive message with same role');
|
|
585
|
+
continue;
|
|
586
|
+
}
|
|
587
|
+
deduplicated.push(msg);
|
|
588
|
+
lastRole = msg.role;
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
if (deduplicated.length !== messages.length) {
|
|
592
|
+
logger.info({
|
|
593
|
+
originalCount: messages.length,
|
|
594
|
+
deduplicatedCount: deduplicated.length,
|
|
595
|
+
removed: messages.length - deduplicated.length,
|
|
596
|
+
messageRoles: messages.map(m => m.role).join(' → '),
|
|
597
|
+
deduplicatedRoles: deduplicated.map(m => m.role).join(' → ')
|
|
598
|
+
}, 'llama.cpp: Removed consecutive duplicate roles from message sequence');
|
|
599
|
+
}
|
|
600
|
+
|
|
539
601
|
const llamacppBody = {
|
|
540
|
-
messages,
|
|
602
|
+
messages: deduplicated,
|
|
541
603
|
temperature: body.temperature ?? 0.7,
|
|
542
604
|
max_tokens: body.max_tokens ?? 4096,
|
|
543
605
|
top_p: body.top_p ?? 1.0,
|
|
@@ -548,7 +610,8 @@ async function invokeLlamaCpp(body) {
|
|
|
548
610
|
let toolsToSend = body.tools;
|
|
549
611
|
let toolsInjected = false;
|
|
550
612
|
|
|
551
|
-
|
|
613
|
+
const injectToolsLlamacpp = process.env.INJECT_TOOLS_LLAMACPP !== "false";
|
|
614
|
+
if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) {
|
|
552
615
|
toolsToSend = STANDARD_TOOLS;
|
|
553
616
|
toolsInjected = true;
|
|
554
617
|
logger.info({
|
|
@@ -556,6 +619,8 @@ async function invokeLlamaCpp(body) {
|
|
|
556
619
|
injectedToolNames: STANDARD_TOOLS.map(t => t.name),
|
|
557
620
|
reason: "Client did not send tools (passthrough mode)"
|
|
558
621
|
}, "=== INJECTING STANDARD TOOLS (llama.cpp) ===");
|
|
622
|
+
} else if (!injectToolsLlamacpp) {
|
|
623
|
+
logger.info({}, "Tool injection disabled for llama.cpp (INJECT_TOOLS_LLAMACPP=false)");
|
|
559
624
|
}
|
|
560
625
|
|
|
561
626
|
if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
@@ -574,6 +639,16 @@ async function invokeLlamaCpp(body) {
|
|
|
574
639
|
toolCount: llamacppBody.tools?.length || 0,
|
|
575
640
|
temperature: llamacppBody.temperature,
|
|
576
641
|
max_tokens: llamacppBody.max_tokens,
|
|
642
|
+
messageCount: llamacppBody.messages?.length || 0,
|
|
643
|
+
messageRoles: llamacppBody.messages?.map(m => m.role).join(' → '),
|
|
644
|
+
messages: llamacppBody.messages?.map((m, i) => ({
|
|
645
|
+
index: i,
|
|
646
|
+
role: m.role,
|
|
647
|
+
hasContent: !!m.content,
|
|
648
|
+
contentPreview: typeof m.content === 'string' ? m.content.substring(0, 100) : JSON.stringify(m.content).substring(0, 100),
|
|
649
|
+
hasToolCalls: !!m.tool_calls,
|
|
650
|
+
toolCallCount: m.tool_calls?.length || 0,
|
|
651
|
+
}))
|
|
577
652
|
}, "=== LLAMA.CPP REQUEST ===");
|
|
578
653
|
|
|
579
654
|
return performJsonRequest(endpoint, { headers, body: llamacppBody }, "llama.cpp");
|
|
@@ -124,15 +124,17 @@ function convertOpenAIToAnthropic(openaiRequest) {
|
|
|
124
124
|
// Convert tools format (OpenAI → Anthropic)
|
|
125
125
|
let anthropicTools = null;
|
|
126
126
|
if (tools && tools.length > 0) {
|
|
127
|
-
anthropicTools = tools
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
127
|
+
anthropicTools = tools
|
|
128
|
+
.filter(tool => tool && tool.function && tool.function.name) // Filter out invalid tools
|
|
129
|
+
.map(tool => ({
|
|
130
|
+
name: tool.function.name,
|
|
131
|
+
description: tool.function.description || "",
|
|
132
|
+
input_schema: tool.function.parameters || {
|
|
133
|
+
type: "object",
|
|
134
|
+
properties: {},
|
|
135
|
+
required: []
|
|
136
|
+
}
|
|
137
|
+
}));
|
|
136
138
|
}
|
|
137
139
|
|
|
138
140
|
// Build Anthropic request
|
|
@@ -264,9 +264,19 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
|
|
|
264
264
|
trimmed.includes('"arguments"'));
|
|
265
265
|
};
|
|
266
266
|
|
|
267
|
+
// Handle reasoning_content from thinking models (e.g., Kimi, o1)
|
|
268
|
+
let textContent = message.content || "";
|
|
269
|
+
if (!textContent.trim() && message.reasoning_content) {
|
|
270
|
+
logger.info({
|
|
271
|
+
hasReasoningContent: true,
|
|
272
|
+
reasoningLength: message.reasoning_content.length
|
|
273
|
+
}, "Using reasoning_content as primary content (thinking model detected)");
|
|
274
|
+
textContent = message.reasoning_content;
|
|
275
|
+
}
|
|
276
|
+
|
|
267
277
|
// Add text content if present, but skip if it's a duplicate/malformed tool call JSON
|
|
268
|
-
if (
|
|
269
|
-
const looksLikeToolJson = isToolCallJson(
|
|
278
|
+
if (textContent && textContent.trim()) {
|
|
279
|
+
const looksLikeToolJson = isToolCallJson(textContent);
|
|
270
280
|
|
|
271
281
|
// Skip content in two cases:
|
|
272
282
|
// 1. We have proper tool_calls AND content duplicates them (original fix)
|
|
@@ -276,14 +286,14 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
|
|
|
276
286
|
if (hasToolCalls) {
|
|
277
287
|
// Case 1: Duplicate - model provided both content and tool_calls
|
|
278
288
|
logger.debug({
|
|
279
|
-
contentPreview:
|
|
289
|
+
contentPreview: textContent.substring(0, 100),
|
|
280
290
|
toolCallCount: message.tool_calls.length
|
|
281
291
|
}, "Skipping text content that duplicates tool_calls (llama.cpp quirk)");
|
|
282
292
|
} else {
|
|
283
293
|
// Case 2: Malformed - model only provided JSON in content, not structured tool_calls
|
|
284
294
|
// This is a model error - it should have used tool_calls, not raw JSON
|
|
285
295
|
logger.warn({
|
|
286
|
-
contentPreview:
|
|
296
|
+
contentPreview: textContent.substring(0, 200)
|
|
287
297
|
}, "Model output tool call as JSON text instead of structured tool_calls - filtering out malformed output");
|
|
288
298
|
}
|
|
289
299
|
// Skip this content block in both cases
|
|
@@ -291,7 +301,7 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
|
|
|
291
301
|
// Normal text content - include it
|
|
292
302
|
contentBlocks.push({
|
|
293
303
|
type: "text",
|
|
294
|
-
text:
|
|
304
|
+
text: textContent
|
|
295
305
|
});
|
|
296
306
|
}
|
|
297
307
|
}
|