lynkr 7.2.4 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/config/model-tiers.json +89 -0
- package/docs/docs.html +1 -0
- package/docs/index.md +7 -0
- package/docs/toon-integration-spec.md +130 -0
- package/documentation/README.md +3 -2
- package/documentation/claude-code-cli.md +23 -16
- package/documentation/cursor-integration.md +17 -14
- package/documentation/docker.md +11 -4
- package/documentation/embeddings.md +7 -5
- package/documentation/faq.md +66 -12
- package/documentation/features.md +22 -15
- package/documentation/installation.md +66 -14
- package/documentation/production.md +43 -8
- package/documentation/providers.md +145 -42
- package/documentation/routing.md +476 -0
- package/documentation/token-optimization.md +7 -5
- package/documentation/troubleshooting.md +81 -5
- package/install.sh +6 -1
- package/package.json +5 -3
- package/scripts/setup.js +0 -1
- package/src/agents/executor.js +14 -6
- package/src/api/middleware/session.js +15 -2
- package/src/api/openai-router.js +130 -37
- package/src/api/providers-handler.js +15 -1
- package/src/api/router.js +107 -2
- package/src/budget/index.js +4 -3
- package/src/clients/databricks.js +431 -234
- package/src/clients/gpt-utils.js +181 -0
- package/src/clients/ollama-utils.js +66 -140
- package/src/clients/routing.js +0 -1
- package/src/clients/standard-tools.js +82 -5
- package/src/config/index.js +119 -35
- package/src/context/toon.js +173 -0
- package/src/headroom/launcher.js +8 -3
- package/src/logger/index.js +23 -0
- package/src/orchestrator/index.js +765 -212
- package/src/routing/agentic-detector.js +320 -0
- package/src/routing/complexity-analyzer.js +202 -2
- package/src/routing/cost-optimizer.js +305 -0
- package/src/routing/index.js +168 -159
- package/src/routing/model-registry.js +437 -0
- package/src/routing/model-tiers.js +365 -0
- package/src/server.js +2 -2
- package/src/sessions/cleanup.js +3 -3
- package/src/sessions/record.js +10 -1
- package/src/sessions/store.js +7 -2
- package/src/tools/agent-task.js +48 -1
- package/src/tools/index.js +15 -2
- package/src/tools/workspace.js +35 -4
- package/src/workspace/index.js +30 -0
- package/te +11622 -0
- package/test/README.md +1 -1
- package/test/azure-openai-config.test.js +17 -8
- package/test/azure-openai-integration.test.js +7 -1
- package/test/azure-openai-routing.test.js +41 -43
- package/test/bedrock-integration.test.js +18 -32
- package/test/hybrid-routing-integration.test.js +35 -20
- package/test/hybrid-routing-performance.test.js +74 -64
- package/test/llamacpp-integration.test.js +28 -9
- package/test/lmstudio-integration.test.js +20 -8
- package/test/openai-integration.test.js +17 -20
- package/test/performance-tests.js +1 -1
- package/test/routing.test.js +65 -59
- package/test/toon-compression.test.js +131 -0
- package/CLAWROUTER_ROUTING_PLAN.md +0 -910
- package/ROUTER_COMPARISON.md +0 -173
- package/TIER_ROUTING_PLAN.md +0 -771
|
@@ -6,11 +6,12 @@ const { getCircuitBreakerRegistry } = require("./circuit-breaker");
|
|
|
6
6
|
const { getMetricsCollector } = require("../observability/metrics");
|
|
7
7
|
const { getHealthTracker } = require("../observability/health-tracker");
|
|
8
8
|
const logger = require("../logger");
|
|
9
|
-
const { STANDARD_TOOLS } = require("./standard-tools");
|
|
9
|
+
const { STANDARD_TOOLS, STANDARD_TOOL_NAMES } = require("./standard-tools");
|
|
10
10
|
const { convertAnthropicToolsToOpenRouter } = require("./openrouter-utils");
|
|
11
11
|
const {
|
|
12
12
|
detectModelFamily
|
|
13
13
|
} = require("./bedrock-utils");
|
|
14
|
+
const { getGPTSystemPromptAddendum } = require("./gpt-utils");
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
|
|
@@ -183,9 +184,9 @@ async function invokeDatabricks(body) {
|
|
|
183
184
|
// Inject standard tools if client didn't send any (passthrough mode)
|
|
184
185
|
if (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0) {
|
|
185
186
|
databricksBody.tools = STANDARD_TOOLS;
|
|
186
|
-
logger.
|
|
187
|
+
logger.debug({
|
|
187
188
|
injectedToolCount: STANDARD_TOOLS.length,
|
|
188
|
-
injectedToolNames:
|
|
189
|
+
injectedToolNames: STANDARD_TOOL_NAMES,
|
|
189
190
|
reason: "Client did not send tools (passthrough mode)"
|
|
190
191
|
}, "=== INJECTING STANDARD TOOLS (Databricks) ===");
|
|
191
192
|
}
|
|
@@ -224,9 +225,9 @@ async function invokeAzureAnthropic(body) {
|
|
|
224
225
|
// Inject standard tools if client didn't send any (passthrough mode)
|
|
225
226
|
if (!Array.isArray(body.tools) || body.tools.length === 0) {
|
|
226
227
|
body.tools = STANDARD_TOOLS;
|
|
227
|
-
logger.
|
|
228
|
+
logger.debug({
|
|
228
229
|
injectedToolCount: STANDARD_TOOLS.length,
|
|
229
|
-
injectedToolNames:
|
|
230
|
+
injectedToolNames: STANDARD_TOOL_NAMES,
|
|
230
231
|
reason: "Client did not send tools (passthrough mode)"
|
|
231
232
|
}, "=== INJECTING STANDARD TOOLS (Azure Anthropic) ===");
|
|
232
233
|
}
|
|
@@ -248,42 +249,115 @@ async function invokeOllama(body) {
|
|
|
248
249
|
throw new Error("Ollama endpoint is not configured.");
|
|
249
250
|
}
|
|
250
251
|
|
|
251
|
-
const {
|
|
252
|
+
const { checkOllamaToolSupport, hasAnthropicEndpoint, convertAnthropicToolsToOllama } = require("./ollama-utils");
|
|
253
|
+
|
|
254
|
+
const modelName = body._suggestionModeModel || body._tierModel || config.ollama.model;
|
|
255
|
+
|
|
256
|
+
// Detect whether Ollama has the native Anthropic Messages API (v0.14.0+)
|
|
257
|
+
const useAnthropicApi = await hasAnthropicEndpoint(config.ollama.endpoint);
|
|
258
|
+
|
|
259
|
+
// Check if model supports tools FIRST (before wasteful injection)
|
|
260
|
+
const supportsTools = await checkOllamaToolSupport(config.ollama.model);
|
|
261
|
+
const injectToolsOllama = process.env.INJECT_TOOLS_OLLAMA !== "false";
|
|
262
|
+
|
|
263
|
+
// Determine tools to send
|
|
264
|
+
let toolsToSend = body.tools;
|
|
265
|
+
let toolsInjected = false;
|
|
266
|
+
|
|
267
|
+
if (!supportsTools) {
|
|
268
|
+
toolsToSend = null;
|
|
269
|
+
} else if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) {
|
|
270
|
+
toolsToSend = STANDARD_TOOLS;
|
|
271
|
+
toolsInjected = true;
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
// Consolidated tool injection log
|
|
275
|
+
const toolCount = (supportsTools && Array.isArray(toolsToSend)) ? toolsToSend.length : 0;
|
|
276
|
+
let logMessage;
|
|
277
|
+
if (!supportsTools) {
|
|
278
|
+
logMessage = `Tools not supported (0 tools)`;
|
|
279
|
+
} else if (toolsInjected) {
|
|
280
|
+
logMessage = `injected ${toolCount} tools`;
|
|
281
|
+
} else if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
282
|
+
logMessage = `Using client-provided tools (${toolCount} tools)`;
|
|
283
|
+
} else if (!injectToolsOllama) {
|
|
284
|
+
logMessage = `Tool injection disabled (0 tools)`;
|
|
285
|
+
} else {
|
|
286
|
+
logMessage = `No tools (0 tools)`;
|
|
287
|
+
}
|
|
252
288
|
|
|
289
|
+
logger.debug({
|
|
290
|
+
model: modelName,
|
|
291
|
+
apiMode: useAnthropicApi ? "anthropic" : "legacy",
|
|
292
|
+
toolCount,
|
|
293
|
+
toolsInjected,
|
|
294
|
+
supportsTools,
|
|
295
|
+
toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name) : []
|
|
296
|
+
}, `=== Ollama STANDARD TOOLS INJECTION for ${config.ollama.model} === ${logMessage}`);
|
|
297
|
+
|
|
298
|
+
// ---- Anthropic-native path (Ollama v0.14.0+) ----
|
|
299
|
+
if (useAnthropicApi) {
|
|
300
|
+
const endpoint = `${config.ollama.endpoint}/v1/messages`;
|
|
301
|
+
const headers = {
|
|
302
|
+
"Content-Type": "application/json",
|
|
303
|
+
"anthropic-version": "2023-06-01",
|
|
304
|
+
};
|
|
305
|
+
|
|
306
|
+
// Build body with only valid Anthropic Messages API fields
|
|
307
|
+
const ollamaBody = {
|
|
308
|
+
model: modelName,
|
|
309
|
+
messages: body.messages,
|
|
310
|
+
max_tokens: body.max_tokens || 4096,
|
|
311
|
+
stream: false,
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
if (body.system) ollamaBody.system = body.system;
|
|
315
|
+
if (body.temperature !== undefined) ollamaBody.temperature = body.temperature;
|
|
316
|
+
if (body.top_p !== undefined) ollamaBody.top_p = body.top_p;
|
|
317
|
+
if (body.top_k !== undefined) ollamaBody.top_k = body.top_k;
|
|
318
|
+
if (body.stop_sequences) ollamaBody.stop_sequences = body.stop_sequences;
|
|
319
|
+
if (body.tool_choice) ollamaBody.tool_choice = body.tool_choice;
|
|
320
|
+
if (body.metadata) ollamaBody.metadata = body.metadata;
|
|
321
|
+
|
|
322
|
+
// Tools (already Anthropic format — no conversion needed)
|
|
323
|
+
if (supportsTools && Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
324
|
+
ollamaBody.tools = toolsToSend;
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
if (config.ollama.keepAlive !== undefined) {
|
|
328
|
+
const keepAlive = config.ollama.keepAlive;
|
|
329
|
+
ollamaBody.keep_alive = /^-?\d+$/.test(keepAlive)
|
|
330
|
+
? parseInt(keepAlive, 10)
|
|
331
|
+
: keepAlive;
|
|
332
|
+
logger.debug({ keepAlive: ollamaBody.keep_alive }, "Ollama keep_alive configured");
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
return performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama");
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
// ---- Legacy path (Ollama < v0.14.0, /api/chat with OpenAI format) ----
|
|
253
339
|
const endpoint = `${config.ollama.endpoint}/api/chat`;
|
|
254
340
|
const headers = { "Content-Type": "application/json" };
|
|
255
341
|
|
|
256
|
-
// Convert Anthropic messages
|
|
257
|
-
// Ollama expects content as string, not content blocks array
|
|
342
|
+
// Convert Anthropic messages to Ollama format (content blocks → strings)
|
|
258
343
|
const convertedMessages = [];
|
|
259
344
|
|
|
260
|
-
// Handle system prompt (same pattern as other providers)
|
|
261
345
|
if (body.system && typeof body.system === "string" && body.system.trim().length > 0) {
|
|
262
|
-
convertedMessages.push({
|
|
263
|
-
role: "system",
|
|
264
|
-
content: body.system.trim()
|
|
265
|
-
});
|
|
346
|
+
convertedMessages.push({ role: "system", content: body.system.trim() });
|
|
266
347
|
}
|
|
267
348
|
|
|
268
|
-
// Add user/assistant messages
|
|
269
349
|
(body.messages || []).forEach(msg => {
|
|
270
350
|
let content = msg.content;
|
|
271
|
-
|
|
272
|
-
// Convert content blocks array to simple string
|
|
273
351
|
if (Array.isArray(content)) {
|
|
274
352
|
content = content
|
|
275
353
|
.filter(block => block.type === 'text')
|
|
276
354
|
.map(block => block.text || '')
|
|
277
355
|
.join('\n');
|
|
278
356
|
}
|
|
279
|
-
|
|
280
|
-
convertedMessages.push({
|
|
281
|
-
role: msg.role,
|
|
282
|
-
content: content || ''
|
|
283
|
-
});
|
|
357
|
+
convertedMessages.push({ role: msg.role, content: content || '' });
|
|
284
358
|
});
|
|
285
359
|
|
|
286
|
-
//
|
|
360
|
+
// Deduplicate consecutive messages with same role
|
|
287
361
|
const deduplicated = [];
|
|
288
362
|
let lastRole = null;
|
|
289
363
|
for (const msg of convertedMessages) {
|
|
@@ -298,85 +372,30 @@ async function invokeOllama(body) {
|
|
|
298
372
|
lastRole = msg.role;
|
|
299
373
|
}
|
|
300
374
|
|
|
301
|
-
if (deduplicated.length !== convertedMessages.length) {
|
|
302
|
-
logger.info({
|
|
303
|
-
originalCount: convertedMessages.length,
|
|
304
|
-
deduplicatedCount: deduplicated.length,
|
|
305
|
-
removed: convertedMessages.length - deduplicated.length,
|
|
306
|
-
messageRoles: convertedMessages.map(m => m.role).join(' → '),
|
|
307
|
-
deduplicatedRoles: deduplicated.map(m => m.role).join(' → ')
|
|
308
|
-
}, 'Ollama: Removed consecutive duplicate roles from message sequence');
|
|
309
|
-
}
|
|
310
|
-
|
|
311
375
|
const ollamaBody = {
|
|
312
|
-
model:
|
|
376
|
+
model: modelName,
|
|
313
377
|
messages: deduplicated,
|
|
314
|
-
stream: false,
|
|
378
|
+
stream: false,
|
|
315
379
|
options: {
|
|
316
380
|
temperature: body.temperature ?? 0.7,
|
|
317
|
-
num_predict: body.max_tokens ??
|
|
381
|
+
num_predict: body.max_tokens ?? 16384,
|
|
318
382
|
top_p: body.top_p ?? 1.0,
|
|
319
383
|
},
|
|
320
384
|
};
|
|
321
385
|
|
|
322
|
-
// Add keep_alive if configured (controls how long model stays loaded)
|
|
323
|
-
// Accepts: duration strings ("10m", "24h"), numbers (seconds), -1 (permanent), 0 (immediate unload)
|
|
324
386
|
if (config.ollama.keepAlive !== undefined) {
|
|
325
387
|
const keepAlive = config.ollama.keepAlive;
|
|
326
|
-
// Parse as number if it looks like one, otherwise use string
|
|
327
388
|
ollamaBody.keep_alive = /^-?\d+$/.test(keepAlive)
|
|
328
389
|
? parseInt(keepAlive, 10)
|
|
329
390
|
: keepAlive;
|
|
330
391
|
logger.debug({ keepAlive: ollamaBody.keep_alive }, "Ollama keep_alive configured");
|
|
331
392
|
}
|
|
332
393
|
|
|
333
|
-
//
|
|
334
|
-
const supportsTools = await checkOllamaToolSupport(config.ollama.model);
|
|
335
|
-
|
|
336
|
-
// Inject standard tools if client didn't send any (passthrough mode)
|
|
337
|
-
let toolsToSend = body.tools;
|
|
338
|
-
let toolsInjected = false;
|
|
339
|
-
|
|
340
|
-
const injectToolsOllama = process.env.INJECT_TOOLS_OLLAMA !== "false";
|
|
341
|
-
|
|
342
|
-
if (!supportsTools) {
|
|
343
|
-
// Model doesn't support tools - don't inject them
|
|
344
|
-
toolsToSend = null;
|
|
345
|
-
} else if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) {
|
|
346
|
-
// Model supports tools and none provided - inject them
|
|
347
|
-
toolsToSend = STANDARD_TOOLS;
|
|
348
|
-
toolsInjected = true;
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
// Add tools if present AND model supports them
|
|
394
|
+
// Tools need conversion to OpenAI function-calling format for legacy endpoint
|
|
352
395
|
if (supportsTools && Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
353
396
|
ollamaBody.tools = convertAnthropicToolsToOllama(toolsToSend);
|
|
354
397
|
}
|
|
355
398
|
|
|
356
|
-
// Single consolidated log message for all cases (easy to grep and compare across models)
|
|
357
|
-
const toolCount = (supportsTools && Array.isArray(toolsToSend)) ? toolsToSend.length : 0;
|
|
358
|
-
let logMessage;
|
|
359
|
-
|
|
360
|
-
if (!supportsTools) {
|
|
361
|
-
logMessage = `Tools not supported (0 tools)`;
|
|
362
|
-
} else if (toolsInjected) {
|
|
363
|
-
logMessage = `injected ${toolCount} tools`;
|
|
364
|
-
} else if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
365
|
-
logMessage = `Using client-provided tools (${toolCount} tools)`;
|
|
366
|
-
} else if (!injectToolsOllama) {
|
|
367
|
-
logMessage = `Tool injection disabled (0 tools)`;
|
|
368
|
-
} else {
|
|
369
|
-
logMessage = `No tools (0 tools)`;
|
|
370
|
-
}
|
|
371
|
-
|
|
372
|
-
logger.info({
|
|
373
|
-
model: config.ollama.model,
|
|
374
|
-
toolCount,
|
|
375
|
-
toolsInjected,
|
|
376
|
-
supportsTools,
|
|
377
|
-
toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name) : []
|
|
378
|
-
}, `=== Ollama STANDARD TOOLS INJECTION for ${config.ollama.model} === ${logMessage}`);
|
|
379
|
-
|
|
380
399
|
return performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama");
|
|
381
400
|
}
|
|
382
401
|
|
|
@@ -410,7 +429,7 @@ async function invokeOpenRouter(body) {
|
|
|
410
429
|
}
|
|
411
430
|
|
|
412
431
|
const openRouterBody = {
|
|
413
|
-
model: config.openrouter.model,
|
|
432
|
+
model: body._suggestionModeModel || body._tierModel || config.openrouter.model,
|
|
414
433
|
messages,
|
|
415
434
|
temperature: body.temperature ?? 0.7,
|
|
416
435
|
max_tokens: body.max_tokens ?? 4096,
|
|
@@ -426,16 +445,16 @@ async function invokeOpenRouter(body) {
|
|
|
426
445
|
// Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools
|
|
427
446
|
toolsToSend = STANDARD_TOOLS;
|
|
428
447
|
toolsInjected = true;
|
|
429
|
-
logger.
|
|
448
|
+
logger.debug({
|
|
430
449
|
injectedToolCount: STANDARD_TOOLS.length,
|
|
431
|
-
injectedToolNames:
|
|
450
|
+
injectedToolNames: STANDARD_TOOL_NAMES,
|
|
432
451
|
reason: "Client did not send tools (passthrough mode)"
|
|
433
452
|
}, "=== INJECTING STANDARD TOOLS (OpenRouter) ===");
|
|
434
453
|
}
|
|
435
454
|
|
|
436
455
|
if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
437
456
|
openRouterBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend);
|
|
438
|
-
logger.
|
|
457
|
+
logger.debug({
|
|
439
458
|
toolCount: toolsToSend.length,
|
|
440
459
|
toolNames: toolsToSend.map(t => t.name),
|
|
441
460
|
toolsInjected
|
|
@@ -490,13 +509,16 @@ async function invokeAzureOpenAI(body) {
|
|
|
490
509
|
});
|
|
491
510
|
}
|
|
492
511
|
|
|
512
|
+
// System prompt injection disabled - breaks model response
|
|
513
|
+
// Tool guidance now provided via tool descriptions instead
|
|
514
|
+
|
|
493
515
|
const azureBody = {
|
|
494
516
|
messages,
|
|
495
517
|
temperature: body.temperature ?? 0.3, // Lower temperature for more deterministic, action-oriented behavior
|
|
496
518
|
max_tokens: Math.min(body.max_tokens ?? 4096, 16384), // Cap at Azure OpenAI's limit
|
|
497
519
|
top_p: body.top_p ?? 1.0,
|
|
498
520
|
stream: false, // Force non-streaming for Azure OpenAI - streaming format conversion not yet implemented
|
|
499
|
-
model: config.azureOpenAI.deployment
|
|
521
|
+
model: body._suggestionModeModel || body._tierModel || config.azureOpenAI.deployment
|
|
500
522
|
};
|
|
501
523
|
|
|
502
524
|
// Add tools - inject standard tools if client didn't send any (passthrough mode)
|
|
@@ -507,18 +529,18 @@ async function invokeAzureOpenAI(body) {
|
|
|
507
529
|
// Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools
|
|
508
530
|
toolsToSend = STANDARD_TOOLS;
|
|
509
531
|
toolsInjected = true;
|
|
510
|
-
logger.
|
|
532
|
+
logger.debug({
|
|
511
533
|
injectedToolCount: STANDARD_TOOLS.length,
|
|
512
|
-
injectedToolNames:
|
|
534
|
+
injectedToolNames: STANDARD_TOOL_NAMES,
|
|
513
535
|
reason: "Client did not send tools (passthrough mode)"
|
|
514
536
|
}, "=== INJECTING STANDARD TOOLS ===");
|
|
515
537
|
}
|
|
516
538
|
|
|
517
539
|
if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
518
540
|
azureBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend);
|
|
519
|
-
azureBody.parallel_tool_calls = true; // Enable parallel tool
|
|
541
|
+
azureBody.parallel_tool_calls = true; // Enable parallel tool calls
|
|
520
542
|
azureBody.tool_choice = "auto"; // Explicitly enable tool use (helps GPT models understand they should use tools)
|
|
521
|
-
logger.
|
|
543
|
+
logger.debug({
|
|
522
544
|
toolCount: toolsToSend.length,
|
|
523
545
|
toolNames: toolsToSend.map(t => t.name),
|
|
524
546
|
toolsInjected,
|
|
@@ -529,7 +551,7 @@ async function invokeAzureOpenAI(body) {
|
|
|
529
551
|
}, "=== SENDING TOOLS TO AZURE OPENAI ===");
|
|
530
552
|
}
|
|
531
553
|
|
|
532
|
-
logger.
|
|
554
|
+
logger.debug({
|
|
533
555
|
endpoint,
|
|
534
556
|
hasTools: !!azureBody.tools,
|
|
535
557
|
toolCount: azureBody.tools?.length || 0,
|
|
@@ -563,14 +585,83 @@ async function invokeAzureOpenAI(body) {
|
|
|
563
585
|
// Track function call IDs for matching with outputs
|
|
564
586
|
const pendingCallIds = [];
|
|
565
587
|
|
|
588
|
+
// Detect if this is a continuation request (has tool results)
|
|
589
|
+
// Azure content filter triggers on full system prompt in continuations
|
|
590
|
+
// Check for:
|
|
591
|
+
// 1. tool_result blocks in user messages (Anthropic format)
|
|
592
|
+
// 2. tool messages (OpenAI format)
|
|
593
|
+
// 3. assistant messages with tool_use or tool_calls (indicates prior tool invocation)
|
|
594
|
+
// 4. Flattened continuation pattern from orchestrator (contains "IMPORTANT: Focus on")
|
|
595
|
+
const hasToolResults = (body.messages || []).some(msg => {
|
|
596
|
+
// Check for Anthropic format tool_result in user messages
|
|
597
|
+
if (msg.role === "user" && Array.isArray(msg.content)) {
|
|
598
|
+
if (msg.content.some(block => block.type === "tool_result")) return true;
|
|
599
|
+
}
|
|
600
|
+
// Check for OpenAI format tool messages
|
|
601
|
+
if (msg.role === "tool") return true;
|
|
602
|
+
// Check for assistant messages with tool_use (Anthropic) or tool_calls (OpenAI)
|
|
603
|
+
// If there's a prior tool use, this is a continuation
|
|
604
|
+
if (msg.role === "assistant") {
|
|
605
|
+
if (Array.isArray(msg.content)) {
|
|
606
|
+
if (msg.content.some(block => block.type === "tool_use")) return true;
|
|
607
|
+
}
|
|
608
|
+
if (msg.tool_calls && msg.tool_calls.length > 0) return true;
|
|
609
|
+
}
|
|
610
|
+
return false;
|
|
611
|
+
}) || azureBody.messages.some(msg => {
|
|
612
|
+
// Also check converted messages for flattened continuation pattern
|
|
613
|
+
// The orchestrator flattens tool results into user message with this marker
|
|
614
|
+
if (msg.role === "user" && typeof msg.content === "string") {
|
|
615
|
+
if (msg.content.includes("IMPORTANT: Focus on and respond ONLY to my most recent request")) return true;
|
|
616
|
+
}
|
|
617
|
+
return false;
|
|
618
|
+
});
|
|
619
|
+
|
|
620
|
+
if (hasToolResults) {
|
|
621
|
+
logger.debug({
|
|
622
|
+
hasToolResults: true,
|
|
623
|
+
originalMessageCount: (body.messages || []).length,
|
|
624
|
+
convertedMessageCount: azureBody.messages.length,
|
|
625
|
+
messageRoles: (body.messages || []).map(m => m.role),
|
|
626
|
+
}, "=== CONTINUATION REQUEST DETECTED - using minimal system prompt to avoid Azure content filter ===");
|
|
627
|
+
} else {
|
|
628
|
+
logger.debug({
|
|
629
|
+
hasToolResults: false,
|
|
630
|
+
originalMessageCount: (body.messages || []).length,
|
|
631
|
+
messageRoles: (body.messages || []).map(m => m.role),
|
|
632
|
+
}, "Initial request - using full system prompt");
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
// Helper function to strip <system-reminder> tags and meta-instructions from content
|
|
636
|
+
// Azure's jailbreak filter triggers on these instructions in continuation requests
|
|
637
|
+
const stripSystemReminders = (content) => {
|
|
638
|
+
if (!content || typeof content !== 'string') return content;
|
|
639
|
+
// Remove <system-reminder>...</system-reminder> blocks
|
|
640
|
+
let cleaned = content.replace(/<system-reminder>[\s\S]*?<\/system-reminder>/gi, '');
|
|
641
|
+
// Remove the continuation marker that orchestrator adds
|
|
642
|
+
cleaned = cleaned.replace(/---\s*IMPORTANT:\s*Focus on and respond ONLY to my most recent request[^\n]*/gi, '');
|
|
643
|
+
// Trim whitespace
|
|
644
|
+
return cleaned.trim();
|
|
645
|
+
};
|
|
646
|
+
|
|
566
647
|
for (const msg of azureBody.messages) {
|
|
567
648
|
if (msg.role === "system") {
|
|
568
|
-
//
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
649
|
+
// For continuation requests, use minimal system prompt to avoid content filter
|
|
650
|
+
// Azure's jailbreak detection triggers on security-related text in continuations
|
|
651
|
+
if (hasToolResults) {
|
|
652
|
+
responsesInput.push({
|
|
653
|
+
type: "message",
|
|
654
|
+
role: "developer",
|
|
655
|
+
content: "You are a helpful coding assistant. Continue helping the user based on the tool results."
|
|
656
|
+
});
|
|
657
|
+
} else {
|
|
658
|
+
// Initial request - use full system prompt
|
|
659
|
+
responsesInput.push({
|
|
660
|
+
type: "message",
|
|
661
|
+
role: "developer",
|
|
662
|
+
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
|
|
663
|
+
});
|
|
664
|
+
}
|
|
574
665
|
} else if (msg.role === "user") {
|
|
575
666
|
// Check if content contains tool_result blocks (Anthropic format)
|
|
576
667
|
if (Array.isArray(msg.content)) {
|
|
@@ -585,19 +676,30 @@ async function invokeAzureOpenAI(body) {
|
|
|
585
676
|
output: typeof block.content === 'string' ? block.content : JSON.stringify(block.content || "")
|
|
586
677
|
});
|
|
587
678
|
} else if (block.type === "text") {
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
679
|
+
// For continuation requests, strip system-reminder tags to avoid jailbreak filter
|
|
680
|
+
const textContent = hasToolResults ? stripSystemReminders(block.text || "") : (block.text || "");
|
|
681
|
+
if (textContent) { // Only add if there's content after stripping
|
|
682
|
+
responsesInput.push({
|
|
683
|
+
type: "message",
|
|
684
|
+
role: "user",
|
|
685
|
+
content: textContent
|
|
686
|
+
});
|
|
687
|
+
}
|
|
593
688
|
}
|
|
594
689
|
}
|
|
595
690
|
} else {
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
}
|
|
691
|
+
// For continuation requests, strip system-reminder tags to avoid jailbreak filter
|
|
692
|
+
let userContent = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
|
|
693
|
+
if (hasToolResults) {
|
|
694
|
+
userContent = stripSystemReminders(userContent);
|
|
695
|
+
}
|
|
696
|
+
if (userContent) { // Only add if there's content after stripping
|
|
697
|
+
responsesInput.push({
|
|
698
|
+
type: "message",
|
|
699
|
+
role: "user",
|
|
700
|
+
content: userContent
|
|
701
|
+
});
|
|
702
|
+
}
|
|
601
703
|
}
|
|
602
704
|
} else if (msg.role === "assistant") {
|
|
603
705
|
// Assistant messages - handle tool_calls (OpenAI format) and tool_use blocks (Anthropic format)
|
|
@@ -663,7 +765,7 @@ async function invokeAzureOpenAI(body) {
|
|
|
663
765
|
tool_choice: azureBody.tool_choice,
|
|
664
766
|
stream: false
|
|
665
767
|
};
|
|
666
|
-
logger.
|
|
768
|
+
logger.debug({
|
|
667
769
|
format: "responses",
|
|
668
770
|
inputCount: responsesBody.input?.length,
|
|
669
771
|
model: responsesBody.model,
|
|
@@ -681,7 +783,7 @@ async function invokeAzureOpenAI(body) {
|
|
|
681
783
|
const textContent = messageOutput?.content?.find(c => c.type === "output_text")?.text || "";
|
|
682
784
|
|
|
683
785
|
// Find function_call outputs (tool calls are separate items in output array)
|
|
684
|
-
const
|
|
786
|
+
const rawToolCalls = outputArray
|
|
685
787
|
.filter(o => o.type === "function_call")
|
|
686
788
|
.map(tc => ({
|
|
687
789
|
id: tc.call_id || tc.id || `call_${Date.now()}`,
|
|
@@ -692,7 +794,30 @@ async function invokeAzureOpenAI(body) {
|
|
|
692
794
|
}
|
|
693
795
|
}));
|
|
694
796
|
|
|
695
|
-
|
|
797
|
+
// Deduplicate identical tool calls (GPT sometimes returns multiple identical calls)
|
|
798
|
+
const seenSignatures = new Set();
|
|
799
|
+
const toolCalls = rawToolCalls.filter(tc => {
|
|
800
|
+
const signature = `${tc.function.name}:${tc.function.arguments}`;
|
|
801
|
+
if (seenSignatures.has(signature)) {
|
|
802
|
+
logger.warn({
|
|
803
|
+
toolName: tc.function.name,
|
|
804
|
+
signature: signature.substring(0, 100),
|
|
805
|
+
}, "Filtered duplicate tool call from GPT response");
|
|
806
|
+
return false;
|
|
807
|
+
}
|
|
808
|
+
seenSignatures.add(signature);
|
|
809
|
+
return true;
|
|
810
|
+
});
|
|
811
|
+
|
|
812
|
+
if (rawToolCalls.length !== toolCalls.length) {
|
|
813
|
+
logger.debug({
|
|
814
|
+
originalCount: rawToolCalls.length,
|
|
815
|
+
dedupedCount: toolCalls.length,
|
|
816
|
+
removed: rawToolCalls.length - toolCalls.length,
|
|
817
|
+
}, "Deduplicated identical tool calls from single response");
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
logger.debug({
|
|
696
821
|
outputTypes: outputArray.map(o => o.type),
|
|
697
822
|
hasMessage: !!messageOutput,
|
|
698
823
|
toolCallCount: toolCalls.length,
|
|
@@ -717,7 +842,7 @@ async function invokeAzureOpenAI(body) {
|
|
|
717
842
|
usage: result.json.usage
|
|
718
843
|
};
|
|
719
844
|
|
|
720
|
-
logger.
|
|
845
|
+
logger.debug({
|
|
721
846
|
convertedContent: textContent?.substring(0, 100),
|
|
722
847
|
hasToolCalls: toolCalls.length > 0,
|
|
723
848
|
toolCallCount: toolCalls.length
|
|
@@ -725,7 +850,7 @@ async function invokeAzureOpenAI(body) {
|
|
|
725
850
|
|
|
726
851
|
// Now convert from Chat Completions format to Anthropic format
|
|
727
852
|
const anthropicJson = convertOpenAIToAnthropic(result.json);
|
|
728
|
-
logger.
|
|
853
|
+
logger.debug({
|
|
729
854
|
anthropicContentTypes: anthropicJson.content?.map(c => c.type),
|
|
730
855
|
stopReason: anthropicJson.stop_reason
|
|
731
856
|
}, "Converted to Anthropic format");
|
|
@@ -747,67 +872,6 @@ async function invokeAzureOpenAI(body) {
|
|
|
747
872
|
}
|
|
748
873
|
}
|
|
749
874
|
|
|
750
|
-
/**
|
|
751
|
-
* Convert Azure Responses API response to Anthropic format
|
|
752
|
-
*/
|
|
753
|
-
function convertResponsesAPIToAnthropic(response, model) {
|
|
754
|
-
const content = [];
|
|
755
|
-
const outputArray = response.output || [];
|
|
756
|
-
|
|
757
|
-
// Extract text content from message output
|
|
758
|
-
const messageOutput = outputArray.find(o => o.type === "message");
|
|
759
|
-
if (messageOutput?.content) {
|
|
760
|
-
for (const item of messageOutput.content) {
|
|
761
|
-
if (item.type === "output_text" && item.text) {
|
|
762
|
-
content.push({ type: "text", text: item.text });
|
|
763
|
-
}
|
|
764
|
-
}
|
|
765
|
-
}
|
|
766
|
-
|
|
767
|
-
// Extract tool calls from function_call outputs
|
|
768
|
-
const toolCalls = outputArray
|
|
769
|
-
.filter(o => o.type === "function_call")
|
|
770
|
-
.map(tc => ({
|
|
771
|
-
type: "tool_use",
|
|
772
|
-
id: tc.call_id || tc.id || `call_${Date.now()}`,
|
|
773
|
-
name: tc.name,
|
|
774
|
-
input: typeof tc.arguments === 'string' ? JSON.parse(tc.arguments || "{}") : (tc.arguments || {})
|
|
775
|
-
}));
|
|
776
|
-
|
|
777
|
-
content.push(...toolCalls);
|
|
778
|
-
|
|
779
|
-
// Handle reasoning_content for thinking models
|
|
780
|
-
if (content.length === 0 && response.reasoning_content) {
|
|
781
|
-
content.push({ type: "text", text: response.reasoning_content });
|
|
782
|
-
}
|
|
783
|
-
|
|
784
|
-
// Ensure at least empty text if no content
|
|
785
|
-
if (content.length === 0) {
|
|
786
|
-
content.push({ type: "text", text: "" });
|
|
787
|
-
}
|
|
788
|
-
|
|
789
|
-
// Determine stop reason
|
|
790
|
-
let stopReason = "end_turn";
|
|
791
|
-
if (toolCalls.length > 0) {
|
|
792
|
-
stopReason = "tool_use";
|
|
793
|
-
} else if (response.status === "incomplete" && response.incomplete_details?.reason === "max_output_tokens") {
|
|
794
|
-
stopReason = "max_tokens";
|
|
795
|
-
}
|
|
796
|
-
|
|
797
|
-
return {
|
|
798
|
-
id: response.id || `msg_${Date.now()}`,
|
|
799
|
-
type: "message",
|
|
800
|
-
role: "assistant",
|
|
801
|
-
content,
|
|
802
|
-
model: model || response.model,
|
|
803
|
-
stop_reason: stopReason,
|
|
804
|
-
stop_sequence: null,
|
|
805
|
-
usage: {
|
|
806
|
-
input_tokens: response.usage?.input_tokens || 0,
|
|
807
|
-
output_tokens: response.usage?.output_tokens || 0,
|
|
808
|
-
}
|
|
809
|
-
};
|
|
810
|
-
}
|
|
811
875
|
|
|
812
876
|
async function invokeOpenAI(body) {
|
|
813
877
|
if (!config.openai?.apiKey) {
|
|
@@ -841,8 +905,10 @@ async function invokeOpenAI(body) {
|
|
|
841
905
|
});
|
|
842
906
|
}
|
|
843
907
|
|
|
908
|
+
// System prompt injection disabled - breaks model response
|
|
909
|
+
|
|
844
910
|
const openAIBody = {
|
|
845
|
-
model: config.openai.model || "gpt-4o",
|
|
911
|
+
model: body._suggestionModeModel || body._tierModel || config.openai.model || "gpt-4o",
|
|
846
912
|
messages,
|
|
847
913
|
temperature: body.temperature ?? 0.7,
|
|
848
914
|
max_tokens: body.max_tokens ?? 4096,
|
|
@@ -858,25 +924,25 @@ async function invokeOpenAI(body) {
|
|
|
858
924
|
// Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools
|
|
859
925
|
toolsToSend = STANDARD_TOOLS;
|
|
860
926
|
toolsInjected = true;
|
|
861
|
-
logger.
|
|
927
|
+
logger.debug({
|
|
862
928
|
injectedToolCount: STANDARD_TOOLS.length,
|
|
863
|
-
injectedToolNames:
|
|
929
|
+
injectedToolNames: STANDARD_TOOL_NAMES,
|
|
864
930
|
reason: "Client did not send tools (passthrough mode)"
|
|
865
931
|
}, "=== INJECTING STANDARD TOOLS (OpenAI) ===");
|
|
866
932
|
}
|
|
867
933
|
|
|
868
934
|
if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
869
935
|
openAIBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend);
|
|
870
|
-
openAIBody.parallel_tool_calls =
|
|
936
|
+
openAIBody.parallel_tool_calls = false; // Disable parallel tool calls - GPT often makes duplicate calls
|
|
871
937
|
openAIBody.tool_choice = "auto"; // Let the model decide when to use tools
|
|
872
|
-
logger.
|
|
938
|
+
logger.debug({
|
|
873
939
|
toolCount: toolsToSend.length,
|
|
874
940
|
toolNames: toolsToSend.map(t => t.name),
|
|
875
941
|
toolsInjected
|
|
876
942
|
}, "=== SENDING TOOLS TO OPENAI ===");
|
|
877
943
|
}
|
|
878
944
|
|
|
879
|
-
logger.
|
|
945
|
+
logger.debug({
|
|
880
946
|
endpoint,
|
|
881
947
|
model: openAIBody.model,
|
|
882
948
|
hasTools: !!openAIBody.tools,
|
|
@@ -934,7 +1000,7 @@ async function invokeLlamaCpp(body) {
|
|
|
934
1000
|
}
|
|
935
1001
|
|
|
936
1002
|
if (deduplicated.length !== messages.length) {
|
|
937
|
-
logger.
|
|
1003
|
+
logger.debug({
|
|
938
1004
|
originalCount: messages.length,
|
|
939
1005
|
deduplicatedCount: deduplicated.length,
|
|
940
1006
|
removed: messages.length - deduplicated.length,
|
|
@@ -959,26 +1025,26 @@ async function invokeLlamaCpp(body) {
|
|
|
959
1025
|
if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) {
|
|
960
1026
|
toolsToSend = STANDARD_TOOLS;
|
|
961
1027
|
toolsInjected = true;
|
|
962
|
-
logger.
|
|
1028
|
+
logger.debug({
|
|
963
1029
|
injectedToolCount: STANDARD_TOOLS.length,
|
|
964
|
-
injectedToolNames:
|
|
1030
|
+
injectedToolNames: STANDARD_TOOL_NAMES,
|
|
965
1031
|
reason: "Client did not send tools (passthrough mode)"
|
|
966
1032
|
}, "=== INJECTING STANDARD TOOLS (llama.cpp) ===");
|
|
967
1033
|
} else if (!injectToolsLlamacpp) {
|
|
968
|
-
logger.
|
|
1034
|
+
logger.debug({}, "Tool injection disabled for llama.cpp (INJECT_TOOLS_LLAMACPP=false)");
|
|
969
1035
|
}
|
|
970
1036
|
|
|
971
1037
|
if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
972
1038
|
llamacppBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend);
|
|
973
1039
|
llamacppBody.tool_choice = "auto";
|
|
974
|
-
logger.
|
|
1040
|
+
logger.debug({
|
|
975
1041
|
toolCount: toolsToSend.length,
|
|
976
1042
|
toolNames: toolsToSend.map(t => t.name),
|
|
977
1043
|
toolsInjected
|
|
978
1044
|
}, "=== SENDING TOOLS TO LLAMA.CPP ===");
|
|
979
1045
|
}
|
|
980
1046
|
|
|
981
|
-
logger.
|
|
1047
|
+
logger.debug({
|
|
982
1048
|
endpoint,
|
|
983
1049
|
hasTools: !!llamacppBody.tools,
|
|
984
1050
|
toolCount: llamacppBody.tools?.length || 0,
|
|
@@ -1042,9 +1108,9 @@ async function invokeLMStudio(body) {
|
|
|
1042
1108
|
if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
|
|
1043
1109
|
toolsToSend = STANDARD_TOOLS;
|
|
1044
1110
|
toolsInjected = true;
|
|
1045
|
-
logger.
|
|
1111
|
+
logger.debug({
|
|
1046
1112
|
injectedToolCount: STANDARD_TOOLS.length,
|
|
1047
|
-
injectedToolNames:
|
|
1113
|
+
injectedToolNames: STANDARD_TOOL_NAMES,
|
|
1048
1114
|
reason: "Client did not send tools (passthrough mode)"
|
|
1049
1115
|
}, "=== INJECTING STANDARD TOOLS (LM Studio) ===");
|
|
1050
1116
|
}
|
|
@@ -1052,14 +1118,14 @@ async function invokeLMStudio(body) {
|
|
|
1052
1118
|
if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
|
|
1053
1119
|
lmstudioBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend);
|
|
1054
1120
|
lmstudioBody.tool_choice = "auto";
|
|
1055
|
-
logger.
|
|
1121
|
+
logger.debug({
|
|
1056
1122
|
toolCount: toolsToSend.length,
|
|
1057
1123
|
toolNames: toolsToSend.map(t => t.name),
|
|
1058
1124
|
toolsInjected
|
|
1059
1125
|
}, "=== SENDING TOOLS TO LM STUDIO ===");
|
|
1060
1126
|
}
|
|
1061
1127
|
|
|
1062
|
-
logger.
|
|
1128
|
+
logger.debug({
|
|
1063
1129
|
endpoint,
|
|
1064
1130
|
hasTools: !!lmstudioBody.tools,
|
|
1065
1131
|
toolCount: lmstudioBody.tools?.length || 0,
|
|
@@ -1080,7 +1146,7 @@ async function invokeBedrock(body) {
|
|
|
1080
1146
|
}
|
|
1081
1147
|
|
|
1082
1148
|
const bearerToken = config.bedrock.apiKey;
|
|
1083
|
-
logger.
|
|
1149
|
+
logger.debug({ authMethod: "Bearer Token" }, "=== BEDROCK AUTH ===");
|
|
1084
1150
|
|
|
1085
1151
|
// 2. Inject standard tools if needed
|
|
1086
1152
|
let toolsToSend = body.tools;
|
|
@@ -1089,9 +1155,9 @@ async function invokeBedrock(body) {
|
|
|
1089
1155
|
if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
|
|
1090
1156
|
toolsToSend = STANDARD_TOOLS;
|
|
1091
1157
|
toolsInjected = true;
|
|
1092
|
-
logger.
|
|
1158
|
+
logger.debug({
|
|
1093
1159
|
injectedToolCount: STANDARD_TOOLS.length,
|
|
1094
|
-
injectedToolNames:
|
|
1160
|
+
injectedToolNames: STANDARD_TOOL_NAMES,
|
|
1095
1161
|
reason: "Client did not send tools (passthrough mode)"
|
|
1096
1162
|
}, "=== INJECTING STANDARD TOOLS (Bedrock) ===");
|
|
1097
1163
|
}
|
|
@@ -1099,10 +1165,10 @@ async function invokeBedrock(body) {
|
|
|
1099
1165
|
const bedrockBody = { ...body, tools: toolsToSend };
|
|
1100
1166
|
|
|
1101
1167
|
// 4. Detect model family and convert format
|
|
1102
|
-
const modelId = config.bedrock.modelId;
|
|
1168
|
+
const modelId = body._tierModel || config.bedrock.modelId;
|
|
1103
1169
|
const modelFamily = detectModelFamily(modelId);
|
|
1104
1170
|
|
|
1105
|
-
logger.
|
|
1171
|
+
logger.debug({
|
|
1106
1172
|
modelId,
|
|
1107
1173
|
modelFamily,
|
|
1108
1174
|
hasTools: !!bedrockBody.tools,
|
|
@@ -1167,7 +1233,7 @@ async function invokeBedrock(body) {
|
|
|
1167
1233
|
const host = `bedrock-runtime.${config.bedrock.region}.amazonaws.com`;
|
|
1168
1234
|
const endpoint = `https://${host}${path}`;
|
|
1169
1235
|
|
|
1170
|
-
logger.
|
|
1236
|
+
logger.debug({
|
|
1171
1237
|
endpoint,
|
|
1172
1238
|
authMethod: "Bearer Token",
|
|
1173
1239
|
hasSystem: !!converseBody.system,
|
|
@@ -1200,7 +1266,7 @@ async function invokeBedrock(body) {
|
|
|
1200
1266
|
// Parse Converse API response (already parsed by performJsonRequest)
|
|
1201
1267
|
const converseResponse = response.json; // Use property, not method
|
|
1202
1268
|
|
|
1203
|
-
logger.
|
|
1269
|
+
logger.debug({
|
|
1204
1270
|
stopReason: converseResponse.stopReason,
|
|
1205
1271
|
inputTokens: converseResponse.usage?.inputTokens || 0,
|
|
1206
1272
|
outputTokens: converseResponse.usage?.outputTokens || 0,
|
|
@@ -1280,7 +1346,7 @@ async function invokeZai(body) {
|
|
|
1280
1346
|
"claude-3-haiku": "glm-4.5-air",
|
|
1281
1347
|
};
|
|
1282
1348
|
|
|
1283
|
-
const requestedModel = body.model || config.zai.model;
|
|
1349
|
+
const requestedModel = body._tierModel || body.model || config.zai.model;
|
|
1284
1350
|
let mappedModel = modelMap[requestedModel] || config.zai.model || "glm-4.7";
|
|
1285
1351
|
mappedModel = mappedModel.toLowerCase();
|
|
1286
1352
|
|
|
@@ -1357,7 +1423,7 @@ async function invokeZai(body) {
|
|
|
1357
1423
|
// "required" was forcing tools even for simple greetings
|
|
1358
1424
|
zaiBody.tool_choice = "auto";
|
|
1359
1425
|
// Also enable parallel tool calls
|
|
1360
|
-
zaiBody.parallel_tool_calls =
|
|
1426
|
+
zaiBody.parallel_tool_calls = false; // Disable parallel tool calls - GPT often makes duplicate calls
|
|
1361
1427
|
}
|
|
1362
1428
|
|
|
1363
1429
|
headers = {
|
|
@@ -1372,9 +1438,9 @@ async function invokeZai(body) {
|
|
|
1372
1438
|
// Inject standard tools if client didn't send any (passthrough mode)
|
|
1373
1439
|
if (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) {
|
|
1374
1440
|
zaiBody.tools = STANDARD_TOOLS;
|
|
1375
|
-
logger.
|
|
1441
|
+
logger.debug({
|
|
1376
1442
|
injectedToolCount: STANDARD_TOOLS.length,
|
|
1377
|
-
injectedToolNames:
|
|
1443
|
+
injectedToolNames: STANDARD_TOOL_NAMES,
|
|
1378
1444
|
reason: "Client did not send tools (passthrough mode)"
|
|
1379
1445
|
}, "=== INJECTING STANDARD TOOLS (Z.AI Anthropic) ===");
|
|
1380
1446
|
}
|
|
@@ -1386,7 +1452,7 @@ async function invokeZai(body) {
|
|
|
1386
1452
|
};
|
|
1387
1453
|
}
|
|
1388
1454
|
|
|
1389
|
-
logger.
|
|
1455
|
+
logger.debug({
|
|
1390
1456
|
endpoint,
|
|
1391
1457
|
format: isOpenAIFormat ? "openai" : "anthropic",
|
|
1392
1458
|
model: zaiBody.model,
|
|
@@ -1416,7 +1482,7 @@ async function invokeZai(body) {
|
|
|
1416
1482
|
|
|
1417
1483
|
const response = await performJsonRequest(endpoint, { headers, body: zaiBody }, "Z.AI");
|
|
1418
1484
|
|
|
1419
|
-
logger.
|
|
1485
|
+
logger.debug({
|
|
1420
1486
|
responseOk: response?.ok,
|
|
1421
1487
|
responseStatus: response?.status,
|
|
1422
1488
|
hasJson: !!response?.json,
|
|
@@ -1428,7 +1494,7 @@ async function invokeZai(body) {
|
|
|
1428
1494
|
// Convert OpenAI response back to Anthropic format if needed
|
|
1429
1495
|
if (isOpenAIFormat && response?.ok && response?.json) {
|
|
1430
1496
|
const anthropicJson = convertOpenAIToAnthropic(response.json);
|
|
1431
|
-
logger.
|
|
1497
|
+
logger.debug({
|
|
1432
1498
|
convertedContent: JSON.stringify(anthropicJson.content).substring(0, 200),
|
|
1433
1499
|
}, "=== Z.AI CONVERTED RESPONSE ===");
|
|
1434
1500
|
// Return in the same format as other providers (with ok, status, json)
|
|
@@ -1448,6 +1514,118 @@ async function invokeZai(body) {
|
|
|
1448
1514
|
|
|
1449
1515
|
|
|
1450
1516
|
|
|
1517
|
+
/**
 * Moonshot AI (Kimi) Provider
 *
 * Moonshot offers Kimi models through an OpenAI-compatible chat completions API.
 * Uses native system role support (unlike Z.AI which merges into user message).
 *
 * The Anthropic-style request is converted to the OpenAI chat format, sent
 * non-streaming, and a successful JSON reply is converted back to Anthropic
 * format so it has the same envelope as the other providers.
 *
 * @param {object} body - Anthropic-style request. Reads `messages`, `system`,
 *   `tools`, `model`, `max_tokens`, `temperature`, `top_p`, and the
 *   router-injected `_tierModel`.
 * @returns {Promise<object>} When the upstream reply is ok and has JSON:
 *   `{ ok, status, json, text, contentType, headers }` with `json` in Anthropic
 *   format. Otherwise the unmodified result of `performJsonRequest`.
 * @throws {Error} If `config.moonshot.apiKey` is not configured.
 */
async function invokeMoonshot(body) {
  if (!config.moonshot?.apiKey) {
    throw new Error("Moonshot API key is not configured. Set MOONSHOT_API_KEY in your .env file.");
  }

  const {
    convertAnthropicToolsToOpenRouter,
    convertAnthropicMessagesToOpenRouter
  } = require("./openrouter-utils");

  const endpoint = config.moonshot.endpoint || "https://api.moonshot.ai/v1/chat/completions";

  // Model mapping: Anthropic names → Moonshot/Kimi names
  const modelMap = {
    "claude-sonnet-4-5-20250929": "kimi-k2-turbo-preview",
    "claude-sonnet-4-5": "kimi-k2-turbo-preview",
    "claude-sonnet-4.5": "kimi-k2-turbo-preview",
    "claude-3-5-sonnet": "kimi-k2-turbo-preview",
    "claude-haiku-4-5-20251001": "kimi-k2-turbo-preview",
    "claude-haiku-4-5": "kimi-k2-turbo-preview",
    "claude-3-haiku": "kimi-k2-turbo-preview",
  };

  const requestedModel = body._tierModel || body.model || config.moonshot.model;
  // A tier-selected model that is not an Anthropic alias is used as-is instead
  // of being replaced by the configured default, matching how the OpenAI and
  // Bedrock providers in this file treat `_tierModel`.
  const mappedModel =
    modelMap[requestedModel] ||
    body._tierModel ||
    config.moonshot.model ||
    "kimi-k2-turbo-preview";

  // Convert messages using existing utility
  const messages = convertAnthropicMessagesToOpenRouter(body.messages || []);

  // Moonshot natively supports system role — add as system message
  if (body.system) {
    // Array entries may be plain strings or objects carrying a `text` field;
    // anything else contributes an empty line rather than "[object Object]".
    const systemContent = Array.isArray(body.system)
      ? body.system
          .map((block) => {
            if (typeof block === "string") {
              return block;
            }
            return typeof block?.text === "string" ? block.text : "";
          })
          .join("\n")
      : body.system;
    messages.unshift({ role: "system", content: systemContent });
  }

  const moonshotBody = {
    model: mappedModel,
    messages,
    max_tokens: body.max_tokens || 4096,
    temperature: body.temperature ?? 0.7,
    top_p: body.top_p ?? 1.0,
    stream: false, // Force non-streaming - OpenAI SSE to Anthropic SSE conversion not implemented
  };

  // Convert and add tools if present
  if (Array.isArray(body.tools) && body.tools.length > 0) {
    moonshotBody.tools = convertAnthropicToolsToOpenRouter(body.tools);
    moonshotBody.tool_choice = "auto";
    moonshotBody.parallel_tool_calls = false;
  }

  const headers = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${config.moonshot.apiKey}`,
  };

  logger.debug({
    endpoint,
    model: moonshotBody.model,
    originalModel: requestedModel,
    messageCount: moonshotBody.messages?.length || 0,
    hasTools: !!moonshotBody.tools,
    toolCount: moonshotBody.tools?.length || 0,
  }, "=== Moonshot REQUEST ===");

  const response = await performJsonRequest(endpoint, { headers, body: moonshotBody }, "Moonshot");

  // Truncated preview for log output: strings are cut directly, any other
  // value is JSON-serialised first (undefined serialises to an empty string).
  const preview = (value, limit) =>
    typeof value === "string"
      ? value.substring(0, limit)
      : String(JSON.stringify(value) || "").substring(0, limit);

  const rawMsg = response?.json?.choices?.[0]?.message;
  logger.debug({
    responseOk: response?.ok,
    responseStatus: response?.status,
    hasJson: !!response?.json,
    contentType: typeof rawMsg?.content,
    contentValue: preview(rawMsg?.content, 300),
    hasReasoning: !!rawMsg?.reasoning_content,
    reasoningType: typeof rawMsg?.reasoning_content,
    reasoningValue: preview(rawMsg?.reasoning_content, 300),
    finishReason: response?.json?.choices?.[0]?.finish_reason,
    messageKeys: rawMsg ? Object.keys(rawMsg) : [],
    fullRawResponse: String(JSON.stringify(response?.json) || '').substring(0, 800),
  }, "=== Moonshot RAW RESPONSE ===");

  // Convert OpenAI response back to Anthropic format
  if (response?.ok && response?.json) {
    const anthropicJson = convertOpenAIToAnthropic(response.json);
    logger.debug({
      convertedContent: JSON.stringify(anthropicJson.content).substring(0, 500),
      contentLength: anthropicJson.content?.length,
      firstContentType: anthropicJson.content?.[0]?.type,
      firstContentText: anthropicJson.content?.[0]?.text?.substring(0, 300),
    }, "=== Moonshot CONVERTED RESPONSE ===");
    // Return in the same envelope shape as the raw response (ok, status, json, ...)
    return {
      ok: response.ok,
      status: response.status,
      json: anthropicJson,
      text: JSON.stringify(anthropicJson),
      contentType: "application/json",
      headers: response.headers,
    };
  }

  // Error or non-JSON replies are passed through untouched
  return response;
}
|
|
1628
|
+
|
|
1451
1629
|
/**
|
|
1452
1630
|
* Convert OpenAI response to Anthropic format
|
|
1453
1631
|
*/
|
|
@@ -1463,11 +1641,17 @@ function convertOpenAIToAnthropic(response) {
|
|
|
1463
1641
|
// Add text content from message.content
|
|
1464
1642
|
// Don't add placeholder text if there are tool_calls - tools are the actual response
|
|
1465
1643
|
const hasToolCalls = Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1644
|
+
|
|
1645
|
+
// Extract text content - handle thinking models that split content/reasoning
|
|
1646
|
+
const textContent = typeof message.content === 'string' ? message.content : '';
|
|
1647
|
+
const reasoningContent = typeof message.reasoning_content === 'string' ? message.reasoning_content : '';
|
|
1648
|
+
|
|
1649
|
+
if (textContent) {
|
|
1650
|
+
// Has regular content - use it directly (ignore reasoning_content chain-of-thought)
|
|
1651
|
+
content.push({ type: "text", text: textContent });
|
|
1652
|
+
} else if (reasoningContent) {
|
|
1653
|
+
// Fallback: thinking models where content is empty but reasoning has the output
|
|
1654
|
+
content.push({ type: "text", text: reasoningContent });
|
|
1471
1655
|
}
|
|
1472
1656
|
|
|
1473
1657
|
// Convert tool calls
|
|
@@ -1488,13 +1672,14 @@ function convertOpenAIToAnthropic(response) {
|
|
|
1488
1672
|
}
|
|
1489
1673
|
|
|
1490
1674
|
// Determine stop reason
|
|
1675
|
+
// IMPORTANT: Check for actual tool_calls presence, not just finish_reason string.
|
|
1676
|
+
// Some providers (Moonshot, etc.) return finish_reason: "stop" even when tool_calls exist.
|
|
1677
|
+
// If we don't set stop_reason to "tool_use", the CLI won't execute the tool calls.
|
|
1491
1678
|
let stopReason = "end_turn";
|
|
1492
|
-
if (
|
|
1679
|
+
if (hasToolCalls) {
|
|
1493
1680
|
stopReason = "tool_use";
|
|
1494
1681
|
} else if (choice.finish_reason === "length") {
|
|
1495
1682
|
stopReason = "max_tokens";
|
|
1496
|
-
} else if (choice.finish_reason === "stop") {
|
|
1497
|
-
stopReason = "end_turn";
|
|
1498
1683
|
}
|
|
1499
1684
|
|
|
1500
1685
|
return {
|
|
@@ -1580,7 +1765,7 @@ async function invokeVertex(body) {
|
|
|
1580
1765
|
};
|
|
1581
1766
|
|
|
1582
1767
|
// Map model name
|
|
1583
|
-
const requestedModel = body.model || config.vertex.model;
|
|
1768
|
+
const requestedModel = body._tierModel || body.model || config.vertex.model;
|
|
1584
1769
|
const geminiModel = modelMap[requestedModel] || config.vertex.model || "gemini-2.0-flash";
|
|
1585
1770
|
|
|
1586
1771
|
// Construct Gemini API endpoint
|
|
@@ -1626,7 +1811,7 @@ async function invokeVertex(body) {
|
|
|
1626
1811
|
"Content-Type": "application/json",
|
|
1627
1812
|
};
|
|
1628
1813
|
|
|
1629
|
-
logger.
|
|
1814
|
+
logger.debug({
|
|
1630
1815
|
endpoint: endpoint.replace(apiKey, "***"),
|
|
1631
1816
|
model: geminiModel,
|
|
1632
1817
|
originalModel: requestedModel,
|
|
@@ -1655,7 +1840,7 @@ async function invokeVertex(body) {
|
|
|
1655
1840
|
// Convert Gemini response to Anthropic format
|
|
1656
1841
|
if (response?.json) {
|
|
1657
1842
|
const anthropicJson = convertGeminiToAnthropic(response.json, requestedModel);
|
|
1658
|
-
logger.
|
|
1843
|
+
logger.debug({
|
|
1659
1844
|
convertedContent: JSON.stringify(anthropicJson.content).substring(0, 200),
|
|
1660
1845
|
}, "=== VERTEX AI (GEMINI) CONVERTED RESPONSE ===");
|
|
1661
1846
|
return {
|
|
@@ -1816,35 +2001,44 @@ function convertGeminiToAnthropic(response, requestedModel) {
|
|
|
1816
2001
|
}
|
|
1817
2002
|
|
|
1818
2003
|
async function invokeModel(body, options = {}) {
|
|
1819
|
-
const {
|
|
2004
|
+
const { determineProviderSmart, isFallbackEnabled, getFallbackProvider } = require("./routing");
|
|
1820
2005
|
const metricsCollector = getMetricsCollector();
|
|
1821
2006
|
const registry = getCircuitBreakerRegistry();
|
|
1822
2007
|
const healthTracker = getHealthTracker();
|
|
1823
2008
|
|
|
1824
|
-
//
|
|
1825
|
-
const
|
|
1826
|
-
|
|
1827
|
-
|
|
2009
|
+
// Determine provider via async tier routing
|
|
2010
|
+
const routingResult = options.forceProvider
|
|
2011
|
+
? { provider: options.forceProvider, model: null, method: 'forced' }
|
|
2012
|
+
: await determineProviderSmart(body);
|
|
2013
|
+
const initialProvider = routingResult.provider;
|
|
2014
|
+
const tierSelectedModel = routingResult.model;
|
|
2015
|
+
|
|
2016
|
+
// Inject tier-selected model into body so provider functions can use it
|
|
2017
|
+
if (tierSelectedModel) {
|
|
2018
|
+
body._tierModel = tierSelectedModel;
|
|
2019
|
+
}
|
|
1828
2020
|
|
|
1829
2021
|
// Build routing decision object for response headers
|
|
1830
2022
|
const routingDecision = {
|
|
1831
2023
|
provider: initialProvider,
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
1837
|
-
|
|
2024
|
+
tier: routingResult.tier || null,
|
|
2025
|
+
model: tierSelectedModel || null,
|
|
2026
|
+
score: routingResult.score,
|
|
2027
|
+
threshold: routingResult.threshold,
|
|
2028
|
+
mode: routingResult.mode,
|
|
2029
|
+
reason: routingResult.reason,
|
|
2030
|
+
method: routingResult.method || 'static',
|
|
1838
2031
|
};
|
|
1839
2032
|
|
|
1840
2033
|
logger.debug({
|
|
1841
2034
|
initialProvider,
|
|
1842
|
-
|
|
2035
|
+
tierSelectedModel,
|
|
2036
|
+
tier: routingResult.tier,
|
|
1843
2037
|
fallbackEnabled: isFallbackEnabled(),
|
|
1844
2038
|
toolCount: Array.isArray(body?.tools) ? body.tools.length : 0,
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
2039
|
+
score: routingResult.score,
|
|
2040
|
+
reason: routingResult.reason,
|
|
2041
|
+
method: routingResult.method,
|
|
1848
2042
|
}, "Provider routing decision");
|
|
1849
2043
|
|
|
1850
2044
|
metricsCollector.recordProviderRouting(initialProvider);
|
|
@@ -1885,6 +2079,8 @@ async function invokeModel(body, options = {}) {
|
|
|
1885
2079
|
return await invokeZai(body);
|
|
1886
2080
|
} else if (initialProvider === "vertex") {
|
|
1887
2081
|
return await invokeVertex(body);
|
|
2082
|
+
} else if (initialProvider === "moonshot") {
|
|
2083
|
+
return await invokeMoonshot(body);
|
|
1888
2084
|
}
|
|
1889
2085
|
return await invokeDatabricks(body);
|
|
1890
2086
|
});
|
|
@@ -1920,11 +2116,10 @@ async function invokeModel(body, options = {}) {
|
|
|
1920
2116
|
metricsCollector.recordProviderFailure(initialProvider);
|
|
1921
2117
|
healthTracker.recordFailure(initialProvider, err, err.status);
|
|
1922
2118
|
|
|
1923
|
-
// Check if we should fallback
|
|
2119
|
+
// Check if we should fallback (any provider can fall back, not just ollama)
|
|
1924
2120
|
const shouldFallback =
|
|
1925
|
-
preferOllama &&
|
|
1926
|
-
initialProvider === "ollama" &&
|
|
1927
2121
|
isFallbackEnabled() &&
|
|
2122
|
+
initialProvider !== getFallbackProvider() &&
|
|
1928
2123
|
!options.disableFallback;
|
|
1929
2124
|
|
|
1930
2125
|
if (!shouldFallback) {
|
|
@@ -1941,7 +2136,7 @@ async function invokeModel(body, options = {}) {
|
|
|
1941
2136
|
fallbackProvider,
|
|
1942
2137
|
reason,
|
|
1943
2138
|
error: err.message,
|
|
1944
|
-
}, "
|
|
2139
|
+
}, "Primary provider failed, attempting transparent fallback");
|
|
1945
2140
|
|
|
1946
2141
|
metricsCollector.recordFallbackAttempt(initialProvider, fallbackProvider, reason);
|
|
1947
2142
|
|
|
@@ -1974,6 +2169,8 @@ async function invokeModel(body, options = {}) {
|
|
|
1974
2169
|
return await invokeZai(body);
|
|
1975
2170
|
} else if (fallbackProvider === "vertex") {
|
|
1976
2171
|
return await invokeVertex(body);
|
|
2172
|
+
} else if (fallbackProvider === "moonshot") {
|
|
2173
|
+
return await invokeMoonshot(body);
|
|
1977
2174
|
}
|
|
1978
2175
|
return await invokeDatabricks(body);
|
|
1979
2176
|
});
|
|
@@ -2023,7 +2220,7 @@ async function invokeModel(body, options = {}) {
|
|
|
2023
2220
|
fallbackProvider,
|
|
2024
2221
|
originalError: err.message,
|
|
2025
2222
|
fallbackError: fallbackErr.message,
|
|
2026
|
-
}, "Both
|
|
2223
|
+
}, "Both primary and fallback provider failed");
|
|
2027
2224
|
|
|
2028
2225
|
// Return fallback error (more actionable than Ollama error)
|
|
2029
2226
|
throw fallbackErr;
|