lynkr 7.2.0 → 7.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agents/store.js +28 -13
- package/src/budget/index.js +13 -7
- package/src/cache/prompt.js +9 -2
- package/src/db/index.js +370 -333
- package/src/clients/databricks.js.backup +0 -1036
|
@@ -1,1036 +0,0 @@
|
|
|
1
|
-
const config = require("../config");
|
|
2
|
-
const http = require("http");
|
|
3
|
-
const https = require("https");
|
|
4
|
-
const { withRetry } = require("./retry");
|
|
5
|
-
const { getCircuitBreakerRegistry } = require("./circuit-breaker");
|
|
6
|
-
const { getMetricsCollector } = require("../observability/metrics");
|
|
7
|
-
const logger = require("../logger");
|
|
8
|
-
const { STANDARD_TOOLS } = require("./standard-tools");
|
|
9
|
-
const { convertAnthropicToolsToOpenRouter } = require("./openrouter-utils");
|
|
10
|
-
const {
|
|
11
|
-
detectModelFamily,
|
|
12
|
-
convertAnthropicToBedrockFormat,
|
|
13
|
-
convertBedrockResponseToAnthropic
|
|
14
|
-
} = require("./bedrock-utils");
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
// Refuse to load on runtimes that lack a global fetch(); every request below depends on it.
if (!(typeof fetch === "function")) {
  throw new Error("Node 18+ is required for the built-in fetch API.");
}

// Keep-alive agents for plain HTTP and TLS, both built from the same pool settings
// so connections to a provider can be reused across requests.
const [httpAgent, httpsAgent] = [http, https].map(
  (transport) =>
    new transport.Agent({
      keepAlive: true,
      maxSockets: 50,
      maxFreeSockets: 10,
      timeout: 60000,
      keepAliveMsecs: 30000,
    }),
);
|
|
41
|
-
|
|
42
|
-
/**
 * POST a JSON body to a provider endpoint and return a normalised result object.
 *
 * Streaming requests (`body.stream === true`) are sent once, without retries, and
 * resolve to `{ ok, status, stream, contentType, headers }`.
 * All other requests run inside `withRetry` and resolve to
 * `{ ok, status, json, text, contentType, headers }`, where `json` is `null`
 * when the response text is not valid JSON.
 *
 * @param {string} url - Endpoint to POST to.
 * @param {{ headers?: object, body: object }} request - Headers and JSON-serialisable body.
 * @param {string} providerLabel - Provider name used in log entries.
 * @returns {Promise<object>} Normalised response (shape depends on streaming, see above).
 */
async function performJsonRequest(url, { headers = {}, body }, providerLabel) {
  // NOTE(review): the built-in fetch is not documented to accept an `agent` option
  // (undici uses `dispatcher`) - confirm these pooled agents are actually honoured.
  const agent = url.startsWith('https:') ? httpsAgent : httpAgent;
  const isStreaming = body.stream === true;

  // Streaming requests can't be retried, so handle them directly
  if (isStreaming) {
    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      agent,
    });

    logger.debug({
      provider: providerLabel,
      status: response.status,
      streaming: true,
    }, `${providerLabel} API streaming response`);

    if (!response.ok) {
      // NOTE(review): reading the text here consumes the response body, so the
      // `stream` returned below has presumably already been drained - confirm
      // callers do not try to read it on error.
      const errorText = await response.text();
      logger.warn({
        provider: providerLabel,
        status: response.status,
        error: errorText.substring(0, 200),
      }, `${providerLabel} API streaming error`);
    }

    return {
      ok: response.ok,
      status: response.status,
      stream: response.body, // Return the readable stream
      contentType: response.headers.get("content-type"),
      headers: response.headers,
    };
  }

  // Non-streaming requests use retry logic
  return withRetry(async () => {
    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      agent,
    });
    const text = await response.text();

    logger.debug({
      provider: providerLabel,
      status: response.status,
      responseLength: text.length,
    }, `${providerLabel} API response`);

    // Non-JSON payloads are tolerated: callers still receive the raw `text`.
    let json;
    try {
      json = JSON.parse(text);
    } catch {
      json = null;
    }

    const result = {
      ok: response.ok,
      status: response.status,
      json,
      text,
      contentType: response.headers.get("content-type"),
      headers: response.headers,
    };

    // Log errors for retry logic
    if (!response.ok) {
      logger.warn({
        provider: providerLabel,
        status: response.status,
        error: json?.error || text.substring(0, 200),
      }, `${providerLabel} API error`);
    }

    return result;
  }, {
    // `??` rather than `||`: an explicit 0 (e.g. maxRetries: 0 to disable retries)
    // must be respected instead of silently falling back to the default.
    maxRetries: config.apiRetry?.maxRetries ?? 3,
    initialDelay: config.apiRetry?.initialDelay ?? 1000,
    maxDelay: config.apiRetry?.maxDelay ?? 30000,
  });
}
|
|
127
|
-
|
|
128
|
-
/**
 * Send a chat request to the configured Databricks serving endpoint.
 *
 * Works on a shallow copy of `body`, injects STANDARD_TOOLS when the client sent
 * no tools, and converts Anthropic-style tools to the OpenAI function format
 * unless the first tool already carries `type: "function"`.
 *
 * @param {object} body - Incoming request payload.
 * @returns {Promise<object>} Result of performJsonRequest.
 * @throws {Error} When `config.databricks.url` is not set.
 */
async function invokeDatabricks(body) {
  const settings = config.databricks;
  if (!settings?.url) {
    throw new Error("Databricks configuration is missing required URL.");
  }

  // Shallow copy so the caller's object is never modified.
  const payload = { ...body };

  const clientSentTools = Array.isArray(payload.tools) && payload.tools.length > 0;
  if (!clientSentTools) {
    // Passthrough mode: fall back to the standard tool set.
    payload.tools = STANDARD_TOOLS;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Databricks) ===");
  }

  const currentTools = payload.tools;
  if (Array.isArray(currentTools) && currentTools.length > 0) {
    // Only the first entry is inspected to decide whether conversion is needed.
    if (currentTools[0]?.type === "function") {
      logger.debug({
        toolCount: currentTools.length,
        toolNames: currentTools.map(t => t.function?.name),
      }, "Tools already in OpenAI format, skipping conversion");
    } else {
      payload.tools = convertAnthropicToolsToOpenRouter(currentTools);
      logger.debug({
        convertedToolCount: payload.tools.length,
        convertedToolNames: payload.tools.map(t => t.function?.name),
      }, "Converted tools to OpenAI format for Databricks");
    }
  }

  return performJsonRequest(
    settings.url,
    {
      headers: {
        Authorization: `Bearer ${settings.apiKey}`,
        "Content-Type": "application/json",
      },
      body: payload,
    },
    "Databricks",
  );
}
|
|
171
|
-
|
|
172
|
-
/**
 * Forward an Anthropic-format request to the configured Azure Anthropic endpoint.
 *
 * When the client sent no tools, STANDARD_TOOLS are injected. The injection is
 * applied to a shallow copy of `body`, so the caller's request object is left
 * untouched (matching how invokeDatabricks treats its input).
 *
 * @param {object} body - Incoming Anthropic-format request payload.
 * @returns {Promise<object>} Result of performJsonRequest.
 * @throws {Error} When `config.azureAnthropic.endpoint` is not set.
 */
async function invokeAzureAnthropic(body) {
  if (!config.azureAnthropic?.endpoint) {
    throw new Error("Azure Anthropic endpoint is not configured.");
  }

  // Copy first: assigning `tools` on the caller's object would leak the injected
  // tool list into whatever the caller does with the request afterwards.
  const requestBody = { ...body };

  // Inject standard tools if client didn't send any (passthrough mode)
  if (!Array.isArray(requestBody.tools) || requestBody.tools.length === 0) {
    requestBody.tools = STANDARD_TOOLS;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Azure Anthropic) ===");
  }

  const headers = {
    "Content-Type": "application/json",
    "x-api-key": config.azureAnthropic.apiKey,
    // Falls back to "2023-06-01" when no version is configured.
    "anthropic-version": config.azureAnthropic.version ?? "2023-06-01",
  };

  return performJsonRequest(
    config.azureAnthropic.endpoint,
    { headers, body: requestBody },
    "Azure Anthropic",
  );
}
|
|
198
|
-
|
|
199
|
-
/**
 * Send a chat request to the configured Ollama server (`<endpoint>/api/chat`).
 *
 * Message content given as an array of blocks is reduced to the text blocks,
 * joined with newlines. STANDARD_TOOLS are used when the client sent no tools.
 *
 * @param {object} body - Incoming Anthropic-format request payload.
 * @returns {Promise<object>} Result of performJsonRequest.
 * @throws {Error} When `config.ollama.endpoint` is not set.
 */
async function invokeOllama(body) {
  if (!config.ollama?.endpoint) {
    throw new Error("Ollama endpoint is not configured.");
  }

  const { convertAnthropicToolsToOllama } = require("./ollama-utils");

  // Collapse an array of content blocks into one string; anything else passes through.
  const flattenContent = (content) => {
    if (!Array.isArray(content)) {
      return content;
    }
    const textParts = [];
    for (const block of content) {
      if (block.type === 'text') {
        textParts.push(block.text || '');
      }
    }
    return textParts.join('\n');
  };

  const ollamaBody = {
    model: config.ollama.model,
    messages: (body.messages || []).map((msg) => ({
      role: msg.role,
      content: flattenContent(msg.content) || '',
    })),
    stream: body.stream ?? false,
    options: {
      temperature: body.temperature ?? 0.7,
      num_predict: body.max_tokens ?? 4096,
      top_p: body.top_p ?? 1.0,
    },
  };

  // Passthrough mode: no client tools means the standard set is sent instead.
  const clientSentTools = Array.isArray(body.tools) && body.tools.length > 0;
  const toolsInjected = !clientSentTools;
  const toolsToSend = clientSentTools ? body.tools : STANDARD_TOOLS;

  if (toolsInjected) {
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Ollama) ===");
  }

  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    ollamaBody.tools = convertAnthropicToolsToOllama(toolsToSend);
    logger.info({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map(t => t.name),
      toolsInjected
    }, "Sending tools to Ollama");
  }

  return performJsonRequest(
    `${config.ollama.endpoint}/api/chat`,
    { headers: { "Content-Type": "application/json" }, body: ollamaBody },
    "Ollama",
  );
}
|
|
265
|
-
|
|
266
|
-
/**
 * Send a chat request to the configured OpenRouter endpoint.
 *
 * Messages are converted with convertAnthropicMessagesToOpenRouter, `body.system`
 * (if present) is prepended as a system message, and STANDARD_TOOLS are used when
 * the client sent no tools.
 *
 * @param {object} body - Incoming Anthropic-format request payload.
 * @returns {Promise<object>} Result of performJsonRequest.
 * @throws {Error} When the OpenRouter endpoint or API key is not configured.
 */
async function invokeOpenRouter(body) {
  if (!(config.openrouter?.endpoint && config.openrouter?.apiKey)) {
    throw new Error("OpenRouter endpoint or API key is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter: toOpenRouterTools,
    convertAnthropicMessagesToOpenRouter: toOpenRouterMessages
  } = require("./openrouter-utils");

  const endpoint = config.openrouter.endpoint;
  const headers = {
    "Authorization": `Bearer ${config.openrouter.apiKey}`,
    "Content-Type": "application/json",
    "HTTP-Referer": "https://localhost:8080",
    "X-Title": "Claude-Ollama-Proxy"
  };

  const messages = toOpenRouterMessages(body.messages || []);
  if (body.system) {
    // The system prompt travels as the first chat message in this format.
    messages.unshift({ role: "system", content: body.system });
  }

  const openRouterBody = {
    model: config.openrouter.model,
    messages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 4096,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // Passthrough mode: no client tools means the standard set is sent instead.
  const clientSentTools = Array.isArray(body.tools) && body.tools.length > 0;
  const toolsInjected = !clientSentTools;
  const toolsToSend = clientSentTools ? body.tools : STANDARD_TOOLS;

  if (toolsInjected) {
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (OpenRouter) ===");
  }

  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    openRouterBody.tools = toOpenRouterTools(toolsToSend);
    logger.info({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map(t => t.name),
      toolsInjected
    }, "Sending tools to OpenRouter");
  }

  return performJsonRequest(endpoint, { headers, body: openRouterBody }, "OpenRouter");
}
|
|
330
|
-
|
|
331
|
-
/**
 * Classify an Azure OpenAI endpoint URL by the path fragment it contains.
 *
 * Fragments are checked in order, so a URL containing several of them resolves
 * to the first match: "responses", then "models", then "deployments".
 *
 * @param {string} url - Configured Azure OpenAI endpoint.
 * @returns {"responses"|"models"|"deployments"} Detected endpoint format.
 * @throws {Error} When none of the known fragments is present.
 */
function detectAzureFormat(url) {
  const knownFragments = [
    ["/openai/responses", "responses"],
    ["/models/", "models"],
    ["/openai/deployments", "deployments"],
  ];

  const hit = knownFragments.find(([fragment]) => url.includes(fragment));
  if (hit === undefined) {
    throw new Error("Unknown Azure OpenAI endpoint");
  }
  return hit[1];
}
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
/**
 * Send a chat request to the configured Azure OpenAI endpoint.
 *
 * The endpoint format is derived from the URL via detectAzureFormat. For the
 * "responses" format, `max_tokens` is renamed to `max_completion_tokens` and
 * `temperature` / `top_p` are removed before sending. STANDARD_TOOLS are used
 * when the client sent no tools.
 *
 * @param {object} body - Incoming Anthropic-format request payload.
 * @returns {Promise<object>} Result of performJsonRequest.
 * @throws {Error} When endpoint or API key is missing, or the endpoint format is unknown.
 */
async function invokeAzureOpenAI(body) {
  if (!(config.azureOpenAI?.endpoint && config.azureOpenAI?.apiKey)) {
    throw new Error("Azure OpenAI endpoint or API key is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter: toOpenAITools,
    convertAnthropicMessagesToOpenRouter: toOpenAIMessages
  } = require("./openrouter-utils");

  const endpoint = config.azureOpenAI.endpoint;
  const format = detectAzureFormat(endpoint);

  // Authentication goes in the "api-key" header rather than "Authorization".
  const headers = {
    "api-key": config.azureOpenAI.apiKey,
    "Content-Type": "application/json"
  };

  const messages = toOpenAIMessages(body.messages || []);
  if (body.system) {
    // The system prompt travels as the first chat message in this format.
    messages.unshift({ role: "system", content: body.system });
  }

  const azureBody = {
    messages,
    temperature: body.temperature ?? 0.3, // lower default than the other providers in this file
    max_tokens: Math.min(body.max_tokens ?? 4096, 16384), // never request more than 16384 tokens
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false,
    model: config.azureOpenAI.deployment
  };

  // Passthrough mode: no client tools means the standard set is sent instead.
  const clientSentTools = Array.isArray(body.tools) && body.tools.length > 0;
  const toolsInjected = !clientSentTools;
  const toolsToSend = clientSentTools ? body.tools : STANDARD_TOOLS;

  if (toolsInjected) {
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS ===");
  }

  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    azureBody.tools = toOpenAITools(toolsToSend);
    azureBody.parallel_tool_calls = true;
    azureBody.tool_choice = "auto";
    logger.info({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map(t => t.name),
      toolsInjected,
      hasSystemMessage: !!body.system,
      messageCount: messages.length,
      temperature: azureBody.temperature,
      sampleTool: azureBody.tools[0] // first converted tool, logged for inspection
    }, "=== SENDING TOOLS TO AZURE OPENAI ===");
  }

  // Logged before any format-specific field rewriting below.
  logger.info({
    endpoint,
    hasTools: !!azureBody.tools,
    toolCount: azureBody.tools?.length || 0,
    temperature: azureBody.temperature,
    max_tokens: azureBody.max_tokens,
    tool_choice: azureBody.tool_choice
  }, "=== AZURE OPENAI REQUEST ===");

  switch (format) {
    case "deployments":
    case "models":
      break;
    case "responses":
      azureBody.max_completion_tokens = azureBody.max_tokens;
      delete azureBody.max_tokens;
      delete azureBody.temperature;
      delete azureBody.top_p;
      break;
    default:
      throw new Error(`Unsupported Azure OpenAI endpoint format: ${format}`);
  }

  return performJsonRequest(endpoint, { headers, body: azureBody }, "Azure OpenAI");
}
|
|
431
|
-
|
|
432
|
-
/**
 * Send a chat request to OpenAI's chat-completions API.
 *
 * Uses `config.openai.endpoint` when set, otherwise the public
 * https://api.openai.com/v1/chat/completions URL, and `config.openai.model`
 * with "gpt-4o" as the fallback. STANDARD_TOOLS are used when the client sent
 * no tools.
 *
 * @param {object} body - Incoming Anthropic-format request payload.
 * @returns {Promise<object>} Result of performJsonRequest.
 * @throws {Error} When `config.openai.apiKey` is not set.
 */
async function invokeOpenAI(body) {
  if (!config.openai?.apiKey) {
    throw new Error("OpenAI API key is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter: toOpenAITools,
    convertAnthropicMessagesToOpenRouter: toOpenAIMessages
  } = require("./openrouter-utils");

  const endpoint = config.openai.endpoint || "https://api.openai.com/v1/chat/completions";

  const headers = {
    "Authorization": `Bearer ${config.openai.apiKey}`,
    "Content-Type": "application/json",
  };
  const organization = config.openai.organization;
  if (organization) {
    // Only sent when an organization is configured.
    headers["OpenAI-Organization"] = organization;
  }

  const messages = toOpenAIMessages(body.messages || []);
  if (body.system) {
    // The system prompt travels as the first chat message in this format.
    messages.unshift({ role: "system", content: body.system });
  }

  const openAIBody = {
    model: config.openai.model || "gpt-4o",
    messages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 4096,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // Passthrough mode: no client tools means the standard set is sent instead.
  const clientSentTools = Array.isArray(body.tools) && body.tools.length > 0;
  const toolsInjected = !clientSentTools;
  const toolsToSend = clientSentTools ? body.tools : STANDARD_TOOLS;

  if (toolsInjected) {
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (OpenAI) ===");
  }

  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    openAIBody.tools = toOpenAITools(toolsToSend);
    openAIBody.parallel_tool_calls = true;
    openAIBody.tool_choice = "auto"; // the model decides whether to call a tool
    logger.info({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map(t => t.name),
      toolsInjected
    }, "=== SENDING TOOLS TO OPENAI ===");
  }

  logger.info({
    endpoint,
    model: openAIBody.model,
    hasTools: !!openAIBody.tools,
    toolCount: openAIBody.tools?.length || 0,
    temperature: openAIBody.temperature,
    max_tokens: openAIBody.max_tokens,
  }, "=== OPENAI REQUEST ===");

  return performJsonRequest(endpoint, { headers, body: openAIBody }, "OpenAI");
}
|
|
510
|
-
|
|
511
|
-
/**
 * Send a chat request to a llama.cpp server (`<endpoint>/v1/chat/completions`).
 *
 * An Authorization header is added only when `config.llamacpp.apiKey` is set.
 * STANDARD_TOOLS are used when the client sent no tools.
 *
 * @param {object} body - Incoming Anthropic-format request payload.
 * @returns {Promise<object>} Result of performJsonRequest.
 * @throws {Error} When `config.llamacpp.endpoint` is not set.
 */
async function invokeLlamaCpp(body) {
  if (!config.llamacpp?.endpoint) {
    throw new Error("llama.cpp endpoint is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter: toOpenAITools,
    convertAnthropicMessagesToOpenRouter: toOpenAIMessages
  } = require("./openrouter-utils");

  const endpoint = `${config.llamacpp.endpoint}/v1/chat/completions`;

  const headers = { "Content-Type": "application/json" };
  const apiKey = config.llamacpp.apiKey;
  if (apiKey) {
    // Only needed for servers that were started with authentication enabled.
    headers["Authorization"] = `Bearer ${apiKey}`;
  }

  const messages = toOpenAIMessages(body.messages || []);
  if (body.system) {
    // The system prompt travels as the first chat message in this format.
    messages.unshift({ role: "system", content: body.system });
  }

  const llamacppBody = {
    messages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 4096,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // Passthrough mode: no client tools means the standard set is sent instead.
  const clientSentTools = Array.isArray(body.tools) && body.tools.length > 0;
  const toolsInjected = !clientSentTools;
  const toolsToSend = clientSentTools ? body.tools : STANDARD_TOOLS;

  if (toolsInjected) {
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (llama.cpp) ===");
  }

  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    llamacppBody.tools = toOpenAITools(toolsToSend);
    llamacppBody.tool_choice = "auto";
    logger.info({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map(t => t.name),
      toolsInjected
    }, "=== SENDING TOOLS TO LLAMA.CPP ===");
  }

  logger.info({
    endpoint,
    hasTools: !!llamacppBody.tools,
    toolCount: llamacppBody.tools?.length || 0,
    temperature: llamacppBody.temperature,
    max_tokens: llamacppBody.max_tokens,
  }, "=== LLAMA.CPP REQUEST ===");

  return performJsonRequest(endpoint, { headers, body: llamacppBody }, "llama.cpp");
}
|
|
581
|
-
|
|
582
|
-
/**
 * Send a chat request to an LM Studio server (`<endpoint>/v1/chat/completions`).
 *
 * An Authorization header is added only when `config.lmstudio.apiKey` is set.
 * STANDARD_TOOLS are used when the client sent no tools.
 *
 * @param {object} body - Incoming Anthropic-format request payload.
 * @returns {Promise<object>} Result of performJsonRequest.
 * @throws {Error} When `config.lmstudio.endpoint` is not set.
 */
async function invokeLMStudio(body) {
  if (!config.lmstudio?.endpoint) {
    throw new Error("LM Studio endpoint is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter: toOpenAITools,
    convertAnthropicMessagesToOpenRouter: toOpenAIMessages
  } = require("./openrouter-utils");

  const endpoint = `${config.lmstudio.endpoint}/v1/chat/completions`;

  const headers = { "Content-Type": "application/json" };
  const apiKey = config.lmstudio.apiKey;
  if (apiKey) {
    // Only needed for servers that were started with authentication enabled.
    headers["Authorization"] = `Bearer ${apiKey}`;
  }

  const messages = toOpenAIMessages(body.messages || []);
  if (body.system) {
    // The system prompt travels as the first chat message in this format.
    messages.unshift({ role: "system", content: body.system });
  }

  const lmstudioBody = {
    messages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 4096,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // Passthrough mode: no client tools means the standard set is sent instead.
  const clientSentTools = Array.isArray(body.tools) && body.tools.length > 0;
  const toolsInjected = !clientSentTools;
  const toolsToSend = clientSentTools ? body.tools : STANDARD_TOOLS;

  if (toolsInjected) {
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (LM Studio) ===");
  }

  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    lmstudioBody.tools = toOpenAITools(toolsToSend);
    lmstudioBody.tool_choice = "auto";
    logger.info({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map(t => t.name),
      toolsInjected
    }, "=== SENDING TOOLS TO LM STUDIO ===");
  }

  logger.info({
    endpoint,
    hasTools: !!lmstudioBody.tools,
    toolCount: lmstudioBody.tools?.length || 0,
    temperature: lmstudioBody.temperature,
    max_tokens: lmstudioBody.max_tokens,
  }, "=== LM STUDIO REQUEST ===");

  return performJsonRequest(endpoint, { headers, body: lmstudioBody }, "LM Studio");
}
|
|
652
|
-
|
|
653
|
-
/**
 * Invoke the configured AWS Bedrock model with an Anthropic-format request.
 *
 * The request is converted with convertAnthropicToBedrockFormat for the model
 * family reported by detectModelFamily. STANDARD_TOOLS are used when the client
 * sent no tools; the caller's `body` is not modified.
 *
 * - Streaming (`body.stream` truthy): resolves to
 *   `{ stream, actualProvider, modelFamily, contentType, status }`, where `stream`
 *   is a Readable that emits one `event: message` SSE frame per Bedrock chunk
 *   followed by a final `event: message_stop` frame.
 * - Non-streaming: resolves to `{ ok, status, json, actualProvider, modelFamily }`
 *   with `json` produced by convertBedrockResponseToAnthropic.
 *
 * NOTE(review): BedrockRuntimeClient, InvokeModelCommand and
 * InvokeModelWithResponseStreamCommand are not imported in the visible part of
 * this file - confirm they are in scope where this function runs.
 *
 * @param {object} body - Incoming Anthropic-format request payload.
 * @returns {Promise<object>} Normalised result (shape depends on streaming, see above).
 * @throws {Error} When AWS credentials are missing; SDK errors are logged and rethrown.
 */
async function invokeBedrock(body) {

  // 1. Validate configuration
  if (!config.bedrock?.accessKeyId || !config.bedrock?.secretAccessKey) {
    throw new Error(
      "AWS Bedrock requires AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY. " +
      "Set these environment variables before starting the proxy."
    );
  }

  // 2. Initialize AWS SDK client
  const client = new BedrockRuntimeClient({
    region: config.bedrock.region,
    credentials: {
      accessKeyId: config.bedrock.accessKeyId,
      secretAccessKey: config.bedrock.secretAccessKey,
    },
  });

  // 3. Inject standard tools if needed
  let toolsToSend = body.tools;
  let toolsInjected = false;

  if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
    toolsToSend = STANDARD_TOOLS;
    toolsInjected = true;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Bedrock) ===");
  }

  const bedrockBody = { ...body, tools: toolsToSend };

  // 4. Detect model family and convert format
  const modelId = config.bedrock.modelId;
  const modelFamily = detectModelFamily(modelId);

  logger.info({
    modelId,
    modelFamily,
    hasTools: !!bedrockBody.tools,
    toolCount: bedrockBody.tools?.length || 0,
    streaming: body.stream || false,
  }, "=== BEDROCK REQUEST ===");

  const requestBody = convertAnthropicToBedrockFormat(bedrockBody, modelFamily);

  // 5. Handle streaming vs non-streaming
  if (body.stream) {
    // Streaming request
    const command = new InvokeModelWithResponseStreamCommand({
      modelId,
      contentType: "application/json",
      accept: "application/json",
      body: JSON.stringify(requestBody),
    });
    try {
      const response = await client.send(command);

      // Convert Bedrock event stream to Anthropic SSE format.
      // An async generator wrapped by Readable.from() guarantees the Bedrock
      // event stream is iterated exactly once and respects backpressure; a
      // hand-written async read() could be invoked again after a push and start
      // a second concurrent iteration over the same response body.
      const { Readable } = require("stream");
      const toAnthropicSse = async function* () {
        try {
          const decoder = new TextDecoder();
          for await (const event of response.body) {
            if (event.chunk) {
              // Decode the chunk bytes
              const chunkJson = JSON.parse(decoder.decode(event.chunk.bytes));

              // Convert to Anthropic SSE format
              yield `event: message\ndata: ${JSON.stringify(chunkJson)}\n\n`;
            }
          }
          // Send completion event
          yield `event: message_stop\ndata: {}\n\n`;
        } catch (err) {
          logger.error({ err }, "Error converting Bedrock stream");
          // Rethrowing makes Readable.from() destroy the stream with this error.
          throw err;
        }
      };
      // objectMode: false keeps this a byte/string stream, as before.
      const anthropicStream = Readable.from(toAnthropicSse(), { objectMode: false });

      logger.info({
        modelId,
        modelFamily,
        streaming: true
      }, "=== BEDROCK STREAMING RESPONSE ===");

      return {
        stream: anthropicStream, // Converted stream in Anthropic SSE format
        actualProvider: "bedrock",
        modelFamily,
        contentType: "text/event-stream",
        status: 200,
      };
    } catch (e) {
      logger.error({
        error: e.message,
        code: e.name,
        statusCode: e.$metadata?.httpStatusCode,
        requestId: e.$metadata?.requestId,
        modelId,
        region: config.bedrock.region,
        fullError: e
      }, "=== BEDROCK STREAMING ERROR ===");
      throw e;
    }
  } else {
    // Non-streaming request
    const command = new InvokeModelCommand({
      modelId,
      contentType: "application/json",
      accept: "application/json",
      body: JSON.stringify(requestBody),
    });
    try {
      const response = await client.send(command);

      // Parse response body
      const responseBody = JSON.parse(new TextDecoder().decode(response.body));

      // Convert to Anthropic format
      const anthropicResponse = convertBedrockResponseToAnthropic(
        responseBody,
        modelFamily,
        modelId
      );

      logger.info({
        modelId,
        modelFamily,
        stopReason: anthropicResponse.stop_reason,
        inputTokens: anthropicResponse.usage?.input_tokens || 0,
        outputTokens: anthropicResponse.usage?.output_tokens || 0,
      }, "=== BEDROCK RESPONSE ===");

      return {
        ok: true,
        status: 200,
        json: anthropicResponse,
        actualProvider: "bedrock",
        modelFamily,
      };
    } catch (e) {
      logger.error({
        error: e.message,
        code: e.name,
        statusCode: e.$metadata?.httpStatusCode,
        requestId: e.$metadata?.requestId,
        modelId,
        region: config.bedrock.region,
        fullError: e
      }, "=== BEDROCK NON-STREAMING ERROR ===");
      throw e;
    }
  }
}
|
|
820
|
-
|
|
821
|
-
async function invokeModel(body, options = {}) {
|
|
822
|
-
const { determineProvider, isFallbackEnabled, getFallbackProvider } = require("./routing");
|
|
823
|
-
const metricsCollector = getMetricsCollector();
|
|
824
|
-
const registry = getCircuitBreakerRegistry();
|
|
825
|
-
|
|
826
|
-
// Determine provider based on routing logic
|
|
827
|
-
const initialProvider = options.forceProvider ?? determineProvider(body);
|
|
828
|
-
const preferOllama = config.modelProvider?.preferOllama ?? false;
|
|
829
|
-
|
|
830
|
-
logger.debug({
|
|
831
|
-
initialProvider,
|
|
832
|
-
preferOllama,
|
|
833
|
-
fallbackEnabled: isFallbackEnabled(),
|
|
834
|
-
toolCount: Array.isArray(body?.tools) ? body.tools.length : 0,
|
|
835
|
-
}, "Provider routing decision");
|
|
836
|
-
|
|
837
|
-
metricsCollector.recordProviderRouting(initialProvider);
|
|
838
|
-
|
|
839
|
-
// Get circuit breaker for initial provider
|
|
840
|
-
const breaker = registry.get(initialProvider, {
|
|
841
|
-
failureThreshold: 5,
|
|
842
|
-
successThreshold: 2,
|
|
843
|
-
timeout: 60000,
|
|
844
|
-
});
|
|
845
|
-
|
|
846
|
-
let retries = 0;
|
|
847
|
-
const startTime = Date.now();
|
|
848
|
-
|
|
849
|
-
try {
|
|
850
|
-
// Try initial provider with circuit breaker
|
|
851
|
-
const result = await breaker.execute(async () => {
|
|
852
|
-
if (initialProvider === "azure-openai") {
|
|
853
|
-
return await invokeAzureOpenAI(body);
|
|
854
|
-
} else if (initialProvider === "azure-anthropic") {
|
|
855
|
-
return await invokeAzureAnthropic(body);
|
|
856
|
-
} else if (initialProvider === "ollama") {
|
|
857
|
-
return await invokeOllama(body);
|
|
858
|
-
} else if (initialProvider === "openrouter") {
|
|
859
|
-
return await invokeOpenRouter(body);
|
|
860
|
-
} else if (initialProvider === "openai") {
|
|
861
|
-
return await invokeOpenAI(body);
|
|
862
|
-
} else if (initialProvider === "llamacpp") {
|
|
863
|
-
return await invokeLlamaCpp(body);
|
|
864
|
-
} else if (initialProvider === "lmstudio") {
|
|
865
|
-
return await invokeLMStudio(body);
|
|
866
|
-
} else if (initialProvider === "bedrock") {
|
|
867
|
-
return await invokeBedrock(body);
|
|
868
|
-
}
|
|
869
|
-
return await invokeDatabricks(body);
|
|
870
|
-
});
|
|
871
|
-
|
|
872
|
-
// Record success metrics
|
|
873
|
-
const latency = Date.now() - startTime;
|
|
874
|
-
metricsCollector.recordProviderSuccess(initialProvider, latency);
|
|
875
|
-
metricsCollector.recordDatabricksRequest(true, retries);
|
|
876
|
-
|
|
877
|
-
// Record tokens and cost savings
|
|
878
|
-
if (result.json?.usage) {
|
|
879
|
-
const inputTokens = result.json.usage.input_tokens || result.json.usage.prompt_tokens || 0;
|
|
880
|
-
const outputTokens = result.json.usage.output_tokens || result.json.usage.completion_tokens || 0;
|
|
881
|
-
metricsCollector.recordTokens(inputTokens, outputTokens);
|
|
882
|
-
|
|
883
|
-
// Estimate cost savings if Ollama was used
|
|
884
|
-
if (initialProvider === "ollama") {
|
|
885
|
-
const savings = estimateCostSavings(inputTokens, outputTokens);
|
|
886
|
-
metricsCollector.recordCostSavings(savings);
|
|
887
|
-
}
|
|
888
|
-
}
|
|
889
|
-
|
|
890
|
-
// Return result with provider info for proper response conversion
|
|
891
|
-
return {
|
|
892
|
-
...result,
|
|
893
|
-
actualProvider: initialProvider
|
|
894
|
-
};
|
|
895
|
-
|
|
896
|
-
} catch (err) {
|
|
897
|
-
// Record failure
|
|
898
|
-
metricsCollector.recordProviderFailure(initialProvider);
|
|
899
|
-
|
|
900
|
-
// Check if we should fallback
|
|
901
|
-
const shouldFallback =
|
|
902
|
-
preferOllama &&
|
|
903
|
-
initialProvider === "ollama" &&
|
|
904
|
-
isFallbackEnabled() &&
|
|
905
|
-
!options.disableFallback;
|
|
906
|
-
|
|
907
|
-
if (!shouldFallback) {
|
|
908
|
-
metricsCollector.recordDatabricksRequest(false, retries);
|
|
909
|
-
throw err;
|
|
910
|
-
}
|
|
911
|
-
|
|
912
|
-
// Determine failure reason
|
|
913
|
-
const reason = categorizeFailure(err);
|
|
914
|
-
const fallbackProvider = getFallbackProvider();
|
|
915
|
-
|
|
916
|
-
logger.info({
|
|
917
|
-
originalProvider: initialProvider,
|
|
918
|
-
fallbackProvider,
|
|
919
|
-
reason,
|
|
920
|
-
error: err.message,
|
|
921
|
-
}, "Ollama failed, attempting transparent fallback to cloud");
|
|
922
|
-
|
|
923
|
-
metricsCollector.recordFallbackAttempt(initialProvider, fallbackProvider, reason);
|
|
924
|
-
|
|
925
|
-
try {
|
|
926
|
-
// Get circuit breaker for fallback provider
|
|
927
|
-
const fallbackBreaker = registry.get(fallbackProvider, {
|
|
928
|
-
failureThreshold: 5,
|
|
929
|
-
successThreshold: 2,
|
|
930
|
-
timeout: 60000,
|
|
931
|
-
});
|
|
932
|
-
|
|
933
|
-
const fallbackStart = Date.now();
|
|
934
|
-
|
|
935
|
-
// Execute fallback
|
|
936
|
-
const fallbackResult = await fallbackBreaker.execute(async () => {
|
|
937
|
-
if (fallbackProvider === "azure-openai") {
|
|
938
|
-
return await invokeAzureOpenAI(body);
|
|
939
|
-
} else if (fallbackProvider === "azure-anthropic") {
|
|
940
|
-
return await invokeAzureAnthropic(body);
|
|
941
|
-
} else if (fallbackProvider === "openrouter") {
|
|
942
|
-
return await invokeOpenRouter(body);
|
|
943
|
-
} else if (fallbackProvider === "openai") {
|
|
944
|
-
return await invokeOpenAI(body);
|
|
945
|
-
} else if (fallbackProvider === "llamacpp") {
|
|
946
|
-
return await invokeLlamaCpp(body);
|
|
947
|
-
}
|
|
948
|
-
return await invokeDatabricks(body);
|
|
949
|
-
});
|
|
950
|
-
|
|
951
|
-
const fallbackLatency = Date.now() - fallbackStart;
|
|
952
|
-
|
|
953
|
-
// Record fallback success
|
|
954
|
-
metricsCollector.recordFallbackSuccess(fallbackLatency);
|
|
955
|
-
metricsCollector.recordDatabricksRequest(true, retries);
|
|
956
|
-
|
|
957
|
-
// Record token usage
|
|
958
|
-
if (fallbackResult.json?.usage) {
|
|
959
|
-
metricsCollector.recordTokens(
|
|
960
|
-
fallbackResult.json.usage.input_tokens || fallbackResult.json.usage.prompt_tokens || 0,
|
|
961
|
-
fallbackResult.json.usage.output_tokens || fallbackResult.json.usage.completion_tokens || 0
|
|
962
|
-
);
|
|
963
|
-
}
|
|
964
|
-
|
|
965
|
-
logger.info({
|
|
966
|
-
originalProvider: initialProvider,
|
|
967
|
-
fallbackProvider,
|
|
968
|
-
fallbackLatency,
|
|
969
|
-
totalLatency: Date.now() - startTime,
|
|
970
|
-
}, "Fallback to cloud provider succeeded");
|
|
971
|
-
|
|
972
|
-
// Return result with actual provider used (fallback provider)
|
|
973
|
-
return {
|
|
974
|
-
...fallbackResult,
|
|
975
|
-
actualProvider: fallbackProvider
|
|
976
|
-
};
|
|
977
|
-
|
|
978
|
-
} catch (fallbackErr) {
|
|
979
|
-
// Both providers failed
|
|
980
|
-
metricsCollector.recordFallbackFailure();
|
|
981
|
-
metricsCollector.recordDatabricksRequest(false, retries);
|
|
982
|
-
|
|
983
|
-
logger.error({
|
|
984
|
-
originalProvider: initialProvider,
|
|
985
|
-
fallbackProvider,
|
|
986
|
-
originalError: err.message,
|
|
987
|
-
fallbackError: fallbackErr.message,
|
|
988
|
-
}, "Both Ollama and fallback provider failed");
|
|
989
|
-
|
|
990
|
-
// Return fallback error (more actionable than Ollama error)
|
|
991
|
-
throw fallbackErr;
|
|
992
|
-
}
|
|
993
|
-
}
|
|
994
|
-
}
|
|
995
|
-
|
|
996
|
-
/**
|
|
997
|
-
* Categorize failure for metrics
|
|
998
|
-
*/
|
|
999
|
-
function categorizeFailure(error) {
|
|
1000
|
-
if (error.name === "CircuitBreakerError" || error.code === "circuit_breaker_open") {
|
|
1001
|
-
return "circuit_breaker";
|
|
1002
|
-
}
|
|
1003
|
-
if (error.name === "AbortError" || error.code === "ETIMEDOUT") {
|
|
1004
|
-
return "timeout";
|
|
1005
|
-
}
|
|
1006
|
-
if (error.message?.includes("not configured") ||
|
|
1007
|
-
error.message?.includes("not available") ||
|
|
1008
|
-
error.code === "ECONNREFUSED") {
|
|
1009
|
-
return "service_unavailable";
|
|
1010
|
-
}
|
|
1011
|
-
if (error.message?.includes("tool") || error.message?.includes("function")) {
|
|
1012
|
-
return "tool_incompatible";
|
|
1013
|
-
}
|
|
1014
|
-
if (error.status === 429 || error.code === "RATE_LIMITED") {
|
|
1015
|
-
return "rate_limited";
|
|
1016
|
-
}
|
|
1017
|
-
return "error";
|
|
1018
|
-
}
|
|
1019
|
-
|
|
1020
|
-
/**
|
|
1021
|
-
* Estimate cost savings from using Ollama
|
|
1022
|
-
*/
|
|
1023
|
-
function estimateCostSavings(inputTokens, outputTokens) {
|
|
1024
|
-
// Anthropic Claude Sonnet 4.5 pricing
|
|
1025
|
-
const INPUT_COST_PER_1M = 3.00; // $3 per 1M input tokens
|
|
1026
|
-
const OUTPUT_COST_PER_1M = 15.00; // $15 per 1M output tokens
|
|
1027
|
-
|
|
1028
|
-
const inputCost = (inputTokens / 1_000_000) * INPUT_COST_PER_1M;
|
|
1029
|
-
const outputCost = (outputTokens / 1_000_000) * OUTPUT_COST_PER_1M;
|
|
1030
|
-
|
|
1031
|
-
return inputCost + outputCost;
|
|
1032
|
-
}
|
|
1033
|
-
|
|
1034
|
-
module.exports = {
|
|
1035
|
-
invokeModel,
|
|
1036
|
-
};
|