lynkr 7.2.0 → 7.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1036 +0,0 @@
1
- const config = require("../config");
2
- const http = require("http");
3
- const https = require("https");
4
- const { withRetry } = require("./retry");
5
- const { getCircuitBreakerRegistry } = require("./circuit-breaker");
6
- const { getMetricsCollector } = require("../observability/metrics");
7
- const logger = require("../logger");
8
- const { STANDARD_TOOLS } = require("./standard-tools");
9
- const { convertAnthropicToolsToOpenRouter } = require("./openrouter-utils");
10
- const {
11
- detectModelFamily,
12
- convertAnthropicToBedrockFormat,
13
- convertBedrockResponseToAnthropic
14
- } = require("./bedrock-utils");
15
-
16
-
17
-
18
-
19
// Fail fast at module load: everything below relies on the global fetch API,
// which Node ships natively starting with v18.
if (typeof fetch !== "function") {
  throw new Error("Node 18+ is required for the built-in fetch API.");
}
22
-
23
-
24
-
25
// Shared keep-alive socket pools so repeated provider calls reuse connections
// instead of paying TCP/TLS setup costs per request.
const POOL_OPTIONS = {
  keepAlive: true,
  keepAliveMsecs: 30000,
  maxSockets: 50,
  maxFreeSockets: 10,
  timeout: 60000,
};

const httpAgent = new http.Agent({ ...POOL_OPTIONS });
const httpsAgent = new https.Agent({ ...POOL_OPTIONS });
41
-
42
/**
 * POST a JSON payload to a provider endpoint and normalize the response.
 *
 * Streaming requests (body.stream === true) are sent exactly once and return
 * the raw readable stream; non-streaming requests go through withRetry with
 * exponential backoff and return the parsed JSON (or null on parse failure)
 * alongside the raw text.
 *
 * @param {string} url - Absolute endpoint URL.
 * @param {{headers?: object, body: object}} request - Headers and JSON body.
 * @param {string} providerLabel - Human-readable provider name for logs.
 * @returns {Promise<object>} { ok, status, headers, contentType, ... } plus
 *   either { stream } (streaming) or { json, text } (non-streaming).
 */
async function performJsonRequest(url, { headers = {}, body }, providerLabel) {
  // NOTE(review): Node's built-in fetch (undici) does not honor the classic
  // http(s).Agent `agent` option, so these pooled agents are likely ignored
  // here — confirm, and consider an undici Dispatcher if pooling matters.
  const agent = url.startsWith('https:') ? httpsAgent : httpAgent;
  // Optional-chain so a missing body reads as "not streaming" rather than
  // throwing a TypeError before the request is even attempted.
  const isStreaming = body?.stream === true;

  // Streaming requests can't be retried (the stream is consumed), so send once.
  if (isStreaming) {
    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      agent,
    });

    logger.debug({
      provider: providerLabel,
      status: response.status,
      streaming: true,
    }, `${providerLabel} API streaming response`);

    if (!response.ok) {
      const errorText = await response.text();
      logger.warn({
        provider: providerLabel,
        status: response.status,
        error: errorText.substring(0, 200),
      }, `${providerLabel} API streaming error`);
    }

    return {
      ok: response.ok,
      status: response.status,
      stream: response.body, // Caller consumes the readable stream directly
      contentType: response.headers.get("content-type"),
      headers: response.headers,
    };
  }

  // Non-streaming requests are safe to retry with backoff.
  return withRetry(async () => {
    const response = await fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      agent,
    });
    const text = await response.text();

    logger.debug({
      provider: providerLabel,
      status: response.status,
      responseLength: text.length,
    }, `${providerLabel} API response`);

    // Tolerate non-JSON bodies (e.g. HTML error pages): callers get json=null.
    let json;
    try {
      json = JSON.parse(text);
    } catch {
      json = null;
    }

    const result = {
      ok: response.ok,
      status: response.status,
      json,
      text,
      contentType: response.headers.get("content-type"),
      headers: response.headers,
    };

    // Surface failures so the retry wrapper can decide whether to back off.
    if (!response.ok) {
      logger.warn({
        provider: providerLabel,
        status: response.status,
        error: json?.error || text.substring(0, 200),
      }, `${providerLabel} API error`);
    }

    return result;
  }, {
    maxRetries: config.apiRetry?.maxRetries || 3,
    initialDelay: config.apiRetry?.initialDelay || 1000,
    maxDelay: config.apiRetry?.maxDelay || 30000,
  });
}
127
-
128
/**
 * Send an Anthropic-style request to Databricks' OpenAI-compatible endpoint.
 *
 * Works on a shallow copy so the caller's request object is never mutated.
 * Injects the standard Claude Code tools when the client sent none
 * (passthrough mode) and converts Anthropic tool definitions to the OpenAI
 * shape Databricks expects, skipping tools already in that shape.
 *
 * @param {object} body - Anthropic-format request payload.
 * @returns {Promise<object>} normalized result from performJsonRequest.
 * @throws {Error} when the Databricks URL is not configured.
 */
async function invokeDatabricks(body) {
  if (!config.databricks?.url) {
    throw new Error("Databricks configuration is missing required URL.");
  }

  // Shallow copy: tool injection/conversion below must not leak to the caller.
  const payload = { ...body };

  const clientSentTools = Array.isArray(payload.tools) && payload.tools.length > 0;
  if (!clientSentTools) {
    payload.tools = STANDARD_TOOLS;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Databricks) ===");
  }

  if (Array.isArray(payload.tools) && payload.tools.length > 0) {
    // Tools already shaped as OpenAI functions don't need another conversion.
    if (payload.tools[0]?.type === "function") {
      logger.debug({
        toolCount: payload.tools.length,
        toolNames: payload.tools.map(t => t.function?.name),
      }, "Tools already in OpenAI format, skipping conversion");
    } else {
      payload.tools = convertAnthropicToolsToOpenRouter(payload.tools);
      logger.debug({
        convertedToolCount: payload.tools.length,
        convertedToolNames: payload.tools.map(t => t.function?.name),
      }, "Converted tools to OpenAI format for Databricks");
    }
  }

  const headers = {
    Authorization: `Bearer ${config.databricks.apiKey}`,
    "Content-Type": "application/json",
  };
  return performJsonRequest(config.databricks.url, { headers, body: payload }, "Databricks");
}
171
-
172
/**
 * Send an Anthropic-native request to an Azure-hosted Anthropic endpoint.
 *
 * Fix: the previous version assigned injected tools directly onto the
 * caller-owned `body`, mutating the caller's object (unlike invokeDatabricks,
 * which copies first). Now operates on a shallow copy.
 *
 * @param {object} body - Anthropic-format request payload.
 * @returns {Promise<object>} normalized result from performJsonRequest.
 * @throws {Error} when the Azure Anthropic endpoint is not configured.
 */
async function invokeAzureAnthropic(body) {
  if (!config.azureAnthropic?.endpoint) {
    throw new Error("Azure Anthropic endpoint is not configured.");
  }

  // Shallow copy so tool injection never mutates the caller's request object.
  const payload = { ...body };

  // Inject standard tools if client didn't send any (passthrough mode)
  if (!Array.isArray(payload.tools) || payload.tools.length === 0) {
    payload.tools = STANDARD_TOOLS;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Azure Anthropic) ===");
  }

  const headers = {
    "Content-Type": "application/json",
    "x-api-key": config.azureAnthropic.apiKey,
    "anthropic-version": config.azureAnthropic.version ?? "2023-06-01",
  };
  return performJsonRequest(
    config.azureAnthropic.endpoint,
    { headers, body: payload },
    "Azure Anthropic",
  );
}
198
-
199
/**
 * Send a request to a local Ollama server via its /api/chat endpoint.
 *
 * Ollama expects plain-string message content, so Anthropic content-block
 * arrays are flattened to text first. Note: only `text` blocks survive the
 * flattening — any tool_use / tool_result blocks in the history are dropped.
 *
 * @param {object} body - Anthropic-format request payload.
 * @returns {Promise<object>} normalized result from performJsonRequest.
 * @throws {Error} when the Ollama endpoint is not configured.
 */
async function invokeOllama(body) {
  if (!config.ollama?.endpoint) {
    throw new Error("Ollama endpoint is not configured.");
  }

  const { convertAnthropicToolsToOllama } = require("./ollama-utils");

  const endpoint = `${config.ollama.endpoint}/api/chat`;
  const headers = { "Content-Type": "application/json" };

  // Flatten one Anthropic message into Ollama's { role, content } shape.
  const toOllamaMessage = (msg) => {
    let text = msg.content;
    if (Array.isArray(text)) {
      const parts = [];
      for (const block of text) {
        if (block.type === 'text') {
          parts.push(block.text || '');
        }
      }
      text = parts.join('\n');
    }
    return { role: msg.role, content: text || '' };
  };

  const ollamaBody = {
    model: config.ollama.model,
    messages: (body.messages || []).map(toOllamaMessage),
    stream: body.stream ?? false,
    options: {
      temperature: body.temperature ?? 0.7,
      num_predict: body.max_tokens ?? 4096,
      top_p: body.top_p ?? 1.0,
    },
  };

  // Passthrough mode: supply the standard tool set when the client sent none.
  let tools = body.tools;
  let toolsInjected = false;
  if (!Array.isArray(tools) || tools.length === 0) {
    tools = STANDARD_TOOLS;
    toolsInjected = true;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Ollama) ===");
  }

  // Forward tools for tool-capable models.
  if (Array.isArray(tools) && tools.length > 0) {
    ollamaBody.tools = convertAnthropicToolsToOllama(tools);
    logger.info({
      toolCount: tools.length,
      toolNames: tools.map(t => t.name),
      toolsInjected
    }, "Sending tools to Ollama");
  }

  return performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama");
}
265
-
266
/**
 * Send a request to OpenRouter's OpenAI-compatible chat endpoint.
 *
 * Converts Anthropic messages/tools to the OpenAI shape, folds Anthropic's
 * separate `system` field into a leading system message, and injects the
 * standard Claude Code tools when the client sent none (passthrough mode).
 *
 * @param {object} body - Anthropic-format request payload.
 * @returns {Promise<object>} normalized result from performJsonRequest.
 * @throws {Error} when the OpenRouter endpoint or API key is missing.
 */
async function invokeOpenRouter(body) {
  if (!config.openrouter?.endpoint || !config.openrouter?.apiKey) {
    throw new Error("OpenRouter endpoint or API key is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter,
    convertAnthropicMessagesToOpenRouter
  } = require("./openrouter-utils");

  const endpoint = config.openrouter.endpoint;
  const headers = {
    "Authorization": `Bearer ${config.openrouter.apiKey}`,
    "Content-Type": "application/json",
    "HTTP-Referer": "https://localhost:8080",
    "X-Title": "Claude-Ollama-Proxy"
  };

  // OpenAI-style APIs carry the system prompt as the first message, while
  // Anthropic passes it in a separate `system` field.
  const converted = convertAnthropicMessagesToOpenRouter(body.messages || []);
  const messages = body.system
    ? [{ role: "system", content: body.system }, ...converted]
    : converted;

  const requestBody = {
    model: config.openrouter.model,
    messages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 4096,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // Passthrough mode: supply the standard tool set when the client sent none.
  let tools = body.tools;
  let toolsInjected = false;
  if (!Array.isArray(tools) || tools.length === 0) {
    tools = STANDARD_TOOLS;
    toolsInjected = true;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (OpenRouter) ===");
  }

  if (Array.isArray(tools) && tools.length > 0) {
    requestBody.tools = convertAnthropicToolsToOpenRouter(tools);
    logger.info({
      toolCount: tools.length,
      toolNames: tools.map(t => t.name),
      toolsInjected
    }, "Sending tools to OpenRouter");
  }

  return performJsonRequest(endpoint, { headers, body: requestBody }, "OpenRouter");
}
330
-
331
/**
 * Classify an Azure OpenAI endpoint URL by its path shape.
 * Rules are checked in order; the first matching marker wins.
 *
 * @param {string} url - Full Azure OpenAI endpoint URL.
 * @returns {"responses"|"models"|"deployments"} detected endpoint flavor.
 * @throws {Error} when the URL matches no known shape.
 */
function detectAzureFormat(url) {
  const markers = [
    ["/openai/responses", "responses"],
    ["/models/", "models"],
    ["/openai/deployments", "deployments"],
  ];
  const hit = markers.find(([marker]) => url.includes(marker));
  if (!hit) {
    throw new Error("Unknown Azure OpenAI endpoint");
  }
  return hit[1];
}
337
-
338
-
339
/**
 * Send a request to Azure OpenAI, adapting to the endpoint flavor in use.
 *
 * Supports the deployments API, the models API, and the /openai/responses
 * API (which takes `max_completion_tokens` and rejects sampling parameters).
 * Injects the standard tools in passthrough mode and enables parallel tool
 * calling whenever tools are present.
 *
 * @param {object} body - Anthropic-format request payload.
 * @returns {Promise<object>} normalized result from performJsonRequest.
 * @throws {Error} when the endpoint/API key is missing or the endpoint
 *   shape is unrecognized.
 */
async function invokeAzureOpenAI(body) {
  if (!config.azureOpenAI?.endpoint || !config.azureOpenAI?.apiKey) {
    throw new Error("Azure OpenAI endpoint or API key is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter,
    convertAnthropicMessagesToOpenRouter
  } = require("./openrouter-utils");

  const endpoint = config.azureOpenAI.endpoint;
  const format = detectAzureFormat(endpoint);

  // Azure authenticates with an "api-key" header rather than Authorization.
  const headers = {
    "api-key": config.azureOpenAI.apiKey,
    "Content-Type": "application/json"
  };

  // Fold Anthropic's separate `system` field into a leading system message.
  const converted = convertAnthropicMessagesToOpenRouter(body.messages || []);
  const messages = body.system
    ? [{ role: "system", content: body.system }, ...converted]
    : converted;

  const azureBody = {
    messages,
    // Lower default temperature for more deterministic, action-oriented behavior.
    temperature: body.temperature ?? 0.3,
    // Cap the token budget at Azure OpenAI's per-request limit.
    max_tokens: Math.min(body.max_tokens ?? 4096, 16384),
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false,
    model: config.azureOpenAI.deployment
  };

  // Passthrough mode: supply the standard tool set when the client sent none.
  let tools = body.tools;
  let toolsInjected = false;
  if (!Array.isArray(tools) || tools.length === 0) {
    tools = STANDARD_TOOLS;
    toolsInjected = true;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS ===");
  }

  if (Array.isArray(tools) && tools.length > 0) {
    azureBody.tools = convertAnthropicToolsToOpenRouter(tools);
    azureBody.parallel_tool_calls = true; // Concurrent tool calls for better throughput
    azureBody.tool_choice = "auto"; // Nudge GPT models to actually use the tools
    logger.info({
      toolCount: tools.length,
      toolNames: tools.map(t => t.name),
      toolsInjected,
      hasSystemMessage: !!body.system,
      messageCount: messages.length,
      temperature: azureBody.temperature,
      sampleTool: azureBody.tools[0] // First tool logged for inspection
    }, "=== SENDING TOOLS TO AZURE OPENAI ===");
  }

  logger.info({
    endpoint,
    hasTools: !!azureBody.tools,
    toolCount: azureBody.tools?.length || 0,
    temperature: azureBody.temperature,
    max_tokens: azureBody.max_tokens,
    tool_choice: azureBody.tool_choice
  }, "=== AZURE OPENAI REQUEST ===");

  switch (format) {
    case "deployments":
    case "models":
      return performJsonRequest(endpoint, { headers, body: azureBody }, "Azure OpenAI");
    case "responses":
      // The responses API renames the token cap and rejects sampling knobs.
      azureBody.max_completion_tokens = azureBody.max_tokens;
      delete azureBody.max_tokens;
      delete azureBody.temperature;
      delete azureBody.top_p;
      return performJsonRequest(endpoint, { headers, body: azureBody }, "Azure OpenAI");
    default:
      throw new Error(`Unsupported Azure OpenAI endpoint format: ${format}`);
  }
}
431
-
432
/**
 * Send a request to the OpenAI chat completions API (or a compatible
 * endpoint override).
 *
 * Converts Anthropic messages/tools to the OpenAI shape, folds the `system`
 * field into a leading system message, and injects the standard tools when
 * the client sent none (passthrough mode).
 *
 * @param {object} body - Anthropic-format request payload.
 * @returns {Promise<object>} normalized result from performJsonRequest.
 * @throws {Error} when the OpenAI API key is not configured.
 */
async function invokeOpenAI(body) {
  if (!config.openai?.apiKey) {
    throw new Error("OpenAI API key is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter,
    convertAnthropicMessagesToOpenRouter
  } = require("./openrouter-utils");

  const endpoint = config.openai.endpoint || "https://api.openai.com/v1/chat/completions";
  const headers = {
    "Authorization": `Bearer ${config.openai.apiKey}`,
    "Content-Type": "application/json",
  };
  // Optional org scoping for multi-organization accounts.
  if (config.openai.organization) {
    headers["OpenAI-Organization"] = config.openai.organization;
  }

  // Fold Anthropic's separate `system` field into a leading system message.
  const converted = convertAnthropicMessagesToOpenRouter(body.messages || []);
  const messages = body.system
    ? [{ role: "system", content: body.system }, ...converted]
    : converted;

  const requestBody = {
    model: config.openai.model || "gpt-4o",
    messages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 4096,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // Passthrough mode: supply the standard tool set when the client sent none.
  let tools = body.tools;
  let toolsInjected = false;
  if (!Array.isArray(tools) || tools.length === 0) {
    tools = STANDARD_TOOLS;
    toolsInjected = true;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (OpenAI) ===");
  }

  if (Array.isArray(tools) && tools.length > 0) {
    requestBody.tools = convertAnthropicToolsToOpenRouter(tools);
    requestBody.parallel_tool_calls = true; // Allow concurrent tool calls
    requestBody.tool_choice = "auto"; // Model decides when to use tools
    logger.info({
      toolCount: tools.length,
      toolNames: tools.map(t => t.name),
      toolsInjected
    }, "=== SENDING TOOLS TO OPENAI ===");
  }

  logger.info({
    endpoint,
    model: requestBody.model,
    hasTools: !!requestBody.tools,
    toolCount: requestBody.tools?.length || 0,
    temperature: requestBody.temperature,
    max_tokens: requestBody.max_tokens,
  }, "=== OPENAI REQUEST ===");

  return performJsonRequest(endpoint, { headers, body: requestBody }, "OpenAI");
}
510
-
511
/**
 * Send a request to a llama.cpp server via its OpenAI-compatible
 * /v1/chat/completions endpoint.
 *
 * Converts messages/tools to the OpenAI shape, folds the `system` field into
 * a leading system message, and injects the standard tools when the client
 * sent none (passthrough mode).
 *
 * @param {object} body - Anthropic-format request payload.
 * @returns {Promise<object>} normalized result from performJsonRequest.
 * @throws {Error} when the llama.cpp endpoint is not configured.
 */
async function invokeLlamaCpp(body) {
  if (!config.llamacpp?.endpoint) {
    throw new Error("llama.cpp endpoint is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter,
    convertAnthropicMessagesToOpenRouter
  } = require("./openrouter-utils");

  const endpoint = `${config.llamacpp.endpoint}/v1/chat/completions`;
  const headers = {
    "Content-Type": "application/json",
  };
  // Bearer token only when the server is secured.
  if (config.llamacpp.apiKey) {
    headers["Authorization"] = `Bearer ${config.llamacpp.apiKey}`;
  }

  // Fold Anthropic's separate `system` field into a leading system message.
  const converted = convertAnthropicMessagesToOpenRouter(body.messages || []);
  const messages = body.system
    ? [{ role: "system", content: body.system }, ...converted]
    : converted;

  const requestBody = {
    messages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 4096,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // Passthrough mode: supply the standard tool set when the client sent none.
  let tools = body.tools;
  let toolsInjected = false;
  if (!Array.isArray(tools) || tools.length === 0) {
    tools = STANDARD_TOOLS;
    toolsInjected = true;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (llama.cpp) ===");
  }

  if (Array.isArray(tools) && tools.length > 0) {
    requestBody.tools = convertAnthropicToolsToOpenRouter(tools);
    requestBody.tool_choice = "auto";
    logger.info({
      toolCount: tools.length,
      toolNames: tools.map(t => t.name),
      toolsInjected
    }, "=== SENDING TOOLS TO LLAMA.CPP ===");
  }

  logger.info({
    endpoint,
    hasTools: !!requestBody.tools,
    toolCount: requestBody.tools?.length || 0,
    temperature: requestBody.temperature,
    max_tokens: requestBody.max_tokens,
  }, "=== LLAMA.CPP REQUEST ===");

  return performJsonRequest(endpoint, { headers, body: requestBody }, "llama.cpp");
}
581
-
582
/**
 * Send a request to an LM Studio server via its OpenAI-compatible
 * /v1/chat/completions endpoint.
 *
 * Converts messages/tools to the OpenAI shape, folds the `system` field into
 * a leading system message, and injects the standard tools when the client
 * sent none (passthrough mode).
 *
 * @param {object} body - Anthropic-format request payload.
 * @returns {Promise<object>} normalized result from performJsonRequest.
 * @throws {Error} when the LM Studio endpoint is not configured.
 */
async function invokeLMStudio(body) {
  if (!config.lmstudio?.endpoint) {
    throw new Error("LM Studio endpoint is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter,
    convertAnthropicMessagesToOpenRouter
  } = require("./openrouter-utils");

  const endpoint = `${config.lmstudio.endpoint}/v1/chat/completions`;
  const headers = {
    "Content-Type": "application/json",
  };
  // Bearer token only when the server is secured.
  if (config.lmstudio.apiKey) {
    headers["Authorization"] = `Bearer ${config.lmstudio.apiKey}`;
  }

  // Fold Anthropic's separate `system` field into a leading system message.
  const converted = convertAnthropicMessagesToOpenRouter(body.messages || []);
  const messages = body.system
    ? [{ role: "system", content: body.system }, ...converted]
    : converted;

  const requestBody = {
    messages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 4096,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // Passthrough mode: supply the standard tool set when the client sent none.
  let tools = body.tools;
  let toolsInjected = false;
  if (!Array.isArray(tools) || tools.length === 0) {
    tools = STANDARD_TOOLS;
    toolsInjected = true;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (LM Studio) ===");
  }

  if (Array.isArray(tools) && tools.length > 0) {
    requestBody.tools = convertAnthropicToolsToOpenRouter(tools);
    requestBody.tool_choice = "auto";
    logger.info({
      toolCount: tools.length,
      toolNames: tools.map(t => t.name),
      toolsInjected
    }, "=== SENDING TOOLS TO LM STUDIO ===");
  }

  logger.info({
    endpoint,
    hasTools: !!requestBody.tools,
    toolCount: requestBody.tools?.length || 0,
    temperature: requestBody.temperature,
    max_tokens: requestBody.max_tokens,
  }, "=== LM STUDIO REQUEST ===");

  return performJsonRequest(endpoint, { headers, body: requestBody }, "LM Studio");
}
652
-
653
/**
 * Send a request to AWS Bedrock via the AWS SDK, converting between the
 * Anthropic wire format and the target model family's native format.
 *
 * Fix: BedrockRuntimeClient / InvokeModelCommand /
 * InvokeModelWithResponseStreamCommand were referenced but never imported
 * anywhere in this file, so every Bedrock call threw a ReferenceError.
 * They are now lazily required from @aws-sdk/client-bedrock-runtime so the
 * proxy can still start when the SDK is absent and Bedrock is unused.
 *
 * @param {object} body - Anthropic-format request payload.
 * @returns {Promise<object>} streaming: { stream, contentType, status, ... };
 *   non-streaming: { ok, status, json } in Anthropic format.
 * @throws {Error} when AWS credentials are missing, or on SDK send failures.
 */
async function invokeBedrock(body) {
  // Lazy-load the AWS SDK (see fix note above).
  const {
    BedrockRuntimeClient,
    InvokeModelCommand,
    InvokeModelWithResponseStreamCommand,
  } = require("@aws-sdk/client-bedrock-runtime");

  // 1. Validate configuration
  if (!config.bedrock?.accessKeyId || !config.bedrock?.secretAccessKey) {
    throw new Error(
      "AWS Bedrock requires AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY. " +
      "Set these environment variables before starting the proxy."
    );
  }

  // 2. Initialize AWS SDK client
  const client = new BedrockRuntimeClient({
    region: config.bedrock.region,
    credentials: {
      accessKeyId: config.bedrock.accessKeyId,
      secretAccessKey: config.bedrock.secretAccessKey,
    },
  });

  // 3. Inject standard tools if needed (passthrough mode)
  let toolsToSend = body.tools;
  let toolsInjected = false;

  if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
    toolsToSend = STANDARD_TOOLS;
    toolsInjected = true;
    logger.info({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOLS.map(t => t.name),
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Bedrock) ===");
  }

  // Shallow copy so the caller's request object is not mutated.
  const bedrockBody = { ...body, tools: toolsToSend };

  // 4. Detect model family and convert the request to its native format
  const modelId = config.bedrock.modelId;
  const modelFamily = detectModelFamily(modelId);

  logger.info({
    modelId,
    modelFamily,
    hasTools: !!bedrockBody.tools,
    toolCount: bedrockBody.tools?.length || 0,
    streaming: body.stream || false,
  }, "=== BEDROCK REQUEST ===");

  const requestBody = convertAnthropicToBedrockFormat(bedrockBody, modelFamily);

  // 5. Handle streaming vs non-streaming
  if (body.stream) {
    const command = new InvokeModelWithResponseStreamCommand({
      modelId,
      contentType: "application/json",
      accept: "application/json",
      body: JSON.stringify(requestBody),
    });
    try {
      const response = await client.send(command);

      // Re-emit the Bedrock event stream as Anthropic-style SSE.
      const { Readable } = require("stream");
      const anthropicStream = new Readable({
        async read() {
          try {
            for await (const event of response.body) {
              if (event.chunk) {
                // Decode the chunk bytes into the provider's JSON event.
                const chunkJson = JSON.parse(new TextDecoder().decode(event.chunk.bytes));
                const sseEvent = `event: message\ndata: ${JSON.stringify(chunkJson)}\n\n`;
                this.push(sseEvent);
              }
            }
            // Signal completion, then end the stream.
            this.push(`event: message_stop\ndata: {}\n\n`);
            this.push(null);
          } catch (err) {
            logger.error({ err }, "Error converting Bedrock stream");
            this.destroy(err);
          }
        }
      });

      logger.info({
        modelId,
        modelFamily,
        streaming: true
      }, "=== BEDROCK STREAMING RESPONSE ===");

      return {
        stream: anthropicStream, // Converted stream in Anthropic SSE format
        actualProvider: "bedrock",
        modelFamily,
        contentType: "text/event-stream",
        status: 200,
      };
    } catch (e) {
      logger.error({
        error: e.message,
        code: e.name,
        statusCode: e.$metadata?.httpStatusCode,
        requestId: e.$metadata?.requestId,
        modelId,
        region: config.bedrock.region,
        fullError: e
      }, "=== BEDROCK STREAMING ERROR ===");
      throw e;
    }
  } else {
    const command = new InvokeModelCommand({
      modelId,
      contentType: "application/json",
      accept: "application/json",
      body: JSON.stringify(requestBody),
    });
    try {
      const response = await client.send(command);

      // Parse the raw response bytes, then convert to Anthropic format.
      const responseBody = JSON.parse(new TextDecoder().decode(response.body));
      const anthropicResponse = convertBedrockResponseToAnthropic(
        responseBody,
        modelFamily,
        modelId
      );

      logger.info({
        modelId,
        modelFamily,
        stopReason: anthropicResponse.stop_reason,
        inputTokens: anthropicResponse.usage?.input_tokens || 0,
        outputTokens: anthropicResponse.usage?.output_tokens || 0,
      }, "=== BEDROCK RESPONSE ===");

      return {
        ok: true,
        status: 200,
        json: anthropicResponse,
        actualProvider: "bedrock",
        modelFamily,
      };
    } catch (e) {
      logger.error({
        error: e.message,
        code: e.name,
        statusCode: e.$metadata?.httpStatusCode,
        requestId: e.$metadata?.requestId,
        modelId,
        region: config.bedrock.region,
        fullError: e
      }, "=== BEDROCK NON-STREAMING ERROR ===");
      throw e;
    }
  }
}
820
-
821
/**
 * Top-level entry point: route a request to the configured provider, with
 * circuit breaking, metrics, and an optional transparent Ollama→cloud
 * fallback.
 *
 * @param {object} body - Anthropic-format request payload.
 * @param {object} [options] - { forceProvider, disableFallback }.
 * @returns {Promise<object>} the provider result plus `actualProvider`,
 *   so callers know which conversion to apply to the response.
 * @throws whatever the provider (or the fallback provider) threw.
 */
async function invokeModel(body, options = {}) {
  const { determineProvider, isFallbackEnabled, getFallbackProvider } = require("./routing");
  const metricsCollector = getMetricsCollector();
  const registry = getCircuitBreakerRegistry();

  // Determine provider based on routing logic (caller override wins).
  const initialProvider = options.forceProvider ?? determineProvider(body);
  const preferOllama = config.modelProvider?.preferOllama ?? false;

  logger.debug({
    initialProvider,
    preferOllama,
    fallbackEnabled: isFallbackEnabled(),
    toolCount: Array.isArray(body?.tools) ? body.tools.length : 0,
  }, "Provider routing decision");

  metricsCollector.recordProviderRouting(initialProvider);

  // Get circuit breaker for initial provider (opens after 5 failures,
  // closes again after 2 successes, 60s half-open timeout).
  const breaker = registry.get(initialProvider, {
    failureThreshold: 5,
    successThreshold: 2,
    timeout: 60000,
  });

  // NOTE(review): `retries` is never incremented anywhere in this function,
  // so recordDatabricksRequest always receives 0 — confirm whether retry
  // counts were meant to be plumbed up from performJsonRequest.
  let retries = 0;
  const startTime = Date.now();

  try {
    // Try initial provider with circuit breaker.
    // Databricks is the implicit default when no branch matches.
    const result = await breaker.execute(async () => {
      if (initialProvider === "azure-openai") {
        return await invokeAzureOpenAI(body);
      } else if (initialProvider === "azure-anthropic") {
        return await invokeAzureAnthropic(body);
      } else if (initialProvider === "ollama") {
        return await invokeOllama(body);
      } else if (initialProvider === "openrouter") {
        return await invokeOpenRouter(body);
      } else if (initialProvider === "openai") {
        return await invokeOpenAI(body);
      } else if (initialProvider === "llamacpp") {
        return await invokeLlamaCpp(body);
      } else if (initialProvider === "lmstudio") {
        return await invokeLMStudio(body);
      } else if (initialProvider === "bedrock") {
        return await invokeBedrock(body);
      }
      return await invokeDatabricks(body);
    });

    // Record success metrics.
    // NOTE(review): recordDatabricksRequest is called regardless of which
    // provider served the request — presumably a legacy metric name; verify.
    const latency = Date.now() - startTime;
    metricsCollector.recordProviderSuccess(initialProvider, latency);
    metricsCollector.recordDatabricksRequest(true, retries);

    // Record tokens and cost savings (handles both Anthropic- and
    // OpenAI-style usage field names).
    if (result.json?.usage) {
      const inputTokens = result.json.usage.input_tokens || result.json.usage.prompt_tokens || 0;
      const outputTokens = result.json.usage.output_tokens || result.json.usage.completion_tokens || 0;
      metricsCollector.recordTokens(inputTokens, outputTokens);

      // Estimate cost savings if Ollama was used (local inference is free).
      if (initialProvider === "ollama") {
        const savings = estimateCostSavings(inputTokens, outputTokens);
        metricsCollector.recordCostSavings(savings);
      }
    }

    // Return result with provider info for proper response conversion
    return {
      ...result,
      actualProvider: initialProvider
    };

  } catch (err) {
    // Record failure
    metricsCollector.recordProviderFailure(initialProvider);

    // Fallback is only attempted for the prefer-Ollama path: local Ollama
    // failed, fallback is globally enabled, and the caller didn't opt out.
    const shouldFallback =
      preferOllama &&
      initialProvider === "ollama" &&
      isFallbackEnabled() &&
      !options.disableFallback;

    if (!shouldFallback) {
      metricsCollector.recordDatabricksRequest(false, retries);
      throw err;
    }

    // Determine failure reason (for fallback metrics labeling).
    const reason = categorizeFailure(err);
    const fallbackProvider = getFallbackProvider();

    logger.info({
      originalProvider: initialProvider,
      fallbackProvider,
      reason,
      error: err.message,
    }, "Ollama failed, attempting transparent fallback to cloud");

    metricsCollector.recordFallbackAttempt(initialProvider, fallbackProvider, reason);

    try {
      // Get circuit breaker for fallback provider (same thresholds).
      const fallbackBreaker = registry.get(fallbackProvider, {
        failureThreshold: 5,
        successThreshold: 2,
        timeout: 60000,
      });

      const fallbackStart = Date.now();

      // Execute fallback. Note: this dispatch covers only cloud providers
      // (no ollama/lmstudio/bedrock branches); Databricks is the default.
      const fallbackResult = await fallbackBreaker.execute(async () => {
        if (fallbackProvider === "azure-openai") {
          return await invokeAzureOpenAI(body);
        } else if (fallbackProvider === "azure-anthropic") {
          return await invokeAzureAnthropic(body);
        } else if (fallbackProvider === "openrouter") {
          return await invokeOpenRouter(body);
        } else if (fallbackProvider === "openai") {
          return await invokeOpenAI(body);
        } else if (fallbackProvider === "llamacpp") {
          return await invokeLlamaCpp(body);
        }
        return await invokeDatabricks(body);
      });

      const fallbackLatency = Date.now() - fallbackStart;

      // Record fallback success
      metricsCollector.recordFallbackSuccess(fallbackLatency);
      metricsCollector.recordDatabricksRequest(true, retries);

      // Record token usage (no cost-savings credit on the cloud path).
      if (fallbackResult.json?.usage) {
        metricsCollector.recordTokens(
          fallbackResult.json.usage.input_tokens || fallbackResult.json.usage.prompt_tokens || 0,
          fallbackResult.json.usage.output_tokens || fallbackResult.json.usage.completion_tokens || 0
        );
      }

      logger.info({
        originalProvider: initialProvider,
        fallbackProvider,
        fallbackLatency,
        totalLatency: Date.now() - startTime,
      }, "Fallback to cloud provider succeeded");

      // Return result with actual provider used (fallback provider)
      return {
        ...fallbackResult,
        actualProvider: fallbackProvider
      };

    } catch (fallbackErr) {
      // Both providers failed
      metricsCollector.recordFallbackFailure();
      metricsCollector.recordDatabricksRequest(false, retries);

      logger.error({
        originalProvider: initialProvider,
        fallbackProvider,
        originalError: err.message,
        fallbackError: fallbackErr.message,
      }, "Both Ollama and fallback provider failed");

      // Return fallback error (more actionable than Ollama error)
      throw fallbackErr;
    }
  }
}
995
-
996
/**
 * Map an error to a coarse failure-reason label for fallback metrics.
 * Rules are evaluated in order; the first match wins, and anything
 * unrecognized falls through to the generic "error" bucket.
 *
 * @param {Error & {code?: string, status?: number}} error
 * @returns {"circuit_breaker"|"timeout"|"service_unavailable"|"tool_incompatible"|"rate_limited"|"error"}
 */
function categorizeFailure(error) {
  const message = error.message ?? "";
  const rules = [
    [() => error.name === "CircuitBreakerError" || error.code === "circuit_breaker_open", "circuit_breaker"],
    [() => error.name === "AbortError" || error.code === "ETIMEDOUT", "timeout"],
    [() => message.includes("not configured") ||
           message.includes("not available") ||
           error.code === "ECONNREFUSED", "service_unavailable"],
    [() => message.includes("tool") || message.includes("function"), "tool_incompatible"],
    [() => error.status === 429 || error.code === "RATE_LIMITED", "rate_limited"],
  ];
  const hit = rules.find(([matches]) => matches());
  return hit ? hit[1] : "error";
}
1019
-
1020
/**
 * Estimate the cloud cost avoided by serving a request locally.
 *
 * Generalized: per-million-token rates are now an optional parameter so the
 * estimate can track other models/price changes; the defaults preserve the
 * original Anthropic Claude Sonnet 4.5 pricing ($3 in / $15 out per 1M).
 *
 * @param {number} inputTokens - Prompt tokens consumed.
 * @param {number} outputTokens - Completion tokens produced.
 * @param {{inputCostPer1M?: number, outputCostPer1M?: number}} [pricing]
 *   Optional USD rates per 1M tokens; omitted fields use the defaults.
 * @returns {number} estimated dollars saved.
 */
function estimateCostSavings(inputTokens, outputTokens, pricing = {}) {
  const {
    inputCostPer1M = 3.00,   // $3 per 1M input tokens
    outputCostPer1M = 15.00, // $15 per 1M output tokens
  } = pricing;

  const inputCost = (inputTokens / 1_000_000) * inputCostPer1M;
  const outputCost = (outputTokens / 1_000_000) * outputCostPer1M;

  return inputCost + outputCost;
}
1033
-
1034
// Public surface: only the top-level router is exported; the provider-specific
// invoke* helpers stay private to this module.
module.exports = {
  invokeModel,
};