loren-code 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/server.js ADDED
@@ -0,0 +1,1238 @@
1
+ import http from "node:http";
2
+ import { randomUUID } from "node:crypto";
3
+ import { URL } from "node:url";
4
+ import fs from "node:fs";
5
+ import path from "node:path";
6
+ import { fileURLToPath } from "node:url";
7
+ import { loadConfig } from "./config.js";
8
+ import { ensureEnvLocal, ensureRuntimeDir } from "./bootstrap.js";
9
+ import logger from "./logger.js";
10
+ import { KeyManager } from "./key-manager.js";
11
+ import { validateInput, MessageSchema, CountTokensSchema } from "./schemas.js";
12
+ import { modelCache, getFromCache, setInCache, getCacheStats } from "./cache.js";
13
+ import { getAgent } from "./http-agents.js";
14
+ import { getMetrics, incrementError, recordTokenUsage, metricsMiddleware } from "./metrics.js";
15
+ import { createConfigWatcher } from "./config-watcher.js";
16
+ import usageTracker from "./usage-tracker.js";
17
+
18
// Global configuration: locate this module on disk and derive the project
// root, which is the parent of the directory containing this file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const projectRoot = path.resolve(__dirname, "..");

// Bootstrap helpers run before the configuration is loaded.
// NOTE(review): presumably these create the runtime directory and a default
// .env.local under projectRoot — confirm in ./bootstrap.js.
ensureRuntimeDir(projectRoot);
ensureEnvLocal(projectRoot, { logger });

// Mutable runtime state: both bindings are replaced by reloadRuntimeConfig().
let config = loadConfig();
let keyManager = new KeyManager(config.apiKeys);
// File rewritten by writeApiKeysToEnvFile(). It is resolved against the
// current working directory, not against projectRoot.
const envFilePath = path.resolve(process.cwd(), ".env.local");
29
+
30
/**
 * Re-reads the configuration and rebuilds the state derived from it.
 *
 * Replaces the module-level `config` and `keyManager` bindings, then asks the
 * usage tracker to resynchronise its keys and logs the reload.
 */
function reloadRuntimeConfig() {
  const freshConfig = loadConfig();
  config = freshConfig;
  keyManager = new KeyManager(freshConfig.apiKeys);
  usageTracker.syncKeysFromConfig();
  logger.info('Configuration reloaded');
}
36
+
37
// Watch .env.local: on every change reload the configuration and start a
// (non-awaited) probe of all API keys.
const configWatcher = createConfigWatcher('.env.local', () => {
  reloadRuntimeConfig();
  void probeAllApiKeys();
});

// Start watching immediately.
configWatcher.start();

// Shutdown handling: each signal logs its own message, stops the watcher and
// exits with status 0.
const shutdownMessages = {
  SIGINT: 'Shutting down gracefully...',
  SIGTERM: 'Received SIGTERM, shutting down gracefully...',
};

for (const [signalName, logLine] of Object.entries(shutdownMessages)) {
  process.on(signalName, () => {
    logger.info(logLine);
    configWatcher.stop();
    process.exit(0);
  });
}

// Without at least one API key the bridge cannot work: abort startup.
if (!config.apiKeys.length) {
  logger.error('No Ollama API keys found. Set OLLAMA_API_KEYS or OLLAMA_API_KEY in the environment or .env.local.');
  process.exit(1);
}
63
+
64
// HTTP entry point: every request goes through the metrics middleware and is
// then dispatched by routeRequest(). Any error that escapes a handler is
// logged, counted and answered with a 500 if the response has not started yet;
// otherwise the already-started response is simply closed.
const server = http.createServer(async (req, res) => {
  // Apply the metrics middleware (no continuation needed here).
  metricsMiddleware(req, res, () => {});

  try {
    await routeRequest(req, res);
  } catch (error) {
    // Normalise once: a handler may throw a non-Error value (even null), and
    // reading `.message` on it inside this catch would throw again.
    const isError = error instanceof Error;
    const message = isError ? error.message : String(error);
    logger.error(`Request handling error: ${message}`, { stack: isError ? error.stack : undefined });
    incrementError('other');

    if (!res.headersSent && !res.writableEnded) {
      sendJson(res, 500, {
        type: "error",
        error: {
          type: "api_error",
          message,
        },
      });
      return;
    }

    // Headers already went out (e.g. a stream was in progress): just close.
    if (!res.writableEnded) {
      try {
        res.end();
      } catch (endError) {
        const endMessage = endError instanceof Error ? endError.message : String(endError);
        logger.error(`Failed to close response after request error: ${endMessage}`);
      }
    }
  }
});

// Start listening, then probe all configured keys in the background.
server.listen(config.port, config.host, () => {
  logger.info(`Claude <-> Ollama Cloud bridge listening on http://${config.host}:${config.port}`);
  logger.info(`Upstream: ${config.upstreamBaseUrl}`);
  logger.info(`API Keys loaded: ${config.apiKeys.length}`);
  void probeAllApiKeys();
});
100
+
101
/**
 * Dispatches an incoming request to the handler registered for its method and
 * path, logging the request first.
 *
 * Requests without a URL get a bare 404; unknown routes get a 404 in the
 * error envelope used by the other handlers.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 */
async function routeRequest(req, res) {
  if (!req.url) {
    sendJson(res, 404, { error: "Not found" });
    return;
  }

  const url = new URL(req.url, `http://${req.headers.host || "localhost"}`);
  const { method } = req;
  const { pathname } = url;

  // Request log
  logger.info(`${method} ${pathname}`, {
    ip: req.socket.remoteAddress,
    userAgent: req.headers['user-agent']
  });

  // Route table; every path appears once, so lookup order does not matter.
  const routes = [
    { methods: ["GET"], path: "/health", handler: handleHealth },
    { methods: ["GET"], path: "/metrics", handler: handleMetrics },
    { methods: ["GET"], path: "/events", handler: handleEvents },
    { methods: ["POST", "DELETE"], path: "/api/keys", handler: handleKeys },
    // Usage API: GET returns data, POST is accepted as well (used for reset)
    { methods: ["GET", "POST"], path: "/api/usage", handler: handleUsage },
    { methods: ["GET"], path: "/dashboard", handler: handleDashboard },
    { methods: ["GET"], path: "/v1/models", handler: handleModels },
    // Forces a fresh fetch from Ollama Cloud
    { methods: ["POST"], path: "/v1/refresh", handler: handleRefresh },
    { methods: ["POST"], path: "/v1/messages", handler: handleMessages },
    { methods: ["POST"], path: "/v1/messages/count_tokens", handler: handleCountTokens },
  ];

  const matched = routes.find(
    (route) => route.path === pathname && route.methods.includes(method),
  );

  if (matched) {
    await matched.handler(req, res);
    return;
  }

  sendJson(res, 404, {
    type: "error",
    error: {
      type: "not_found_error",
      message: `Unsupported route: ${method} ${pathname}`,
    },
  });
}
181
+
182
/**
 * GET /health — reports liveness plus basic key and upstream information.
 *
 * @param {import("node:http").IncomingMessage} _req unused
 * @param {import("node:http").ServerResponse} res
 */
async function handleHealth(_req, res) {
  const keyStats = keyManager.getStats();
  const report = {
    ok: true,
    uptime: process.uptime(),
    upstream: config.upstreamBaseUrl,
    keysLoaded: config.apiKeys.length,
    keysHealthy: keyStats.healthy,
    version: process.env.npm_package_version || '0.1.0'
  };

  sendJson(res, 200, report);
}
193
+
194
/**
 * GET /metrics — returns the current metrics snapshot as JSON.
 *
 * @param {import("node:http").IncomingMessage} _req unused
 * @param {import("node:http").ServerResponse} res
 */
async function handleMetrics(_req, res) {
  // The cache stats are computed but not part of the response.
  // NOTE(review): the call is kept in case getCacheStats() updates metrics as
  // a side effect — confirm in ./cache.js, otherwise it can be removed.
  getCacheStats(modelCache, 'models');

  const snapshot = getMetrics();
  sendJson(res, 200, snapshot);
}
202
+
203
/**
 * POST /v1/refresh — drops the cached model list, fetches `/api/tags` from the
 * upstream again, stores the fresh list in the cache and returns it with
 * `refreshed: true`.
 *
 * Only `{ data }` is cached: the same cache entry is served verbatim by
 * GET /v1/models, and those later responses must not claim to be a refresh.
 *
 * Upstream non-2xx responses are relayed through proxyError(); any other
 * failure yields a 500 `upstream_error`.
 *
 * @param {import("node:http").IncomingMessage} _req unused
 * @param {import("node:http").ServerResponse} res
 */
async function handleRefresh(_req, res) {
  // Invalidate the models cache before fetching fresh data
  const cacheKey = 'models_list';
  modelCache.del(cacheKey);

  try {
    const { response: upstream } = await fetchUpstream("/api/tags", {
      method: "GET",
    });

    if (!upstream.ok) {
      await proxyError(upstream, res);
      return;
    }

    const payload = await upstream.json();
    let models = Array.isArray(payload.models) ? payload.models : [];

    // Sort by modified date (most recent first); undated models sort last
    models = models.sort((a, b) => {
      const dateA = a.modified_at ? new Date(a.modified_at).getTime() : 0;
      const dateB = b.modified_at ? new Date(b.modified_at).getTime() : 0;
      return dateB - dateA;
    });

    const data = models.flatMap((model) => {
      const baseId = model.model || model.name;
      if (!baseId) {
        return [];
      }

      const alias = findAliasForModel(baseId);
      const baseRecord = {
        id: baseId,
        type: "model",
        display_name: baseId,
        created_at: model.modified_at || new Date().toISOString(),
      };

      if (!alias) {
        return [baseRecord];
      }

      // An aliased model is listed twice: under its alias and under its id.
      return [
        {
          ...baseRecord,
          id: alias,
          display_name: `${alias} -> ${baseId}`,
        },
        baseRecord,
      ];
    });

    // Cache the plain list; the `refreshed` flag belongs to this response only.
    setInCache(modelCache, cacheKey, { data });

    logger.info('Models cache refreshed');
    sendJson(res, 200, { data, refreshed: true });
  } catch (error) {
    logger.error(`Error refreshing models: ${error.message}`);
    incrementError('upstream');
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "upstream_error",
        message: "Failed to refresh models"
      }
    });
  }
}
274
/**
 * Builds the snapshot pushed to dashboard clients: usage data, metrics and
 * the same health fields that /health reports.
 *
 * @returns {{usage: *, metrics: *, health: object}}
 */
function getDashboardState() {
  const usage = usageTracker.getDashboardData();
  const metrics = getMetrics();
  const health = {
    ok: true,
    uptime: process.uptime(),
    upstream: config.upstreamBaseUrl,
    keysLoaded: config.apiKeys.length,
    keysHealthy: keyManager.getStats().healthy,
    version: process.env.npm_package_version || '0.1.0'
  };

  return { usage, metrics, health };
}
288
+
289
/**
 * GET /events — Server-Sent Events stream for the dashboard.
 *
 * Sends one `state` event immediately and then one every 2 seconds until the
 * response closes.
 *
 * Cleanup is bound to the response's 'close' event, which Node documents as
 * firing when the response completes or the connection is terminated early.
 * The request's 'close' event signals request completion, so it is not a
 * dependable disconnect signal for a long-lived stream.
 *
 * @param {import("node:http").IncomingMessage} req unused apart from routing
 * @param {import("node:http").ServerResponse} res
 */
async function handleEvents(req, res) {
  res.writeHead(200, {
    "content-type": "text/event-stream",
    "cache-control": "no-cache, no-transform",
    connection: "keep-alive",
    "x-accel-buffering": "no",
  });

  let interval;

  const sendState = () => {
    // Never write to a response that has ended or whose socket is gone.
    if (res.writableEnded || res.destroyed) {
      return;
    }

    res.write(`event: state\ndata: ${JSON.stringify(getDashboardState())}\n\n`);
  };

  const stopStreaming = () => {
    clearInterval(interval);
    if (!res.writableEnded) {
      res.end();
    }
  };

  sendState();
  interval = setInterval(sendState, 2000);

  res.on("close", stopStreaming);
}
316
+
317
/**
 * GET /dashboard — serves `dashboard.html` from the directory of this module.
 * Responds with a 500 `internal_error` if the file cannot be read.
 *
 * @param {import("node:http").IncomingMessage} _req unused
 * @param {import("node:http").ServerResponse} res
 */
async function handleDashboard(_req, res) {
  try {
    // Resolve dashboard.html next to this source file.
    const moduleDir = path.dirname(fileURLToPath(import.meta.url));
    const htmlPath = path.join(moduleDir, 'dashboard.html');
    const html = await fs.promises.readFile(htmlPath, 'utf8');

    const headers = {
      'Content-Type': 'text/html',
      'Cache-Control': 'no-cache'
    };
    res.writeHead(200, headers);
    res.end(html);
  } catch (error) {
    logger.error(`Error serving dashboard: ${error.message}`);
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "internal_error",
        message: "Failed to load dashboard"
      }
    });
  }
}
341
+
342
/**
 * POST/DELETE /api/keys — adds or removes an API key.
 *
 * The JSON body must carry a `key` string. POST appends it (deduplicated) to
 * the configured keys, persists the list to the env file, reloads the runtime
 * configuration and probes the new key. DELETE removes it, persists and
 * reloads.
 *
 * Client mistakes are answered with 400 `invalid_request_error`:
 *  - a body that is not valid JSON,
 *  - a missing or empty key,
 *  - a key containing whitespace or a comma. Keys are stored comma-separated
 *    on a single env-file line, so such a value would corrupt the list or
 *    inject extra lines into the file.
 * Any other failure is answered with 500 `internal_error`.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 */
async function handleKeys(req, res) {
  const rejectRequest = (message) => {
    sendJson(res, 400, {
      type: "error",
      error: {
        type: "invalid_request_error",
        message,
      },
    });
  };

  try {
    let body;
    try {
      body = await readJson(req);
    } catch (parseError) {
      // Malformed JSON is the caller's fault, not a server error.
      if (parseError instanceof SyntaxError) {
        rejectRequest("Request body must be valid JSON");
        return;
      }
      throw parseError;
    }

    const key = String(body?.key || "").trim();

    if (!key) {
      rejectRequest("API key is required");
      return;
    }

    if (/[\s,]/.test(key)) {
      rejectRequest("API key must not contain whitespace or commas");
      return;
    }

    if (req.method === "POST") {
      const updatedKeys = Array.from(new Set([...config.apiKeys, key]));
      writeApiKeysToEnvFile(updatedKeys);
      reloadRuntimeConfig();
      await probeSingleApiKey(key);
      sendJson(res, 200, {
        ok: true,
        added: key,
        keysLoaded: config.apiKeys.length,
      });
      return;
    }

    // DELETE: drop every occurrence of the key.
    const updatedKeys = config.apiKeys.filter((existingKey) => existingKey !== key);
    writeApiKeysToEnvFile(updatedKeys);
    reloadRuntimeConfig();
    sendJson(res, 200, {
      ok: true,
      removed: key,
      keysLoaded: config.apiKeys.length,
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`Error updating API keys: ${message}`);
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "internal_error",
        message,
      },
    });
  }
}
390
+
391
/**
 * /api/usage — returns usage data, or resets it when called with
 * `?reset=true`.
 *
 * The normal response contains the tracker's summary and per-key data plus a
 * `rateLimits` section listing the currently rate-limited keys (key truncated
 * to 20 characters, reason, seconds until reset or null).
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 */
async function handleUsage(req, res) {
  try {
    const { searchParams } = new URL(req.url, `http://${req.headers.host || "localhost"}`);

    // Reset is requested through the query string: /api/usage?reset=true
    if (searchParams.get('reset') === 'true') {
      usageTracker.resetAll();
      sendJson(res, 200, { ok: true, message: 'Usage data reset successfully' });
      return;
    }

    const usageData = usageTracker.getDashboardData();

    // Describe the keys that are currently rate limited
    const limitedKeys = usageData.keys.filter((entry) => entry.isRateLimited);
    const describeLimit = (entry) => {
      let resetIn = null;
      if (typeof entry.rateLimitResetTime === 'number') {
        resetIn = Math.max(0, Math.floor((entry.rateLimitResetTime - Date.now()) / 1000));
      }

      return {
        key: `${entry.key.substring(0, 20)}...`,
        reason: entry.rateLimitReason || 'Rate limit reached',
        resetIn,
      };
    };

    sendJson(res, 200, {
      summary: usageData.summary,
      keys: usageData.keys,
      rateLimits: {
        active: limitedKeys.length,
        keys: limitedKeys.map((entry) => describeLimit(entry)),
      }
    });
  } catch (error) {
    logger.error(`Error fetching usage data: ${error.message}`);
    incrementError('other');
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "internal_error",
        message: "Failed to fetch usage data"
      }
    });
  }
}
433
+
434
/**
 * GET /v1/models — lists upstream models, newest first.
 *
 * Serves the cached list unless `?refresh=true` is given or the cache is
 * empty. A model that has a configured alias is listed twice: first under the
 * alias, then under its own id. Upstream non-2xx responses are relayed via
 * proxyError(); other failures yield a 500 `upstream_error`.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 */
async function handleModels(req, res) {
  const { searchParams } = new URL(req.url, `http://${req.headers.host || "localhost"}`);

  const cacheKey = 'models_list';
  const bypassCache = searchParams.get('refresh') === 'true';

  if (!bypassCache) {
    const cachedList = getFromCache(modelCache, cacheKey);
    if (cachedList) {
      sendJson(res, 200, cachedList);
      return;
    }
  }

  try {
    const { response: upstream } = await fetchUpstream("/api/tags", {
      method: "GET",
    });

    if (!upstream.ok) {
      await proxyError(upstream, res);
      return;
    }

    const payload = await upstream.json();
    const models = Array.isArray(payload.models) ? payload.models : [];

    // Most recently modified first; models without a date count as epoch 0.
    const modifiedAtMs = (entry) => (entry.modified_at ? new Date(entry.modified_at).getTime() : 0);
    models.sort((left, right) => modifiedAtMs(right) - modifiedAtMs(left));

    const data = [];
    for (const model of models) {
      const baseId = model.model || model.name;
      if (!baseId) {
        continue;
      }

      const alias = findAliasForModel(baseId);
      const baseRecord = {
        id: baseId,
        type: "model",
        display_name: baseId,
        created_at: model.modified_at || new Date().toISOString(),
      };

      if (alias) {
        data.push({
          ...baseRecord,
          id: alias,
          display_name: `${alias} -> ${baseId}`,
        });
      }
      data.push(baseRecord);
    }

    const response = { data };

    // Replace whatever the cache currently holds with the fresh list.
    modelCache.del(cacheKey);
    setInCache(modelCache, cacheKey, response);

    sendJson(res, 200, response);
  } catch (error) {
    logger.error(`Error fetching models: ${error.message}`);
    incrementError('upstream');
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "upstream_error",
        message: "Failed to fetch models from upstream"
      }
    });
  }
}
516
+
517
/**
 * POST /v1/messages — validates an Anthropic-style request, forwards it to the
 * upstream `/api/chat` endpoint and returns the converted reply, either
 * streamed or as a single JSON message.
 *
 * A body that is not valid JSON, or that fails schema validation, is answered
 * with 400 `invalid_request_error`. Only errors from parsing the client body
 * are treated this way; a SyntaxError from decoding the upstream reply still
 * propagates. Every other error is rethrown to the server-level handler.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 */
async function handleMessages(req, res) {
  const rejectInvalid = (message) => {
    incrementError('validation');
    sendJson(res, 400, {
      type: "error",
      error: {
        type: "invalid_request_error",
        message
      }
    });
  };

  let body;
  try {
    body = await readJson(req);
  } catch (error) {
    if (error instanceof SyntaxError) {
      rejectInvalid(`Invalid JSON in request body: ${error.message}`);
      return;
    }
    throw error;
  }

  try {
    // Validate the input
    const validatedBody = validateInput(MessageSchema, body);

    const anthropicRequest = normalizeAnthropicRequest(validatedBody);
    logger.info(`[bridge] /v1/messages requested_model=${anthropicRequest.requestedModel} resolved_model=${anthropicRequest.model} stream=${anthropicRequest.stream}`);

    const ollamaRequest = anthropicToOllamaRequest(anthropicRequest);

    const { response: upstream, apiKey } = await fetchUpstream("/api/chat", {
      method: "POST",
      headers: {
        "content-type": "application/json",
      },
      body: JSON.stringify(ollamaRequest),
    });

    if (!upstream.ok) {
      await proxyError(upstream, res);
      return;
    }

    if (anthropicRequest.stream) {
      await pipeStreamingResponse(upstream, anthropicRequest, res, apiKey);
      return;
    }

    const payload = await upstream.json();
    const message = ollamaToAnthropicMessage(payload, anthropicRequest.model);

    // Record token usage against the key that served the request
    usageTracker.recordUsage(apiKey, message.usage?.output_tokens || 0);
    recordTokenUsage(
      anthropicRequest.model,
      message.usage?.input_tokens || 0,
      message.usage?.output_tokens || 0
    );

    sendJson(res, 200, message);
  } catch (error) {
    // Guard with instanceof: a thrown non-Error has no usable `.message`.
    if (error instanceof Error && error.message.includes('Validation failed')) {
      rejectInvalid(error.message);
      return;
    }
    throw error;
  }
}
574
+
575
/**
 * POST /v1/messages/count_tokens — returns an estimated input token count for
 * the request's system prompt and messages.
 *
 * The count is a character-based estimate from estimateTokens(), not a
 * tokenizer result. A body that is not valid JSON, or that fails schema
 * validation, is answered with 400 `invalid_request_error`; other errors are
 * rethrown to the server-level handler.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 */
async function handleCountTokens(req, res) {
  const rejectInvalid = (message) => {
    incrementError('validation');
    sendJson(res, 400, {
      type: "error",
      error: {
        type: "invalid_request_error",
        message
      }
    });
  };

  let body;
  try {
    body = await readJson(req);
  } catch (error) {
    if (error instanceof SyntaxError) {
      rejectInvalid(`Invalid JSON in request body: ${error.message}`);
      return;
    }
    throw error;
  }

  try {
    const validatedBody = validateInput(CountTokensSchema, body);

    // The resolved model is only logged; it does not affect the estimate.
    const requestedModel = validatedBody.model || config.defaultModel;
    const resolvedModel = resolveModelAlias(requestedModel);
    logger.info(`[bridge] /v1/messages/count_tokens requested_model=${requestedModel} resolved_model=${resolvedModel}`);

    const inputText = JSON.stringify(validatedBody.messages || []);
    const systemText = typeof validatedBody.system === "string" ? validatedBody.system : JSON.stringify(validatedBody.system || "");
    const tokenCount = estimateTokens(`${systemText}\n${inputText}`);

    sendJson(res, 200, { input_tokens: tokenCount });
  } catch (error) {
    // Guard with instanceof: a thrown non-Error has no usable `.message`.
    if (error instanceof Error && error.message.includes('Validation failed')) {
      rejectInvalid(error.message);
      return;
    }
    throw error;
  }
}
604
+
605
+ // Remaining helper functions (abbreviated for brevity)
606
/**
 * Turns a validated Anthropic request body into the internal request shape,
 * filling in defaults.
 *
 * @param {object} body validated request body
 * @returns {object} request with `requestedModel` (as asked, or the configured
 *   default), `model` (after alias resolution), `max_tokens` (4096 when
 *   missing or falsy), `messages` and `tools` (always arrays), `stream`
 *   (boolean), and `system` / `thinking` passed through untouched.
 */
function normalizeAnthropicRequest(body) {
  const asArray = (value) => (Array.isArray(value) ? value : []);

  const requestedModel = body.model || config.defaultModel;
  const model = resolveModelAlias(requestedModel);
  const maxTokens = body.max_tokens || 4096;

  return {
    requestedModel,
    model,
    max_tokens: maxTokens,
    messages: asArray(body.messages),
    system: body.system,
    stream: Boolean(body.stream),
    tools: asArray(body.tools),
    thinking: body.thinking,
  };
}
619
+
620
/**
 * Converts a normalised Anthropic request into an Ollama `/api/chat` payload.
 *
 * - `think` is set to `true` only when thinking is actually requested. An
 *   Anthropic `thinking: { type: "disabled" }` object is truthy but means
 *   "off", so it must not enable thinking upstream. When thinking is off the
 *   field is `undefined` and therefore dropped by JSON.stringify.
 * - Tools are rewritten to the `{ type: "function", function: {...} }` shape,
 *   with an empty object schema when `input_schema` is missing.
 * - `max_tokens` becomes `options.num_predict`.
 *
 * @param {object} request result of normalizeAnthropicRequest()
 * @returns {object} body for the upstream chat endpoint
 */
function anthropicToOllamaRequest(request) {
  const thinkingRequested = Boolean(request.thinking) && request.thinking.type !== "disabled";

  return {
    model: request.model,
    stream: request.stream,
    think: thinkingRequested ? true : undefined,
    tools: request.tools.map((tool) => {
      // NOTE(review): tools typed "custom" are forwarded unchanged, i.e. still
      // in Anthropic shape — confirm the upstream accepts that form.
      if (tool.type === "custom") {
        return tool;
      }

      return {
        type: "function",
        function: {
          name: tool.name,
          description: tool.description || "",
          parameters: tool.input_schema || {
            type: "object",
            properties: {},
          },
        },
      };
    }),
    messages: anthropicMessagesToOllamaMessages(request.messages, request.system),
    options: {
      num_predict: request.max_tokens,
    },
  };
}
648
+
649
/**
 * Converts Anthropic messages (plus an optional system prompt) into the flat
 * message list sent to the upstream chat endpoint.
 *
 * - A string system prompt, or the joined text blocks of an array system
 *   prompt, becomes a leading `system` message.
 * - `text` blocks of a message are joined with newlines into `content`.
 * - `tool_use` blocks become `tool_calls` on the message.
 * - `tool_result` blocks become separate `tool` messages. Their `tool_name` is
 *   the name of the tool that was called, looked up from the `tool_use` block
 *   with the same id seen earlier in the conversation. If no such block was
 *   seen, the value falls back to the id, then to `block.name`, then "tool".
 * - A message that carried only tool results is not also emitted as an empty
 *   user/assistant message.
 * - Messages with roles other than `user` / `assistant` are dropped, though
 *   their tool results are still emitted.
 *
 * @param {Array<object>} messages Anthropic-style messages
 * @param {string|Array<object>|undefined} system Anthropic system prompt
 * @returns {Array<object>} upstream chat messages
 */
function anthropicMessagesToOllamaMessages(messages, system) {
  const result = [];

  if (system) {
    if (typeof system === "string") {
      result.push({ role: "system", content: system });
    } else if (Array.isArray(system)) {
      const content = system
        .filter((item) => item?.type === "text")
        .map((item) => item.text)
        .join("\n");

      if (content) {
        result.push({ role: "system", content });
      }
    }
  }

  // tool_use id -> tool name, so later tool results can name their tool.
  const toolNamesById = new Map();

  for (const message of messages) {
    const content = Array.isArray(message.content)
      ? message.content
      : [{ type: "text", text: String(message.content || "") }];

    const textParts = [];
    const toolCalls = [];
    let carriedToolResult = false;

    for (const block of content) {
      // `?.` tolerates null/undefined entries instead of crashing on them.
      switch (block?.type) {
        case "text":
          textParts.push(block.text || "");
          break;
        case "tool_use":
          if (block.id && block.name) {
            toolNamesById.set(block.id, block.name);
          }
          toolCalls.push({
            type: "function",
            function: {
              name: block.name,
              arguments: block.input || {},
            },
          });
          break;
        case "tool_result":
          carriedToolResult = true;
          result.push({
            role: "tool",
            tool_name:
              toolNamesById.get(block.tool_use_id) ||
              block.tool_use_id ||
              block.name ||
              "tool",
            content: flattenToolResultContent(block.content),
          });
          break;
        default:
          break;
      }
    }

    if (message.role !== "assistant" && message.role !== "user") {
      continue;
    }

    const normalized = {
      role: message.role,
      content: textParts.join("\n"),
    };

    if (toolCalls.length) {
      normalized.tool_calls = toolCalls;
    }

    // The tool messages above already represent a results-only turn.
    const isEmptyResultCarrier =
      carriedToolResult && normalized.content === "" && toolCalls.length === 0;

    if (!isEmptyResultCarrier) {
      result.push(normalized);
    }
  }

  return result;
}
717
+
718
/**
 * Reduces the `content` of a tool_result block to a single string.
 *
 * - null / undefined become "".
 * - Strings are returned unchanged.
 * - Arrays are mapped item by item (strings as-is, text blocks to their text,
 *   anything else to JSON) and joined with newlines.
 * - Any other value is JSON-serialised.
 *
 * @param {*} content
 * @returns {string}
 */
function flattenToolResultContent(content) {
  if (content == null) {
    return "";
  }

  if (typeof content === "string") {
    return content;
  }

  if (!Array.isArray(content)) {
    return JSON.stringify(content);
  }

  const pieces = [];
  for (const entry of content) {
    if (typeof entry === "string") {
      pieces.push(entry);
    } else if (entry?.type === "text") {
      pieces.push(entry.text || "");
    } else {
      pieces.push(JSON.stringify(entry));
    }
  }

  return pieces.join("\n");
}
745
+
746
/**
 * Converts a non-streaming upstream chat reply into an Anthropic message.
 *
 * The reply text (if any) becomes a text block and each upstream tool call
 * becomes a `tool_use` block with a freshly generated id. `stop_reason` is
 * "tool_use" whenever tool calls are present, otherwise it is derived from
 * `done_reason`. Token usage comes from `prompt_eval_count` and `eval_count`,
 * defaulting to 0. Per-key usage is not recorded here.
 *
 * @param {object} payload upstream reply
 * @param {string} requestedModel model name echoed in the result
 * @returns {object} Anthropic-style message
 */
function ollamaToAnthropicMessage(payload, requestedModel) {
  const upstreamMessage = payload.message;
  const rawToolCalls = upstreamMessage?.tool_calls;
  const toolCalls = Array.isArray(rawToolCalls) ? rawToolCalls : [];
  const text = upstreamMessage?.content || "";

  const makeId = (prefix) => `${prefix}_${randomUUID().replace(/-/g, "")}`;

  const textBlocks = text ? [{ type: "text", text }] : [];
  const toolBlocks = toolCalls.map((call) => ({
    type: "tool_use",
    id: makeId("toolu"),
    name: call.function?.name || "tool",
    input: call.function?.arguments || {},
  }));

  const usage = {
    input_tokens: payload.prompt_eval_count || 0,
    output_tokens: payload.eval_count || 0,
  };

  return {
    id: makeId("msg"),
    type: "message",
    role: "assistant",
    model: requestedModel,
    content: [...textBlocks, ...toolBlocks],
    stop_reason: toolCalls.length ? "tool_use" : mapDoneReason(payload.done_reason),
    stop_sequence: null,
    usage,
  };
}
789
+
790
/**
 * Streams an upstream newline-delimited JSON chat response to the client as
 * Anthropic-style Server-Sent Events.
 *
 * Event sequence: `message_start`, an optional text block (index 0) with one
 * delta per upstream text chunk, one `tool_use` block per collected tool call,
 * then `message_delta` and `message_stop`.
 *
 * Behaviour worth knowing:
 *  - A final upstream line that lacks a trailing newline is still processed.
 *  - If the upstream ends without a `done` chunk, the message is still closed
 *    so the client is never left waiting for `message_stop`.
 *  - Tool calls are accumulated across chunks rather than overwritten.
 *  - Terminal events are emitted at most once, and on the error path an open
 *    text block is closed before them.
 *  - Token usage is recorded once, when the message is closed normally. If the
 *    upstream reported no output count, a length-based estimate is used.
 *
 * @param {Response} upstream upstream fetch response with a readable body
 * @param {object} request normalised request (its `model` is used)
 * @param {import("node:http").ServerResponse} res client response
 * @param {string} apiKey key that served the request, for usage tracking
 */
async function pipeStreamingResponse(upstream, request, res, apiKey) {
  res.writeHead(200, {
    "content-type": "text/event-stream",
    "cache-control": "no-cache, no-transform",
    connection: "keep-alive",
    "x-accel-buffering": "no",
  });

  const decoder = new TextDecoder();
  const reader = upstream.body.getReader();

  let buffer = "";
  let textStarted = false;
  let finished = false;
  let outputTokens = 0;
  let inputTokens = 0;
  let aggregatedText = "";
  const toolCalls = [];

  const messageId = `msg_${randomUUID().replace(/-/g, "")}`;

  emitAnthropicEvent(res, {
    type: "message_start",
    message: {
      id: messageId,
      type: "message",
      role: "assistant",
      model: request.model,
      content: [],
      stop_reason: null,
      stop_sequence: null,
      usage: {
        input_tokens: 0,
        output_tokens: 0,
      },
    },
  });

  // Closes open blocks, emits tool calls, records usage and sends the
  // terminal events. Safe to call more than once.
  const finishMessage = (doneReason) => {
    if (finished) {
      return;
    }
    finished = true;

    if (textStarted) {
      emitAnthropicEvent(res, {
        type: "content_block_stop",
        index: 0,
      });
    }

    let nextIndex = textStarted ? 1 : 0;
    for (const toolCall of toolCalls) {
      const input = toolCall.function?.arguments || {};
      const toolId = `toolu_${randomUUID().replace(/-/g, "")}`;
      emitAnthropicEvent(res, {
        type: "content_block_start",
        index: nextIndex,
        content_block: {
          type: "tool_use",
          id: toolId,
          name: toolCall.function?.name || "tool",
          input: {},
        },
      });
      emitAnthropicEvent(res, {
        type: "content_block_delta",
        index: nextIndex,
        delta: {
          type: "input_json_delta",
          partial_json: JSON.stringify(input),
        },
      });
      emitAnthropicEvent(res, {
        type: "content_block_stop",
        index: nextIndex,
      });
      nextIndex += 1;
    }

    // Record token usage against the specific key (streaming path)
    const finalOutputTokens = outputTokens || estimateTokens(aggregatedText);
    usageTracker.recordUsage(apiKey, finalOutputTokens);
    recordTokenUsage(request.model, inputTokens, finalOutputTokens);

    emitAnthropicEvent(res, {
      type: "message_delta",
      delta: {
        stop_reason: toolCalls.length ? "tool_use" : mapDoneReason(doneReason),
        stop_sequence: null,
      },
      usage: {
        output_tokens: finalOutputTokens,
      },
    });

    emitAnthropicEvent(res, {
      type: "message_stop",
    });
  };

  // Handles one NDJSON line. A bad line is logged and skipped so that a single
  // malformed chunk does not abort the whole stream.
  const processLine = (line) => {
    const trimmed = line.trim();
    if (!trimmed) {
      return;
    }

    try {
      const chunk = JSON.parse(trimmed);
      const message = chunk.message || {};
      inputTokens = chunk.prompt_eval_count || inputTokens;
      outputTokens = chunk.eval_count || outputTokens;

      if (message.content) {
        if (!textStarted) {
          textStarted = true;
          emitAnthropicEvent(res, {
            type: "content_block_start",
            index: 0,
            content_block: {
              type: "text",
              text: "",
            },
          });
        }

        aggregatedText += message.content;
        emitAnthropicEvent(res, {
          type: "content_block_delta",
          index: 0,
          delta: {
            type: "text_delta",
            text: message.content,
          },
        });
      }

      if (Array.isArray(message.tool_calls) && message.tool_calls.length) {
        toolCalls.push(...message.tool_calls);
      }

      if (chunk.done) {
        finishMessage(chunk.done_reason);
      }
    } catch (parseError) {
      logger.error(`Error parsing streaming chunk: ${parseError.message}`);
    }
  };

  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) {
        break;
      }

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split(/\r?\n/);
      // The last element is an incomplete line (or ""); keep it for later.
      buffer = lines.pop() || "";

      for (const line of lines) {
        processLine(line);
      }
    }

    // Flush the decoder and handle a final line without a trailing newline.
    buffer += decoder.decode();
    processLine(buffer);
    buffer = "";

    // No-op if a `done` chunk already closed the message.
    finishMessage(undefined);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(`Streaming error: ${reason}`);
    if (!finished && !res.writableEnded) {
      finished = true;
      try {
        if (textStarted) {
          emitAnthropicEvent(res, {
            type: "content_block_stop",
            index: 0,
          });
        }
        emitAnthropicEvent(res, {
          type: "message_delta",
          delta: {
            stop_reason: "end_turn",
            stop_sequence: null,
          },
          usage: {
            output_tokens: outputTokens || estimateTokens(aggregatedText),
          },
        });
        emitAnthropicEvent(res, {
          type: "message_stop",
        });
      } catch (emitError) {
        logger.error(`Failed to emit terminal streaming event: ${emitError.message}`);
      }
    }
  } finally {
    reader.releaseLock();
  }

  if (!res.writableEnded) {
    res.end();
  }
}
972
+
973
/**
 * Writes one Server-Sent Event to the response: an `event:` line carrying the
 * payload's `type`, followed by a `data:` line with the payload as JSON and a
 * blank line.
 *
 * @param {{write: Function}} res writable response
 * @param {{type: string}} payload event payload
 */
function emitAnthropicEvent(res, payload) {
  const { type } = payload;
  res.write(`event: ${type}\n`);

  const serialized = JSON.stringify(payload);
  res.write(`data: ${serialized}\n\n`);
}
977
+
978
/**
 * Maps an upstream `done_reason` to an Anthropic `stop_reason`.
 *
 * "length" becomes "max_tokens"; every other value, including "stop", "done"
 * and a missing reason, becomes "end_turn".
 *
 * @param {string|undefined} reason
 * @returns {"max_tokens"|"end_turn"}
 */
function mapDoneReason(reason) {
  return reason === "length" ? "max_tokens" : "end_turn";
}
989
+
990
/**
 * Sends a request to the upstream using the key suggested by the usage
 * tracker, retrying with another key when the upstream answers 429.
 *
 * On 429 the key is marked rate limited (with the reset time if the upstream
 * supplied `reset_after` in seconds) and the call recurses to pick the next
 * key. The recursion ends when no key is left, which throws. 5xx responses
 * mark the key as failed but are still returned; 2xx responses mark it
 * healthy. Errors thrown while fetching mark the key failed and unhealthy
 * before being rethrown.
 *
 * @param {string} pathname upstream path, e.g. "/api/chat"
 * @param {RequestInit} init fetch options
 * @returns {Promise<{response: Response, apiKey: string}>}
 * @throws {Error} when every key is rate limited, or when the fetch fails
 */
async function fetchUpstream(pathname, init) {
  // Ask the tracker for the best key, skipping rate-limited ones.
  const apiKey = usageTracker.suggestNextKey(config.apiKeys);
  if (!apiKey) {
    throw new Error('All API keys are rate limited');
  }

  try {
    const response = await performUpstreamFetch(apiKey, pathname, init);

    // Count the request regardless of outcome; tokens are recorded later.
    usageTracker.recordUsage(apiKey, 0);

    if (response.status !== 429) {
      if (response.ok) {
        usageTracker.markHealthy(apiKey);
      } else if (response.status >= 500) {
        keyManager.markKeyFailed(apiKey, new Error(`HTTP ${response.status}: ${response.statusText}`));
      }

      // Hand back both the response and the key that produced it.
      return { response, apiKey };
    }

    // --- Rate limited: extract reason and reset time from the body ---
    let reason = 'Rate limit reached';
    let resetTime = null;

    try {
      const detailsText = await response.text();
      if (detailsText) {
        try {
          const errorData = JSON.parse(detailsText);
          reason = errorData.error || errorData.message || reason;
          if (typeof errorData.reset_after === 'number') {
            resetTime = Date.now() + (errorData.reset_after * 1000);
          }
        } catch {
          // Body is not usable JSON: use the raw text as the reason.
          reason = detailsText;
        }
      }
    } catch (parseError) {
      logger.error(`Failed to read rate limit response: ${parseError.message}`);
    }

    usageTracker.markRateLimited(apiKey, resetTime, reason);

    const keyPrefix = apiKey.substring(0, 20);
    if (typeof resetTime === 'number') {
      logger.warn(`Rate limit detected for key ${keyPrefix}... Reset in ${Math.ceil((resetTime - Date.now()) / 60000)} minutes`);
    } else {
      logger.warn(`Rate limit detected for key ${keyPrefix}... Reset time not provided by upstream`);
    }

    // Deliberately not awaited: a failure of the retry must not be attributed
    // to this key by the catch block below.
    return fetchUpstream(pathname, init);
  } catch (error) {
    keyManager.markKeyFailed(apiKey, error);
    usageTracker.markUnhealthy(apiKey, error.message);
    throw error;
  }
}
1051
+
1052
/**
 * Performs one authenticated request against the upstream base URL.
 *
 * Adds `authorization: Bearer <apiKey>` and defaults `accept` to
 * `application/json` when the caller did not set it.
 *
 * @param {string} apiKey key used for the Authorization header
 * @param {string} pathname path appended to `config.upstreamBaseUrl`
 * @param {RequestInit} [init] extra fetch options
 * @returns {Promise<Response>}
 */
async function performUpstreamFetch(apiKey, pathname, init) {
  const url = `${config.upstreamBaseUrl}${pathname}`;

  const headers = new Headers(init?.headers || {});
  const accept = headers.get("accept") || "application/json";
  headers.set("accept", accept);
  headers.set("authorization", `Bearer ${apiKey}`);

  // NOTE(review): Node's built-in fetch does not appear to honour an `agent`
  // option (it takes a `dispatcher`); confirm that the agent from
  // ./http-agents.js actually takes effect here.
  const requestOptions = {
    ...init,
    headers,
    agent: getAgent(url),
  };

  return fetch(url, requestOptions);
}
1066
+
1067
/**
 * Persists the given API keys to the env file as one
 * `OLLAMA_API_KEYS=<comma separated>` line.
 *
 * The first existing `OLLAMA_API_KEYS=` or `OLLAMA_API_KEY=` line is replaced
 * in place and any further such lines are removed; if none exists the line is
 * appended. All other lines are preserved. Runs of blank lines are collapsed
 * and the file always ends with exactly one newline.
 *
 * Keys are trimmed and empty entries dropped. A key containing a line break is
 * rejected: written verbatim it would add arbitrary extra lines (variables) to
 * the env file.
 *
 * @param {string[]} keys keys to store
 * @throws {Error} if a key contains a carriage return or line feed
 */
function writeApiKeysToEnvFile(keys) {
  const normalizedKeys = keys.map((entry) => entry.trim()).filter(Boolean);

  // Validate before touching the file so a bad key never causes a write.
  if (normalizedKeys.some((entry) => /[\r\n]/.test(entry))) {
    throw new Error("API keys must not contain line breaks");
  }

  const existing = fs.existsSync(envFilePath)
    ? fs.readFileSync(envFilePath, "utf8")
    : "";

  const nextLine = `OLLAMA_API_KEYS=${normalizedKeys.join(",")}`;
  const keyLinePattern = /^\s*OLLAMA_API_KEYS?=/;

  let replaced = false;
  const output = [];
  for (const line of existing.split(/\r?\n/)) {
    if (!keyLinePattern.test(line)) {
      output.push(line);
      continue;
    }

    // Keep only one key line, at the position of the first occurrence.
    if (!replaced) {
      replaced = true;
      output.push(nextLine);
    }
  }

  if (!replaced) {
    output.push(nextLine);
  }

  const content = output.join("\n").replace(/\n{3,}/g, "\n\n").trimEnd() + "\n";
  fs.writeFileSync(envFilePath, content, "utf8");
}
1094
+
1095
/**
 * Checks one API key by requesting `/api/tags` and records the outcome.
 *
 * - 429: the key is marked rate limited, with reason and reset time taken
 *   from the response body when available.
 * - other non-2xx: the key is marked failed and unhealthy.
 * - 2xx: the key is marked healthy.
 * - thrown error: the key is marked failed and unhealthy.
 *
 * Never throws unless one of the tracker calls in the catch block throws.
 *
 * @param {string} apiKey key to probe
 */
async function probeSingleApiKey(apiKey) {
  try {
    const response = await performUpstreamFetch(apiKey, "/api/tags", {
      method: "GET",
    });

    if (response.status === 429) {
      let reason = 'Rate limit reached';
      let resetTime = null;

      try {
        const detailsText = await response.text();
        if (detailsText) {
          try {
            const errorData = JSON.parse(detailsText);
            reason = errorData.error || errorData.message || reason;
            if (typeof errorData.reset_after === 'number') {
              // reset_after is treated as seconds from now
              resetTime = Date.now() + (errorData.reset_after * 1000);
            }
          } catch {
            // Body is not usable JSON: use the raw text as the reason.
            reason = detailsText;
          }
        }
      } catch (error) {
        logger.error(`Failed reading probe 429 body: ${error.message}`);
      }

      usageTracker.markRateLimited(apiKey, resetTime, reason);
      return;
    }

    if (response.ok) {
      usageTracker.markHealthy(apiKey);
      return;
    }

    const failure = `HTTP ${response.status}: ${response.statusText}`;
    keyManager.markKeyFailed(apiKey, new Error(failure));
    usageTracker.markUnhealthy(apiKey, failure);
  } catch (error) {
    keyManager.markKeyFailed(apiKey, error);
    usageTracker.markUnhealthy(apiKey, error.message);
  }
}
1139
+
1140
/**
 * Probes every configured API key, one after another, logging when the run
 * starts and when it completes.
 */
async function probeAllApiKeys() {
  const keysToProbe = config.apiKeys;
  logger.info(`Starting API key probe for ${keysToProbe.length} keys`);

  // Sequential on purpose: each probe finishes before the next one starts.
  for (const candidate of keysToProbe) {
    await probeSingleApiKey(candidate);
  }

  logger.info('API key probe completed');
}
1149
+
1150
/**
 * Relays a failed upstream response to the client, keeping the upstream status
 * code and wrapping the body text in an `upstream_error` envelope. Also logs
 * the failure and increments the upstream error counter.
 *
 * @param {Response} upstream non-OK upstream response
 * @param {import("node:http").ServerResponse} res client response
 */
async function proxyError(upstream, res) {
  let bodyText;
  try {
    bodyText = await upstream.text();
  } catch {
    // Body unreadable: fall back to the status text.
    bodyText = upstream.statusText;
  }

  logger.error(`Upstream error: ${upstream.status} ${bodyText}`);
  incrementError('upstream');

  const errorBody = {
    type: "error",
    error: {
      type: "upstream_error",
      message: bodyText || upstream.statusText,
    },
  };
  sendJson(res, upstream.status, errorBody);
}
1169
+
1170
/**
 * Reads the whole request body and parses it as JSON.
 *
 * @param {AsyncIterable<Buffer>} req request stream yielding Buffer chunks
 * @returns {Promise<*>} the parsed value, or `{}` when the body is empty
 * @throws {SyntaxError} when the body is not valid JSON
 */
async function readJson(req) {
  const parts = [];
  for await (const part of req) {
    parts.push(part);
  }

  // Decode after concatenating so split multi-byte characters stay intact.
  const text = Buffer.concat(parts).toString("utf8");
  if (!text) {
    return {};
  }

  return JSON.parse(text);
}
1180
+
1181
/**
 * Sends a JSON response.
 *
 * - Does nothing if the response has already ended.
 * - If headers were already sent, the status code can no longer be set: the
 *   JSON is written as the final chunk and write errors are ignored.
 * - Otherwise writes the status with a JSON content type, then the body.
 *
 * @param {import("node:http").ServerResponse} res
 * @param {number} statusCode
 * @param {*} payload value to serialise
 */
function sendJson(res, statusCode, payload) {
  if (res.writableEnded) {
    return;
  }

  if (!res.headersSent) {
    res.writeHead(statusCode, {
      "content-type": "application/json; charset=utf-8",
    });
    res.end(JSON.stringify(payload));
    return;
  }

  try {
    res.end(JSON.stringify(payload));
  } catch {
    // Ignore late-write attempts on already-started responses.
  }
}
1200
+
1201
/**
 * Resolves a requested model name to the model sent upstream.
 *
 * Resolution order:
 *  1. An explicit entry in `config.aliases` for the exact name.
 *  2. Sonnet/Opus/"default" names (case-insensitive) map to the
 *     `ollama-free-auto` alias, else the default model.
 *  3. Haiku names map to `ollama-free-fast`, then `ollama-free-auto`, then the
 *     default model.
 *  4. Anything else is returned unchanged.
 *
 * Aliases are read as own properties only. A plain `aliases[name]` lookup also
 * sees inherited members, so a model called "constructor" or "toString" would
 * resolve to a function instead of a model name.
 *
 * @param {string} model requested model name
 * @returns {string} upstream model name
 */
function resolveModelAlias(model) {
  const aliases = config.aliases;
  const ownAlias = (name) => (Object.hasOwn(aliases, name) ? aliases[name] : undefined);

  const direct = ownAlias(model);
  if (direct) {
    return direct;
  }

  const normalized = String(model || "").toLowerCase();

  if (
    normalized === "default" ||
    normalized === "sonnet" ||
    normalized === "opus" ||
    normalized.startsWith("claude-sonnet") ||
    normalized.startsWith("claude-opus")
  ) {
    return ownAlias("ollama-free-auto") || config.defaultModel;
  }

  if (
    normalized === "haiku" ||
    normalized.startsWith("claude-haiku")
  ) {
    return (
      ownAlias("ollama-free-fast") ||
      ownAlias("ollama-free-auto") ||
      config.defaultModel
    );
  }

  return model;
}
1231
+
1232
/**
 * Reverse alias lookup: returns the first alias in `config.aliases` whose
 * target is exactly `model`.
 *
 * @param {string} model upstream model id
 * @returns {string|undefined} the alias name, or undefined if none matches
 */
function findAliasForModel(model) {
  for (const [aliasName, target] of Object.entries(config.aliases)) {
    if (target === model) {
      return aliasName;
    }
  }

  return undefined;
}
1235
+
1236
/**
 * Rough token estimate: one token per four characters, rounded up, and never
 * less than 1 (even for empty or missing text).
 *
 * @param {string|null|undefined} text
 * @returns {number}
 */
function estimateTokens(text) {
  const characters = (text || "").length;
  const estimate = Math.ceil(characters / 4);
  return estimate < 1 ? 1 : estimate;
}