loren-code 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +14 -0
- package/LICENSE +21 -0
- package/README.md +153 -0
- package/package.json +70 -0
- package/scripts/ClaudeWrapperLauncher.cs +78 -0
- package/scripts/claude-wrapper.js +216 -0
- package/scripts/install-claude-ollama.ps1 +184 -0
- package/scripts/loren.js +515 -0
- package/scripts/uninstall-claude-ollama.ps1 +73 -0
- package/src/bootstrap.js +30 -0
- package/src/cache.js +64 -0
- package/src/config-watcher.js +73 -0
- package/src/config.js +98 -0
- package/src/http-agents.js +80 -0
- package/src/key-manager.js +69 -0
- package/src/logger.js +46 -0
- package/src/metrics.js +210 -0
- package/src/schemas.js +66 -0
- package/src/server.js +1238 -0
- package/src/usage-tracker.js +346 -0
package/src/server.js
ADDED
|
@@ -0,0 +1,1238 @@
|
|
|
1
|
+
import http from "node:http";
|
|
2
|
+
import { randomUUID } from "node:crypto";
|
|
3
|
+
import { URL } from "node:url";
|
|
4
|
+
import fs from "node:fs";
|
|
5
|
+
import path from "node:path";
|
|
6
|
+
import { fileURLToPath } from "node:url";
|
|
7
|
+
import { loadConfig } from "./config.js";
|
|
8
|
+
import { ensureEnvLocal, ensureRuntimeDir } from "./bootstrap.js";
|
|
9
|
+
import logger from "./logger.js";
|
|
10
|
+
import { KeyManager } from "./key-manager.js";
|
|
11
|
+
import { validateInput, MessageSchema, CountTokensSchema } from "./schemas.js";
|
|
12
|
+
import { modelCache, getFromCache, setInCache, getCacheStats } from "./cache.js";
|
|
13
|
+
import { getAgent } from "./http-agents.js";
|
|
14
|
+
import { getMetrics, incrementError, recordTokenUsage, metricsMiddleware } from "./metrics.js";
|
|
15
|
+
import { createConfigWatcher } from "./config-watcher.js";
|
|
16
|
+
import usageTracker from "./usage-tracker.js";
|
|
17
|
+
|
|
18
|
+
// Global configuration
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const projectRoot = path.resolve(__dirname, "..");

// Ensure the runtime directory and the .env.local template exist before
// any configuration is loaded.
ensureRuntimeDir(projectRoot);
ensureEnvLocal(projectRoot, { logger });

// Mutable module state: both are reassigned by reloadRuntimeConfig()
// whenever the config file changes at runtime.
let config = loadConfig();
let keyManager = new KeyManager(config.apiKeys);
// NOTE(review): resolved against process.cwd(), not projectRoot — confirm
// the server is always launched from the project root.
const envFilePath = path.resolve(process.cwd(), ".env.local");
|
|
29
|
+
|
|
30
|
+
/**
 * Re-reads configuration from the environment/.env.local and rebuilds the
 * pieces of module state that depend on it (key manager, usage tracker
 * key list). Called at startup-adjacent points and on config-file change.
 */
function reloadRuntimeConfig() {
  const freshConfig = loadConfig();
  config = freshConfig;
  keyManager = new KeyManager(freshConfig.apiKeys);
  usageTracker.syncKeysFromConfig();
  logger.info('Configuration reloaded');
}
|
|
36
|
+
|
|
37
|
+
// Config watcher: hot-reload configuration whenever .env.local changes,
// then re-probe every API key (fire-and-forget).
const configWatcher = createConfigWatcher('.env.local', () => {
  reloadRuntimeConfig();
  void probeAllApiKeys();
});

// Start the watcher
configWatcher.start();

// Cleanup on shutdown (Ctrl+C)
process.on('SIGINT', () => {
  logger.info('Shutting down gracefully...');
  configWatcher.stop();
  process.exit(0);
});

process.on('SIGTERM', () => {
  logger.info('Received SIGTERM, shutting down gracefully...');
  configWatcher.stop();
  process.exit(0);
});

// Refuse to start without at least one upstream API key.
if (!config.apiKeys.length) {
  logger.error('No Ollama API keys found. Set OLLAMA_API_KEYS or OLLAMA_API_KEY in the environment or .env.local.');
  process.exit(1);
}
|
|
63
|
+
|
|
64
|
+
// HTTP entry point: every request passes through the metrics middleware
// and then the router. Uncaught errors are turned into Anthropic-style
// JSON error responses when possible.
const server = http.createServer(async (req, res) => {
  // Apply the metrics middleware (its next() callback is a no-op here).
  metricsMiddleware(req, res, () => {});

  try {
    await routeRequest(req, res);
  } catch (error) {
    logger.error(`Request handling error: ${error.message}`, { stack: error.stack });
    incrementError('other');
    // Only send a JSON error body if nothing has been written yet.
    if (!res.headersSent && !res.writableEnded) {
      sendJson(res, 500, {
        type: "error",
        error: {
          type: "api_error",
          message: error instanceof Error ? error.message : String(error),
        },
      });
      return;
    }

    // Headers already sent (e.g. failure mid-stream): just close the
    // response so the client is not left hanging.
    if (!res.writableEnded) {
      try {
        res.end();
      } catch (endError) {
        logger.error(`Failed to close response after request error: ${endError.message}`);
      }
    }
  }
});
|
|
93
|
+
|
|
94
|
+
// Bind the server and announce startup; probe all keys once at startup
// (fire-and-forget, result is logged by the probe itself).
server.listen(config.port, config.host, () => {
  logger.info(`Claude <-> Ollama Cloud bridge listening on http://${config.host}:${config.port}`);
  logger.info(`Upstream: ${config.upstreamBaseUrl}`);
  logger.info(`API Keys loaded: ${config.apiKeys.length}`);
  void probeAllApiKeys();
});
|
|
100
|
+
|
|
101
|
+
/**
 * Dispatches an incoming request to the matching endpoint handler, or
 * replies with an Anthropic-style 404 error when no route matches.
 *
 * @param {http.IncomingMessage} req incoming request
 * @param {http.ServerResponse} res response to write to
 */
async function routeRequest(req, res) {
  if (!req.url) {
    sendJson(res, 404, { error: "Not found" });
    return;
  }

  const url = new URL(req.url, `http://${req.headers.host || "localhost"}`);

  // Request log line
  logger.info(`${req.method} ${url.pathname}`, {
    ip: req.socket.remoteAddress,
    userAgent: req.headers['user-agent'],
  });

  // Route table: each entry lists the HTTP methods accepted for a path.
  const routes = [
    { path: "/health", methods: ["GET"], handler: handleHealth },
    { path: "/metrics", methods: ["GET"], handler: handleMetrics },
    { path: "/events", methods: ["GET"], handler: handleEvents },
    { path: "/api/keys", methods: ["POST", "DELETE"], handler: handleKeys },
    // Usage API (GET for data, POST for reset)
    { path: "/api/usage", methods: ["GET", "POST"], handler: handleUsage },
    { path: "/dashboard", methods: ["GET"], handler: handleDashboard },
    { path: "/v1/models", methods: ["GET"], handler: handleModels },
    // Forces a fresh fetch from Ollama Cloud
    { path: "/v1/refresh", methods: ["POST"], handler: handleRefresh },
    { path: "/v1/messages", methods: ["POST"], handler: handleMessages },
    { path: "/v1/messages/count_tokens", methods: ["POST"], handler: handleCountTokens },
  ];

  const route = routes.find(
    (entry) => entry.path === url.pathname && entry.methods.includes(req.method)
  );

  if (route) {
    await route.handler(req, res);
    return;
  }

  sendJson(res, 404, {
    type: "error",
    error: {
      type: "not_found_error",
      message: `Unsupported route: ${req.method} ${url.pathname}`,
    },
  });
}
|
|
181
|
+
|
|
182
|
+
/**
 * GET /health — liveness snapshot: uptime, upstream URL and key counts.
 */
async function handleHealth(_req, res) {
  const { healthy } = keyManager.getStats();
  const body = {
    ok: true,
    uptime: process.uptime(),
    upstream: config.upstreamBaseUrl,
    keysLoaded: config.apiKeys.length,
    keysHealthy: healthy,
    version: process.env.npm_package_version || '0.1.0',
  };
  sendJson(res, 200, body);
}
|
|
193
|
+
|
|
194
|
+
/**
 * GET /metrics — returns the current metrics snapshot as JSON.
 *
 * Fix: the previous version computed a `cacheStats` object via
 * getCacheStats(modelCache, 'models') but never used it (dead code);
 * the unused computation has been removed. The response is unchanged.
 */
async function handleMetrics(_req, res) {
  sendJson(res, 200, getMetrics());
}
|
|
202
|
+
|
|
203
|
+
/**
 * POST /v1/refresh — drops the cached model list, fetches a fresh one
 * from the upstream /api/tags endpoint, re-caches it and returns it.
 */
async function handleRefresh(_req, res) {
  // Invalidate models cache and fetch fresh data
  const cacheKey = 'models_list';
  modelCache.del(cacheKey);

  try {
    const { response: upstream } = await fetchUpstream("/api/tags", {
      method: "GET",
    });

    if (!upstream.ok) {
      // Forward the upstream error response to the client.
      await proxyError(upstream, res);
      return;
    }

    const payload = await upstream.json();
    let models = Array.isArray(payload.models) ? payload.models : [];

    // Sort by modified date (most recent first)
    models = models.sort((a, b) => {
      const dateA = a.modified_at ? new Date(a.modified_at).getTime() : 0;
      const dateB = b.modified_at ? new Date(b.modified_at).getTime() : 0;
      return dateB - dateA;
    });

    // Expand each upstream model into Anthropic-style model records; a
    // model with a configured alias yields two records (alias first).
    const data = models.flatMap((model) => {
      const baseId = model.model || model.name;
      if (!baseId) {
        // Entries without a usable id are dropped.
        return [];
      }

      const alias = findAliasForModel(baseId);
      const baseRecord = {
        id: baseId,
        type: "model",
        display_name: baseId,
        created_at: model.modified_at || new Date().toISOString(),
      };

      if (!alias) {
        return [baseRecord];
      }

      return [
        {
          ...baseRecord,
          id: alias,
          display_name: `${alias} -> ${baseId}`,
        },
        baseRecord,
      ];
    });

    const response = { data, refreshed: true };

    // Store the fresh list so subsequent /v1/models hits the cache.
    setInCache(modelCache, cacheKey, response);

    logger.info('Models cache refreshed');
    sendJson(res, 200, response);
  } catch (error) {
    logger.error(`Error refreshing models: ${error.message}`);
    incrementError('upstream');
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "upstream_error",
        message: "Failed to refresh models"
      }
    });
  }
}
|
|
274
|
+
/**
 * Builds the combined dashboard payload: usage data, metrics snapshot
 * and a health summary, all computed at call time.
 */
function getDashboardState() {
  const keyStats = keyManager.getStats();
  const health = {
    ok: true,
    uptime: process.uptime(),
    upstream: config.upstreamBaseUrl,
    keysLoaded: config.apiKeys.length,
    keysHealthy: keyStats.healthy,
    version: process.env.npm_package_version || '0.1.0',
  };

  return {
    usage: usageTracker.getDashboardData(),
    metrics: getMetrics(),
    health,
  };
}
|
|
288
|
+
|
|
289
|
+
/**
 * GET /events — Server-Sent Events stream that pushes the dashboard
 * state immediately and then every 2 seconds until the client
 * disconnects.
 */
async function handleEvents(req, res) {
  res.writeHead(200, {
    "content-type": "text/event-stream",
    "cache-control": "no-cache, no-transform",
    connection: "keep-alive",
    "x-accel-buffering": "no",
  });

  const pushState = () => {
    // Skip writes once the response has been closed.
    if (res.writableEnded) {
      return;
    }
    res.write(`event: state\n`);
    res.write(`data: ${JSON.stringify(getDashboardState())}\n\n`);
  };

  pushState();
  const timer = setInterval(pushState, 2000);

  // Stop the ticker and close the stream when the client goes away.
  req.on("close", () => {
    clearInterval(timer);
    if (!res.writableEnded) {
      res.end();
    }
  });
}
|
|
316
|
+
|
|
317
|
+
/**
 * GET /dashboard — serves the static dashboard.html page from src/.
 *
 * Fix: the previous version recomputed __filename/__dirname locally,
 * shadowing the identical module-level constants; the redundant
 * recomputation has been removed (resolved path is unchanged).
 */
async function handleDashboard(_req, res) {
  try {
    const dashboardPath = path.join(__dirname, 'dashboard.html');
    const dashboardHtml = await fs.promises.readFile(dashboardPath, 'utf8');

    res.writeHead(200, {
      'Content-Type': 'text/html',
      'Cache-Control': 'no-cache'
    });
    res.end(dashboardHtml);
  } catch (error) {
    logger.error(`Error serving dashboard: ${error.message}`);
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "internal_error",
        message: "Failed to load dashboard"
      }
    });
  }
}
|
|
341
|
+
|
|
342
|
+
/**
 * POST/DELETE /api/keys — adds or removes an upstream API key.
 *
 * The updated key list is persisted to .env.local and the runtime
 * configuration is reloaded; POST additionally probes the new key.
 */
async function handleKeys(req, res) {
  try {
    const body = await readJson(req);
    const key = String(body?.key || "").trim();

    if (!key) {
      sendJson(res, 400, {
        type: "error",
        error: {
          type: "invalid_request_error",
          message: "API key is required",
        },
      });
      return;
    }

    if (req.method === "POST") {
      // Deduplicate while preserving existing order.
      const withKey = Array.from(new Set([...config.apiKeys, key]));
      writeApiKeysToEnvFile(withKey);
      reloadRuntimeConfig();
      await probeSingleApiKey(key);
      sendJson(res, 200, {
        ok: true,
        added: key,
        keysLoaded: config.apiKeys.length,
      });
      return;
    }

    // DELETE: drop the key and persist the remainder.
    const withoutKey = config.apiKeys.filter((existingKey) => existingKey !== key);
    writeApiKeysToEnvFile(withoutKey);
    reloadRuntimeConfig();
    sendJson(res, 200, {
      ok: true,
      removed: key,
      keysLoaded: config.apiKeys.length,
    });
  } catch (error) {
    logger.error(`Error updating API keys: ${error.message}`);
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "internal_error",
        message: error instanceof Error ? error.message : String(error),
      },
    });
  }
}
|
|
390
|
+
|
|
391
|
+
/**
 * GET/POST /api/usage — returns per-key usage data plus active
 * rate-limit information, or resets all counters when the
 * ?reset=true query parameter is present.
 */
async function handleUsage(req, res) {
  try {
    const url = new URL(req.url, `http://${req.headers.host || "localhost"}`);

    // Reset is requested via query parameter: /api/usage?reset=true
    if (url.searchParams.get('reset') === 'true') {
      usageTracker.resetAll();
      sendJson(res, 200, { ok: true, message: 'Usage data reset successfully' });
      return;
    }

    const usageData = usageTracker.getDashboardData();
    const rateLimitedKeys = usageData.keys.filter((k) => k.isRateLimited);

    // Condensed view of a rate-limited key (truncated key, reason,
    // seconds until reset when known).
    const describeRateLimit = (k) => {
      const hasResetTime = typeof k.rateLimitResetTime === 'number';
      return {
        key: k.key.substring(0, 20) + '...',
        reason: k.rateLimitReason || 'Rate limit reached',
        resetIn: hasResetTime
          ? Math.max(0, Math.floor((k.rateLimitResetTime - Date.now()) / 1000))
          : null,
      };
    };

    sendJson(res, 200, {
      summary: usageData.summary,
      keys: usageData.keys,
      rateLimits: {
        active: rateLimitedKeys.length,
        keys: rateLimitedKeys.map(describeRateLimit),
      },
    });
  } catch (error) {
    logger.error(`Error fetching usage data: ${error.message}`);
    incrementError('other');
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "internal_error",
        message: "Failed to fetch usage data",
      },
    });
  }
}
|
|
433
|
+
|
|
434
|
+
/**
 * GET /v1/models — returns the model list, served from cache unless
 * ?refresh=true is passed; fresh results are re-cached.
 */
async function handleModels(req, res) {
  const url = new URL(req.url, `http://${req.headers.host || "localhost"}`);

  // Force refresh if requested
  const forceRefresh = url.searchParams.get('refresh') === 'true';
  const cacheKey = 'models_list';

  if (!forceRefresh) {
    const cached = getFromCache(modelCache, cacheKey);
    if (cached) {
      sendJson(res, 200, cached);
      return;
    }
  }

  try {
    const { response: upstream } = await fetchUpstream("/api/tags", {
      method: "GET",
    });

    if (!upstream.ok) {
      // Forward the upstream error response to the client.
      await proxyError(upstream, res);
      return;
    }

    const payload = await upstream.json();
    let models = Array.isArray(payload.models) ? payload.models : [];

    // Sort by modified date (most recent first)
    models = models.sort((a, b) => {
      const dateA = a.modified_at ? new Date(a.modified_at).getTime() : 0;
      const dateB = b.modified_at ? new Date(b.modified_at).getTime() : 0;
      return dateB - dateA;
    });

    // Expand each upstream model into Anthropic-style records; a model
    // with a configured alias yields two records (alias first).
    const data = models.flatMap((model) => {
      const baseId = model.model || model.name;
      if (!baseId) {
        // Entries without a usable id are dropped.
        return [];
      }

      const alias = findAliasForModel(baseId);
      const baseRecord = {
        id: baseId,
        type: "model",
        display_name: baseId,
        created_at: model.modified_at || new Date().toISOString(),
      };

      if (!alias) {
        return [baseRecord];
      }

      return [
        {
          ...baseRecord,
          id: alias,
          display_name: `${alias} -> ${baseId}`,
        },
        baseRecord,
      ];
    });

    const response = { data };

    // Replace any cached copy with the fresh data.
    // NOTE(review): the del looks redundant if setInCache overwrites —
    // confirm against the cache module's semantics.
    modelCache.del(cacheKey);
    setInCache(modelCache, cacheKey, response);

    sendJson(res, 200, response);
  } catch (error) {
    logger.error(`Error fetching models: ${error.message}`);
    incrementError('upstream');
    sendJson(res, 500, {
      type: "error",
      error: {
        type: "upstream_error",
        message: "Failed to fetch models from upstream"
      }
    });
  }
}
|
|
516
|
+
|
|
517
|
+
/**
 * POST /v1/messages — Anthropic-compatible chat endpoint.
 *
 * Validates the body, translates it into an Ollama /api/chat request,
 * forwards it upstream, and converts the reply back to the Anthropic
 * message shape (streamed as SSE when `stream` is set). Token usage is
 * recorded against the upstream key that served the request.
 *
 * Fix: the catch block previously called `error.message.includes(...)`
 * unconditionally, which itself throws if a non-Error value is thrown;
 * the message is now extracted defensively first.
 */
async function handleMessages(req, res) {
  try {
    const body = await readJson(req);

    // Validate the incoming payload against the Anthropic schema.
    const validatedBody = validateInput(MessageSchema, body);

    const anthropicRequest = normalizeAnthropicRequest(validatedBody);
    logger.info(`[bridge] /v1/messages requested_model=${anthropicRequest.requestedModel} resolved_model=${anthropicRequest.model} stream=${anthropicRequest.stream}`);

    const ollamaRequest = anthropicToOllamaRequest(anthropicRequest);

    const { response: upstream, apiKey } = await fetchUpstream("/api/chat", {
      method: "POST",
      headers: {
        "content-type": "application/json",
      },
      body: JSON.stringify(ollamaRequest),
    });

    if (!upstream.ok) {
      await proxyError(upstream, res);
      return;
    }

    if (anthropicRequest.stream) {
      await pipeStreamingResponse(upstream, anthropicRequest, res, apiKey);
      return;
    }

    const payload = await upstream.json();
    const message = ollamaToAnthropicMessage(payload, anthropicRequest.model);

    // Record token usage against the specific upstream key.
    usageTracker.recordUsage(apiKey, message.usage?.output_tokens || 0);
    recordTokenUsage(
      anthropicRequest.model,
      message.usage?.input_tokens || 0,
      message.usage?.output_tokens || 0
    );

    sendJson(res, 200, message);
  } catch (error) {
    // Guard: `error` may not be an Error instance (e.g. a thrown string),
    // in which case reading .message would throw a TypeError here.
    const errorMessage = error instanceof Error ? error.message : String(error);
    if (errorMessage.includes('Validation failed')) {
      incrementError('validation');
      sendJson(res, 400, {
        type: "error",
        error: {
          type: "invalid_request_error",
          message: errorMessage
        }
      });
    } else {
      // Unknown error: let the server-level handler produce the 500.
      throw error;
    }
  }
}
|
|
574
|
+
|
|
575
|
+
/**
 * POST /v1/messages/count_tokens — rough token-count estimate for a
 * prospective message payload. Uses estimateTokens over the serialized
 * system prompt and messages, not a real model tokenizer.
 *
 * Fix: like handleMessages, the catch block now extracts the error
 * message defensively instead of assuming an Error instance.
 */
async function handleCountTokens(req, res) {
  try {
    const body = await readJson(req);
    const validatedBody = validateInput(CountTokensSchema, body);

    const requestedModel = validatedBody.model || config.defaultModel;
    const resolvedModel = resolveModelAlias(requestedModel);
    logger.info(`[bridge] /v1/messages/count_tokens requested_model=${requestedModel} resolved_model=${resolvedModel}`);

    // Estimate over the serialized system prompt plus messages.
    const inputText = JSON.stringify(validatedBody.messages || []);
    const systemText = typeof validatedBody.system === "string" ? validatedBody.system : JSON.stringify(validatedBody.system || "");
    const tokenCount = estimateTokens(`${systemText}\n${inputText}`);

    sendJson(res, 200, { input_tokens: tokenCount });
  } catch (error) {
    // Guard against non-Error throwables before touching .message.
    const errorMessage = error instanceof Error ? error.message : String(error);
    if (errorMessage.includes('Validation failed')) {
      incrementError('validation');
      sendJson(res, 400, {
        type: "error",
        error: {
          type: "invalid_request_error",
          message: errorMessage
        }
      });
    } else {
      throw error;
    }
  }
}
|
|
604
|
+
|
|
605
|
+
// Remaining helper functions (abbreviated for brevity)
|
|
606
|
+
/**
 * Normalizes a validated Anthropic request body into the internal
 * request shape: resolves the model alias, applies defaults
 * (default model, 4096 max_tokens), and coerces optional fields.
 */
function normalizeAnthropicRequest(body) {
  const requestedModel = body.model || config.defaultModel;

  const normalized = {
    requestedModel,
    model: resolveModelAlias(requestedModel),
    max_tokens: body.max_tokens || 4096,
    messages: Array.isArray(body.messages) ? body.messages : [],
    system: body.system,
    stream: Boolean(body.stream),
    tools: Array.isArray(body.tools) ? body.tools : [],
    thinking: body.thinking,
  };

  return normalized;
}
|
|
619
|
+
|
|
620
|
+
/**
 * Converts a normalized Anthropic request into an Ollama /api/chat
 * request body. Anthropic tool definitions are mapped to Ollama
 * function-tool definitions; tools of type "custom" pass through
 * unchanged.
 */
function anthropicToOllamaRequest(request) {
  const toOllamaTool = (tool) => {
    if (tool.type === "custom") {
      return tool;
    }

    const parameters = tool.input_schema || {
      type: "object",
      properties: {},
    };

    return {
      type: "function",
      function: {
        name: tool.name,
        description: tool.description || "",
        parameters,
      },
    };
  };

  return {
    model: request.model,
    stream: request.stream,
    think: request.thinking ? true : undefined,
    tools: request.tools.map(toOllamaTool),
    messages: anthropicMessagesToOllamaMessages(request.messages, request.system),
    options: {
      num_predict: request.max_tokens,
    },
  };
}
|
|
648
|
+
|
|
649
|
+
/**
 * Converts Anthropic messages (plus an optional system prompt) into the
 * Ollama chat message array.
 *
 * - A string system prompt becomes one leading "system" message; an
 *   array system prompt joins its "text" blocks with newlines.
 * - "text" blocks are concatenated into the message content.
 * - "tool_use" blocks become Ollama tool_calls on the same message.
 * - "tool_result" blocks are pushed immediately as separate "tool"-role
 *   messages (note: this places them before the message they came from).
 * - Only "assistant" and "user" messages are forwarded; other roles are
 *   dropped, though their tool_result blocks are still emitted.
 */
function anthropicMessagesToOllamaMessages(messages, system) {
  const result = [];

  if (system) {
    if (typeof system === "string") {
      result.push({ role: "system", content: system });
    } else if (Array.isArray(system)) {
      const content = system
        .filter((item) => item?.type === "text")
        .map((item) => item.text)
        .join("\n");

      if (content) {
        result.push({ role: "system", content });
      }
    }
  }

  for (const message of messages) {
    // Normalize plain-string content into a single text block.
    const content = Array.isArray(message.content)
      ? message.content
      : [{ type: "text", text: String(message.content || "") }];

    const textParts = [];
    const toolCalls = [];

    for (const block of content) {
      switch (block.type) {
        case "text":
          textParts.push(block.text || "");
          break;
        case "tool_use":
          toolCalls.push({
            type: "function",
            function: {
              name: block.name,
              arguments: block.input || {},
            },
          });
          break;
        case "tool_result":
          // Tool results become standalone "tool"-role messages.
          result.push({
            role: "tool",
            tool_name: block.tool_use_id || block.name || "tool",
            content: flattenToolResultContent(block.content),
          });
          break;
        default:
          // Unknown block types are ignored.
          break;
      }
    }

    const normalized = {
      role: message.role,
      content: textParts.join("\n"),
    };

    if (toolCalls.length) {
      normalized.tool_calls = toolCalls;
    }

    if (normalized.role === "assistant" || normalized.role === "user") {
      result.push(normalized);
    }
  }

  return result;
}
|
|
717
|
+
|
|
718
|
+
/**
 * Flattens Anthropic tool_result content into a single string suitable
 * for an Ollama "tool" message.
 *
 * - Strings pass through unchanged.
 * - Arrays are joined with newlines: string items as-is, "text" blocks
 *   contribute their text (or ""), anything else is JSON-encoded.
 * - null/undefined become "".
 * - Any other value is JSON-encoded.
 */
function flattenToolResultContent(content) {
  if (typeof content === "string") {
    return content;
  }

  if (Array.isArray(content)) {
    const parts = content.map((item) => {
      if (typeof item === "string") {
        return item;
      }
      if (item?.type === "text") {
        return item.text || "";
      }
      return JSON.stringify(item);
    });
    return parts.join("\n");
  }

  if (content == null) {
    return "";
  }

  return JSON.stringify(content);
}
|
|
745
|
+
|
|
746
|
+
/**
 * Converts a non-streaming Ollama /api/chat response into an
 * Anthropic-style message object: text content first, then one
 * tool_use block per upstream tool call, with real token counts from
 * prompt_eval_count / eval_count (0 when absent).
 */
function ollamaToAnthropicMessage(payload, requestedModel) {
  const reply = payload.message || {};
  const toolCalls = Array.isArray(reply.tool_calls) ? reply.tool_calls : [];
  const content = [];

  const text = reply.content || "";
  if (text) {
    content.push({
      type: "text",
      text,
    });
  }

  for (const toolCall of toolCalls) {
    content.push({
      type: "tool_use",
      id: `toolu_${randomUUID().replace(/-/g, "")}`,
      name: toolCall.function?.name || "tool",
      input: toolCall.function?.arguments || {},
    });
  }

  // Real token counts reported by Ollama (0 when the fields are absent).
  // Per-key usage recording happens in the caller, which knows the key.
  const usage = {
    input_tokens: payload.prompt_eval_count || 0,
    output_tokens: payload.eval_count || 0,
  };

  return {
    id: `msg_${randomUUID().replace(/-/g, "")}`,
    type: "message",
    role: "assistant",
    model: requestedModel,
    content,
    stop_reason: toolCalls.length ? "tool_use" : mapDoneReason(payload.done_reason),
    stop_sequence: null,
    usage,
  };
}
|
|
789
|
+
|
|
790
|
+
/**
 * Streams an Ollama NDJSON chat response to the client as an
 * Anthropic-style SSE event stream (message_start, content_block_*,
 * message_delta, message_stop), recording token usage when the final
 * chunk arrives.
 *
 * @param upstream fetch Response whose body is NDJSON chat chunks
 * @param request  normalized Anthropic request (model used in events)
 * @param res      Node ServerResponse to write SSE events to
 * @param apiKey   upstream key that served the request, for usage tracking
 */
async function pipeStreamingResponse(upstream, request, res, apiKey) {
  res.writeHead(200, {
    "content-type": "text/event-stream",
    "cache-control": "no-cache, no-transform",
    connection: "keep-alive",
    "x-accel-buffering": "no",
  });

  const decoder = new TextDecoder();
  const reader = upstream.body.getReader();

  let buffer = "";
  // Whether the index-0 text content block has been opened.
  let started = false;
  let outputTokens = 0;
  let inputTokens = 0;
  // Full text accumulated, used as a token-estimation fallback when the
  // upstream never reports eval_count.
  let aggregatedText = "";
  let toolCalls = [];

  const messageId = `msg_${randomUUID().replace(/-/g, "")}`;

  // Anthropic streams begin with a message_start envelope.
  emitAnthropicEvent(res, {
    type: "message_start",
    message: {
      id: messageId,
      type: "message",
      role: "assistant",
      model: request.model,
      content: [],
      stop_reason: null,
      stop_sequence: null,
      usage: {
        input_tokens: 0,
        output_tokens: 0,
      },
    },
  });

  try {
    while (true) {
      const { value, done } = await reader.read();
      if (done) {
        break;
      }

      // Split the byte stream into complete NDJSON lines; the trailing
      // partial line is carried over in `buffer`.
      // NOTE(review): any partial line still in `buffer` when the stream
      // ends is never parsed — confirm upstream always newline-terminates.
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split(/\r?\n/);
      buffer = lines.pop() || "";

      for (const line of lines) {
        const trimmed = line.trim();
        if (!trimmed) {
          continue;
        }

        try {
          const chunk = JSON.parse(trimmed);
          const message = chunk.message || {};
          // Token counters: keep the latest non-zero values reported.
          inputTokens = chunk.prompt_eval_count || inputTokens;
          outputTokens = chunk.eval_count || outputTokens;

          // Open the text content block on the first text chunk.
          if (message.content && !started) {
            started = true;
            emitAnthropicEvent(res, {
              type: "content_block_start",
              index: 0,
              content_block: {
                type: "text",
                text: "",
              },
            });
          }

          if (message.content) {
            aggregatedText += message.content;
            emitAnthropicEvent(res, {
              type: "content_block_delta",
              index: 0,
              delta: {
                type: "text_delta",
                text: message.content,
              },
            });
          }

          // Tool calls arrive whole on a chunk; keep the latest set.
          if (Array.isArray(message.tool_calls) && message.tool_calls.length) {
            toolCalls = message.tool_calls;
          }

          // Final chunk: close blocks, emit tool_use blocks, record
          // usage, and finish the Anthropic event sequence.
          if (chunk.done) {
            if (started) {
              emitAnthropicEvent(res, {
                type: "content_block_stop",
                index: 0,
              });
            }

            // Tool blocks follow the text block (index 0) when present.
            let nextIndex = started ? 1 : 0;
            for (const toolCall of toolCalls) {
              const input = toolCall.function?.arguments || {};
              const toolId = `toolu_${randomUUID().replace(/-/g, "")}`;
              emitAnthropicEvent(res, {
                type: "content_block_start",
                index: nextIndex,
                content_block: {
                  type: "tool_use",
                  id: toolId,
                  name: toolCall.function?.name || "tool",
                  input: {},
                },
              });
              emitAnthropicEvent(res, {
                type: "content_block_delta",
                index: nextIndex,
                delta: {
                  type: "input_json_delta",
                  partial_json: JSON.stringify(input),
                },
              });
              emitAnthropicEvent(res, {
                type: "content_block_stop",
                index: nextIndex,
              });
              nextIndex += 1;
            }

            // Record token usage against the serving key (streaming path).
            usageTracker.recordUsage(apiKey, outputTokens || estimateTokens(aggregatedText));
            recordTokenUsage(
              request.model,
              inputTokens,
              outputTokens || estimateTokens(aggregatedText)
            );

            emitAnthropicEvent(res, {
              type: "message_delta",
              delta: {
                stop_reason: toolCalls.length ? "tool_use" : mapDoneReason(chunk.done_reason),
                stop_sequence: null,
              },
              usage: {
                output_tokens: outputTokens || estimateTokens(aggregatedText),
              },
            });

            emitAnthropicEvent(res, {
              type: "message_stop",
            });
          }
        } catch (parseError) {
          // A malformed chunk is logged and skipped; the stream continues.
          logger.error(`Error parsing streaming chunk: ${parseError.message}`);
        }
      }
    }
  } catch (error) {
    logger.error(`Streaming error: ${error.message}`);
    // Best-effort: terminate the SSE sequence so the client can finish.
    if (!res.writableEnded) {
      try {
        emitAnthropicEvent(res, {
          type: "message_delta",
          delta: {
            stop_reason: "end_turn",
            stop_sequence: null,
          },
          usage: {
            output_tokens: outputTokens || estimateTokens(aggregatedText),
          },
        });
        emitAnthropicEvent(res, {
          type: "message_stop",
        });
      } catch (emitError) {
        logger.error(`Failed to emit terminal streaming event: ${emitError.message}`);
      }
    }
  } finally {
    reader.releaseLock();
  }

  if (!res.writableEnded) {
    res.end();
  }
}
|
|
972
|
+
|
|
973
|
+
/**
 * Emit one Anthropic-style server-sent event on the HTTP response.
 *
 * @param {import("node:http").ServerResponse} res - open streaming response
 * @param {{type: string}} payload - event body; `type` doubles as the SSE event name
 */
function emitAnthropicEvent(res, payload) {
  const { type } = payload;
  const data = JSON.stringify(payload);
  res.write(`event: ${type}\n`);
  res.write(`data: ${data}\n\n`);
}
|
|
977
|
+
|
|
978
|
+
/**
 * Map an Ollama `done_reason` to an Anthropic `stop_reason`.
 * Only "length" is special-cased (token budget exhausted); every other
 * value — including "stop", "done", and unknown/missing reasons — maps
 * to "end_turn", exactly as the original switch did.
 *
 * @param {string|undefined} reason - upstream done_reason
 * @returns {"end_turn"|"max_tokens"}
 */
function mapDoneReason(reason) {
  if (reason === "length") {
    return "max_tokens";
  }
  return "end_turn";
}
|
|
989
|
+
|
|
990
|
+
/**
 * Fetch from the upstream API using the healthiest available key, retrying
 * with the next key on HTTP 429 and tracking key health on every outcome.
 *
 * @param {string} pathname - upstream path (appended to config.upstreamBaseUrl)
 * @param {RequestInit} [init] - fetch options forwarded to performUpstreamFetch
 * @returns {Promise<{response: Response, apiKey: string}>} the upstream
 *   response together with the key that produced it
 * @throws {Error} when every key is rate limited, or when the fetch itself fails
 */
async function fetchUpstream(pathname, init) {
  // Ask the tracker for the best candidate key; give up when none remain.
  const apiKey = usageTracker.suggestNextKey(config.apiKeys);
  if (!apiKey) {
    throw new Error('All API keys are rate limited');
  }

  try {
    const response = await performUpstreamFetch(apiKey, pathname, init);

    // Record that the key was used; token counts are filled in later.
    usageTracker.recordUsage(apiKey, 0);

    if (response.status === 429) {
      // Pull the reason and (optional) reset time out of the 429 body.
      let reason = 'Rate limit reached';
      let resetTime = null;
      try {
        const body = await response.text();
        if (body) {
          try {
            const parsed = JSON.parse(body);
            reason = parsed.error || parsed.message || reason;
            if (typeof parsed.reset_after === 'number') {
              resetTime = Date.now() + (parsed.reset_after * 1000);
            }
          } catch {
            reason = body; // non-JSON body: use it verbatim
          }
        }
      } catch (readError) {
        logger.error(`Failed to read rate limit response: ${readError.message}`);
      }

      usageTracker.markRateLimited(apiKey, resetTime, reason);

      const keyPrefix = apiKey.substring(0, 20);
      if (typeof resetTime === 'number') {
        logger.warn(`Rate limit detected for key ${keyPrefix}... Reset in ${Math.ceil((resetTime - Date.now()) / 60000)} minutes`);
      } else {
        logger.warn(`Rate limit detected for key ${keyPrefix}... Reset time not provided by upstream`);
      }

      // Retry with the next candidate. Termination relies on suggestNextKey
      // eventually returning null once every key is marked rate limited.
      // NOTE(review): confirm markRateLimited takes the key out of rotation
      // even when resetTime is null, or this could loop on a stuck upstream.
      return fetchUpstream(pathname, init);
    }

    if (!response.ok && response.status >= 500) {
      keyManager.markKeyFailed(apiKey, new Error(`HTTP ${response.status}: ${response.statusText}`));
    } else if (response.ok) {
      usageTracker.markHealthy(apiKey);
    }

    // Hand back both the response and the key that served it.
    return { response, apiKey };
  } catch (error) {
    keyManager.markKeyFailed(apiKey, error);
    usageTracker.markUnhealthy(apiKey, error.message);
    throw error;
  }
}
|
|
1051
|
+
|
|
1052
|
+
/**
 * Perform a single authenticated fetch against the upstream base URL.
 *
 * @param {string} apiKey - bearer token for the Authorization header
 * @param {string} pathname - path appended to config.upstreamBaseUrl
 * @param {RequestInit} [init] - fetch options; its headers are merged in
 * @returns {Promise<Response>}
 */
async function performUpstreamFetch(apiKey, pathname, init) {
  const url = `${config.upstreamBaseUrl}${pathname}`;

  const headers = new Headers(init?.headers || {});
  // Default the accept header without clobbering a caller-supplied one.
  if (!headers.get("accept")) {
    headers.set("accept", "application/json");
  }
  headers.set("authorization", `Bearer ${apiKey}`);

  // NOTE(review): Node's built-in fetch (undici) does not honor an `agent`
  // option — connection pooling is configured via `dispatcher`. Confirm that
  // getAgent(url) is actually taking effect here.
  const agent = getAgent(url);

  return fetch(url, { ...init, headers, agent });
}
|
|
1066
|
+
|
|
1067
|
+
/**
 * Persist the API key list to the env file as a single OLLAMA_API_KEYS line.
 *
 * The first existing OLLAMA_API_KEYS= / OLLAMA_API_KEY= line is replaced in
 * place; any duplicates are dropped; all other lines are preserved. If no
 * such line exists, the new one is appended.
 *
 * Fix: the original split an empty/missing file into [""] and prepended the
 * key line after that empty entry, so fresh files started with a blank line.
 * An empty file now yields no pre-existing lines at all.
 *
 * @param {string[]} keys - raw key strings; blanks are trimmed and discarded
 */
function writeApiKeysToEnvFile(keys) {
  const existing = fs.existsSync(envFilePath)
    ? fs.readFileSync(envFilePath, "utf8")
    : "";

  const normalizedKeys = keys.map((entry) => entry.trim()).filter(Boolean);
  const nextLine = `OLLAMA_API_KEYS=${normalizedKeys.join(",")}`;

  // An empty file has no lines; "".split() would wrongly yield [""].
  const lines = existing === "" ? [] : existing.split(/\r?\n/);

  let replaced = false;
  const output = [];
  for (const line of lines) {
    if (/^\s*OLLAMA_API_KEYS=/.test(line) || /^\s*OLLAMA_API_KEY=/.test(line)) {
      if (!replaced) {
        replaced = true;
        output.push(nextLine);
      }
      // Duplicate key lines are intentionally dropped.
      continue;
    }
    output.push(line);
  }

  if (!replaced) {
    output.push(nextLine);
  }

  // Collapse runs of blank lines and guarantee a single trailing newline.
  const content = output.join("\n").replace(/\n{3,}/g, "\n\n").trimEnd() + "\n";
  fs.writeFileSync(envFilePath, content, "utf8");
}
|
|
1094
|
+
|
|
1095
|
+
/**
 * Probe one API key against the upstream /api/tags endpoint and record its
 * health: rate-limited (429), unhealthy (non-OK or thrown error), or healthy.
 * Never throws — every failure path is absorbed into the trackers.
 *
 * @param {string} apiKey - key to probe
 * @returns {Promise<void>}
 */
async function probeSingleApiKey(apiKey) {
  try {
    const response = await performUpstreamFetch(apiKey, "/api/tags", { method: "GET" });

    if (response.status === 429) {
      // Extract a human-readable reason and optional reset timestamp.
      let reason = 'Rate limit reached';
      let resetTime = null;
      try {
        const body = await response.text();
        if (body) {
          try {
            const parsed = JSON.parse(body);
            reason = parsed.error || parsed.message || reason;
            if (typeof parsed.reset_after === 'number') {
              resetTime = Date.now() + (parsed.reset_after * 1000);
            }
          } catch {
            reason = body; // non-JSON body: report it verbatim
          }
        }
      } catch (readError) {
        logger.error(`Failed reading probe 429 body: ${readError.message}`);
      }

      usageTracker.markRateLimited(apiKey, resetTime, reason);
      return;
    }

    if (!response.ok) {
      const reason = `HTTP ${response.status}: ${response.statusText}`;
      keyManager.markKeyFailed(apiKey, new Error(reason));
      usageTracker.markUnhealthy(apiKey, reason);
      return;
    }

    usageTracker.markHealthy(apiKey);
  } catch (error) {
    keyManager.markKeyFailed(apiKey, error);
    usageTracker.markUnhealthy(apiKey, error.message);
  }
}
|
|
1139
|
+
|
|
1140
|
+
/**
 * Probe every configured API key, one after another.
 * Keys are checked sequentially (each probe is awaited before the next
 * starts), preserving the original one-at-a-time behavior.
 *
 * @returns {Promise<void>}
 */
async function probeAllApiKeys() {
  logger.info(`Starting API key probe for ${config.apiKeys.length} keys`);

  for (const key of config.apiKeys) {
    await probeSingleApiKey(key);
  }

  logger.info('API key probe completed');
}
|
|
1149
|
+
|
|
1150
|
+
/**
 * Relay an upstream error to the client as an Anthropic-style error payload,
 * logging it and bumping the upstream error metric on the way.
 *
 * @param {Response} upstream - the failed upstream response
 * @param {import("node:http").ServerResponse} res - client response to finish
 * @returns {Promise<void>}
 */
async function proxyError(upstream, res) {
  // Best-effort read of the error body; fall back to the status text.
  const details = await upstream.text().catch(() => upstream.statusText);

  logger.error(`Upstream error: ${upstream.status} ${details}`);
  incrementError('upstream');

  sendJson(res, upstream.status, {
    type: "error",
    error: {
      type: "upstream_error",
      message: details || upstream.statusText,
    },
  });
}
|
|
1169
|
+
|
|
1170
|
+
/**
 * Read and parse the JSON body of an incoming request.
 *
 * Generalized with an optional size cap: bodies come from untrusted clients,
 * and the original buffered them without limit. The default (Infinity)
 * preserves the original unbounded behavior for existing callers.
 *
 * @param {AsyncIterable<Buffer>} req - incoming request (or any async
 *   iterable of Buffer chunks)
 * @param {number} [maxBytes=Infinity] - reject bodies larger than this
 * @returns {Promise<object>} parsed body, or {} for an empty body
 * @throws {Error} when the body exceeds maxBytes
 * @throws {SyntaxError} when the body is not valid JSON
 */
async function readJson(req, maxBytes = Infinity) {
  const chunks = [];
  let total = 0;

  for await (const chunk of req) {
    total += chunk.length;
    if (total > maxBytes) {
      throw new Error(`Request body exceeds ${maxBytes} bytes`);
    }
    chunks.push(chunk);
  }

  const raw = Buffer.concat(chunks).toString("utf8");
  return raw ? JSON.parse(raw) : {};
}
|
|
1180
|
+
|
|
1181
|
+
/**
 * Write a JSON response, tolerating responses that are already in flight.
 *
 * - Response already finished: do nothing.
 * - Headers already sent (e.g. failure mid-stream): best-effort append the
 *   body and close, swallowing write errors on a dying socket.
 * - Otherwise: send status + content-type headers and the serialized payload.
 *
 * @param {import("node:http").ServerResponse} res
 * @param {number} statusCode - HTTP status to send when headers are not yet out
 * @param {object} payload - JSON-serializable response body
 */
function sendJson(res, statusCode, payload) {
  if (res.writableEnded) {
    return;
  }

  if (res.headersSent) {
    try {
      res.end(JSON.stringify(payload));
    } catch {
      // Ignore late-write attempts on already-started responses.
    }
    return;
  }

  res.writeHead(statusCode, { "content-type": "application/json; charset=utf-8" });
  res.end(JSON.stringify(payload));
}
|
|
1200
|
+
|
|
1201
|
+
/**
 * Resolve a requested model name to a concrete upstream model.
 *
 * Order of resolution:
 *   1. exact alias match in config.aliases,
 *   2. Claude sonnet/opus/"default" family -> "ollama-free-auto" alias
 *      (falling back to config.defaultModel),
 *   3. Claude haiku family -> "ollama-free-fast" alias (then "ollama-free-auto",
 *      then config.defaultModel),
 *   4. otherwise the name is passed through unchanged.
 *
 * Fix: the alias lookup now checks own properties only. `model` comes from
 * the client request, so names like "constructor" or "__proto__" previously
 * matched inherited Object.prototype members and were returned as "aliases".
 *
 * @param {string} model - model name from the request
 * @returns {string} resolved upstream model name
 */
function resolveModelAlias(model) {
  if (Object.hasOwn(config.aliases, model) && config.aliases[model]) {
    return config.aliases[model];
  }

  const normalized = String(model || "").toLowerCase();

  if (
    normalized === "default" ||
    normalized === "sonnet" ||
    normalized === "opus" ||
    normalized.startsWith("claude-sonnet") ||
    normalized.startsWith("claude-opus")
  ) {
    return config.aliases["ollama-free-auto"] || config.defaultModel;
  }

  if (
    normalized === "haiku" ||
    normalized.startsWith("claude-haiku")
  ) {
    return (
      config.aliases["ollama-free-fast"] ||
      config.aliases["ollama-free-auto"] ||
      config.defaultModel
    );
  }

  return model;
}
|
|
1231
|
+
|
|
1232
|
+
/**
 * Reverse alias lookup: return the first alias in config.aliases whose
 * target equals the given model, or undefined when no alias points at it.
 *
 * @param {string} model - concrete upstream model name
 * @returns {string|undefined} matching alias name, if any
 */
function findAliasForModel(model) {
  for (const [alias, target] of Object.entries(config.aliases)) {
    if (target === model) {
      return alias;
    }
  }
  return undefined;
}
|
|
1235
|
+
|
|
1236
|
+
/**
 * Crude token estimate: roughly one token per 4 characters, never below 1.
 * Falsy input (null, undefined, "") counts as an empty string.
 *
 * @param {string|null|undefined} text - text to estimate
 * @returns {number} estimated token count (>= 1)
 */
function estimateTokens(text) {
  const chars = (text || "").length;
  return Math.max(1, Math.ceil(chars / 4));
}
|