@agentgazer/proxy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,898 @@
1
+ "use strict";
2
// TypeScript emit helper: re-export property `k` of module `m` on object `o`
// under the name `k2` (defaults to `k`). When Object.create exists it installs
// a getter so the re-export stays live with the source module; otherwise it
// falls back to a plain (snapshot) property copy.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Re-wrap in a live getter unless the source already provides a suitable
    // accessor on a real ES-module namespace object.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
13
// TypeScript emit helper: attach `v` as the non-writable "default" export of
// namespace object `o` (plain assignment fallback for very old engines).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
18
// TypeScript emit helper: CommonJS equivalent of `import * as ns from "..."`.
// Copies every own export except "default" onto a fresh namespace object via
// __createBinding and attaches the original module as its "default".
var __importStar = (this && this.__importStar) || (function () {
    // Picks the best own-key enumerator on first call, then memoizes it by
    // reassigning `ownKeys` to the chosen implementation.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        // Real ES-module namespaces pass through untouched.
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.startProxy = startProxy;
37
+ const http = __importStar(require("node:http"));
38
+ const shared_1 = require("@agentgazer/shared");
39
+ const server_1 = require("@agentgazer/server");
40
+ const modelOverrideCache = {};
41
+ const MODEL_OVERRIDE_CACHE_TTL_MS = 30_000; // 30 seconds
42
/**
 * Resolve the per-agent model-override rule for a provider, consulting a
 * 30-second in-process cache (module-level `modelOverrideCache`) before
 * hitting the database.
 *
 * @param db      database handle, or falsy to disable lookups entirely.
 * @param agentId agent whose rule is being resolved.
 * @param provider provider name (cache is keyed "agentId:provider").
 * @returns the override model name, or null when no DB / no rule.
 *
 * NOTE(review): expired entries are overwritten but never evicted, so the
 * cache grows with the number of distinct agent/provider pairs — confirm
 * that cardinality is bounded in practice.
 */
function getModelOverride(db, agentId, provider) {
    if (!db)
        return null;
    const cacheKey = `${agentId}:${provider}`;
    const cached = modelOverrideCache[cacheKey];
    if (cached && cached.expiresAt > Date.now()) {
        return cached.model_override;
    }
    // Fetch from DB
    const rule = (0, server_1.getModelRule)(db, agentId, provider);
    const modelOverride = rule?.model_override ?? null;
    // Cache the result (negative results are cached too, avoiding repeated
    // DB misses for agents with no rule)
    modelOverrideCache[cacheKey] = {
        model_override: modelOverride,
        expiresAt: Date.now() + MODEL_OVERRIDE_CACHE_TTL_MS,
    };
    return modelOverride;
}
60
+ const log = (0, shared_1.createLogger)("proxy");
61
+ const event_buffer_js_1 = require("./event-buffer.js");
62
+ const rate_limiter_js_1 = require("./rate-limiter.js");
63
+ const DEFAULT_PORT = 4000;
64
+ const DEFAULT_ENDPOINT = "https://ingest.agentgazer.com/v1/events";
65
+ const DEFAULT_FLUSH_INTERVAL = 5000;
66
+ const DEFAULT_MAX_BUFFER_SIZE = 50;
67
+ const MAX_REQUEST_BODY_SIZE = 10 * 1024 * 1024; // 10 MB
68
+ const MAX_SSE_BUFFER_SIZE = 50 * 1024 * 1024; // 50 MB
69
+ const UPSTREAM_TIMEOUT_MS = 120_000; // 2 minutes
70
+ const RATE_LIMIT_REFRESH_INTERVAL_MS = 30_000; // 30 seconds
71
/**
 * Buffer an entire incoming HTTP request body into a single Buffer.
 * Rejects (and destroys the request stream) with "Request body too large"
 * as soon as the running total exceeds MAX_REQUEST_BODY_SIZE, and rejects
 * on any stream error.
 */
function readRequestBody(req) {
    return new Promise((resolve, reject) => {
        const pieces = [];
        let received = 0;
        req.on("data", (part) => {
            received += part.length;
            if (received > MAX_REQUEST_BODY_SIZE) {
                const tooLarge = new Error("Request body too large");
                req.destroy(tooLarge);
                reject(tooLarge);
                return;
            }
            pieces.push(part);
        });
        req.on("end", () => {
            resolve(Buffer.concat(pieces));
        });
        req.on("error", (err) => reject(err));
    });
}
89
/**
 * Serialize `body` as JSON and finish the response with the given status,
 * setting Content-Type and an exact byte-length Content-Length.
 */
function sendJson(res, statusCode, body) {
    const json = JSON.stringify(body);
    const headers = {
        "Content-Type": "application/json",
        "Content-Length": Buffer.byteLength(json),
    };
    res.writeHead(statusCode, headers);
    res.end(json);
}
97
+ // ---------------------------------------------------------------------------
98
+ // SSE streaming parsers — extract usage/model from provider-specific formats
99
+ // ---------------------------------------------------------------------------
100
/**
 * Extract model/usage metrics from OpenAI-style SSE `data:` payloads.
 * Later events overwrite earlier values (usage typically arrives in the
 * final chunk); unparseable lines are skipped silently.
 */
function parseOpenAISSE(dataLines, statusCode) {
    const result = {
        model: null,
        tokensIn: null,
        tokensOut: null,
        tokensTotal: null,
        statusCode,
        errorMessage: null,
    };
    for (const raw of dataLines) {
        let event;
        try {
            event = JSON.parse(raw);
        }
        catch {
            continue;
        }
        if (event.model)
            result.model = event.model;
        if (event.usage) {
            result.tokensIn = event.usage.prompt_tokens ?? null;
            result.tokensOut = event.usage.completion_tokens ?? null;
            result.tokensTotal = event.usage.total_tokens ?? null;
        }
    }
    return result;
}
129
/**
 * Extract model/usage metrics from Anthropic SSE events: the model and
 * input-token count come from `message_start`, the output-token count from
 * `message_delta`. The total is derived only when both counts are present.
 */
function parseAnthropicSSE(dataLines, statusCode) {
    let model = null;
    let inputTokens = null;
    let outputTokens = null;
    for (const raw of dataLines) {
        let event;
        try {
            event = JSON.parse(raw);
        }
        catch {
            continue;
        }
        if (event.type === "message_start" && event.message) {
            model = event.message.model ?? null;
            inputTokens = event.message.usage?.input_tokens ?? null;
        }
        if (event.type === "message_delta" && event.usage) {
            outputTokens = event.usage.output_tokens ?? null;
        }
    }
    return {
        model,
        tokensIn: inputTokens,
        tokensOut: outputTokens,
        tokensTotal: inputTokens != null && outputTokens != null ? inputTokens + outputTokens : null,
        statusCode,
        errorMessage: null,
    };
}
158
/**
 * Extract model/usage metrics from Google (Gemini) SSE chunks. The model
 * comes from `modelVersion` and the token counts from `usageMetadata`;
 * later chunks overwrite earlier values.
 */
function parseGoogleSSE(dataLines, statusCode) {
    const out = {
        model: null,
        tokensIn: null,
        tokensOut: null,
        tokensTotal: null,
        statusCode,
        errorMessage: null,
    };
    for (const raw of dataLines) {
        let chunk;
        try {
            chunk = JSON.parse(raw);
        }
        catch {
            continue;
        }
        if (chunk.modelVersion)
            out.model = chunk.modelVersion;
        const usage = chunk.usageMetadata;
        if (usage) {
            out.tokensIn = usage.promptTokenCount ?? null;
            out.tokensOut = usage.candidatesTokenCount ?? null;
            out.tokensTotal = usage.totalTokenCount ?? null;
        }
    }
    return out;
}
187
/**
 * Extract usage metrics from Cohere SSE events. v1 streams report
 * `meta.billed_units` directly; v2 chat streams nest it under
 * `response.meta.billed_units` (which takes precedence). Cohere streams do
 * not carry a model name, so `model` is always null.
 */
function parseCohereSSE(dataLines, statusCode) {
    let promptTokens = null;
    let completionTokens = null;
    for (const raw of dataLines) {
        let event;
        try {
            event = JSON.parse(raw);
        }
        catch {
            continue;
        }
        // Prefer the v2 nested location, fall back to the v1 shape.
        const billed = event.response?.meta?.billed_units ?? event.meta?.billed_units;
        if (billed) {
            promptTokens = billed.input_tokens ?? null;
            completionTokens = billed.output_tokens ?? null;
        }
    }
    const total = promptTokens != null && completionTokens != null
        ? promptTokens + completionTokens
        : null;
    return {
        model: null,
        tokensIn: promptTokens,
        tokensOut: completionTokens,
        tokensTotal: total,
        statusCode,
        errorMessage: null,
    };
}
217
/**
 * Split a raw SSE body into its `data:` payload lines (dropping the
 * `[DONE]` sentinel) and dispatch to the provider-specific parser.
 * Returns null when there is nothing to parse or the provider has no
 * SSE parser.
 */
function parseSSEResponse(provider, sseText, statusCode) {
    const dataLines = [];
    for (const line of sseText.split("\n")) {
        if (line.startsWith("data: ") && line !== "data: [DONE]") {
            dataLines.push(line.slice(6));
        }
    }
    if (dataLines.length === 0)
        return null;
    // These providers all emit OpenAI-compatible streaming chunks.
    const openAICompatible = new Set([
        "openai", "mistral", "deepseek", "moonshot",
        "zhipu", "minimax", "baichuan", "yi",
    ]);
    if (openAICompatible.has(provider))
        return parseOpenAISSE(dataLines, statusCode);
    if (provider === "anthropic")
        return parseAnthropicSSE(dataLines, statusCode);
    if (provider === "google")
        return parseGoogleSSE(dataLines, statusCode);
    if (provider === "cohere")
        return parseCohereSSE(dataLines, statusCode);
    return null;
}
247
/**
 * Decide whether `agentId` may make a request right now.
 * Checks, in order: active flag, daily budget, allowed operating hours
 * (server-local time). With no DB handle, or no policy row for the agent,
 * requests are always allowed (backwards compatible).
 *
 * @returns {{allowed: boolean, reason?: string, message?: string}}
 */
function checkAgentPolicy(db, agentId) {
    if (!db)
        return { allowed: true }; // no DB → no policy enforcement
    const policy = (0, server_1.getAgentPolicy)(db, agentId);
    if (!policy)
        return { allowed: true }; // unknown agent / no policy → allow
    if (!policy.active) {
        return {
            allowed: false,
            reason: "inactive",
            message: "Agent is currently deactivated",
        };
    }
    if (policy.budget_limit !== null) {
        const spentToday = (0, server_1.getDailySpend)(db, agentId);
        if (spentToday >= policy.budget_limit) {
            return {
                allowed: false,
                reason: "budget_exceeded",
                message: `Daily budget limit of $${policy.budget_limit.toFixed(2)} exceeded (spent: $${spentToday.toFixed(2)})`,
            };
        }
    }
    if (policy.allowed_hours_start !== null && policy.allowed_hours_end !== null) {
        const hour = new Date().getHours();
        const start = policy.allowed_hours_start;
        const end = policy.allowed_hours_end;
        // start <= end is a same-day window (e.g. 9-17); otherwise the window
        // wraps midnight (e.g. 22-6).
        const withinWindow = start <= end
            ? hour >= start && hour < end
            : hour >= start || hour < end;
        if (!withinWindow) {
            return {
                allowed: false,
                reason: "outside_hours",
                message: `Agent is only allowed to operate between ${start}:00 and ${end}:00 (server time)`,
            };
        }
    }
    return { allowed: true };
}
301
+ /**
302
+ * Generate a blocked response in OpenAI format.
303
+ */
304
/**
 * Build a synthetic OpenAI chat-completion payload announcing a policy
 * block, with zeroed usage so no cost is attributed. The `reason` parameter
 * is part of the signature but is not embedded in the body.
 */
function generateOpenAIBlockedResponse(reason, message) {
    const blockNotice = `[AgentGazer Policy Block] ${message}`;
    return {
        id: `chatcmpl-blocked-${Date.now()}`,
        object: "chat.completion",
        created: Math.floor(Date.now() / 1000),
        model: "agentgazer-policy",
        choices: [
            {
                index: 0,
                message: { role: "assistant", content: blockNotice },
                finish_reason: "stop",
            },
        ],
        usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    };
}
327
+ /**
328
+ * Generate a blocked response in Anthropic format.
329
+ */
330
/**
 * Build a synthetic Anthropic Messages-API payload announcing a policy
 * block, with zeroed usage so no cost is attributed. The `reason` parameter
 * is part of the signature but is not embedded in the body.
 */
function generateAnthropicBlockedResponse(reason, message) {
    const blockNotice = `[AgentGazer Policy Block] ${message}`;
    return {
        id: `msg_blocked_${Date.now()}`,
        type: "message",
        role: "assistant",
        content: [{ type: "text", text: blockNotice }],
        model: "agentgazer-policy",
        stop_reason: "end_turn",
        usage: { input_tokens: 0, output_tokens: 0 },
    };
}
349
+ /**
350
+ * Generate a blocked response based on provider format.
351
+ */
352
/**
 * Pick the provider-appropriate blocked-response shape: Anthropic gets its
 * Messages-API format; every other provider gets the OpenAI format.
 */
function generateBlockedResponse(provider, reason, message) {
    return provider === "anthropic"
        ? generateAnthropicBlockedResponse(reason, message)
        : generateOpenAIBlockedResponse(reason, message);
}
359
+ /**
360
+ * Record a blocked event to the database.
361
+ */
362
/**
 * Persist a "blocked" event directly to the local DB (not the remote event
 * buffer). Best-effort: any failure is logged and swallowed so a broken DB
 * never prevents the blocked HTTP response from being sent. No-op when no
 * DB handle is configured.
 *
 * @param db       database handle, or falsy to skip recording.
 * @param agentId  agent that was blocked.
 * @param provider provider the request was destined for.
 * @param reason   machine-readable block reason (e.g. "budget_exceeded").
 * @param message  human-readable explanation stored in tags.
 */
function recordBlockedEvent(db, agentId, provider, reason, message) {
    if (!db)
        return;
    try {
        // Ensure agent exists
        (0, server_1.upsertAgent)(db, agentId, false);
        // Insert blocked event. status_code 403 is what gets recorded; note the
        // actual HTTP reply to the caller is a 200 synthetic completion.
        const event = {
            agent_id: agentId,
            event_type: "blocked",
            provider,
            model: null,
            tokens_in: null,
            tokens_out: null,
            tokens_total: null,
            cost_usd: null,
            latency_ms: null,
            status_code: 403,
            source: "proxy",
            timestamp: new Date().toISOString(),
            tags: { block_reason: reason, block_message: message },
        };
        (0, server_1.insertEvents)(db, [event]);
    }
    catch (err) {
        log.error("Failed to record blocked event", { err: String(err) });
    }
}
390
+ // ---------------------------------------------------------------------------
391
+ // Proxy server
392
+ // ---------------------------------------------------------------------------
393
+ /**
394
+ * Load rate limits from database and convert to RateLimiter config format.
395
+ */
396
/**
 * Read every per-agent rate-limit row from the database and key it as
 * "agentId:provider" in the shape RateLimiter expects. Returns an empty
 * map when there is no DB handle or the query fails (enforcement is
 * best-effort; failures are logged).
 */
function loadRateLimitsFromDb(db) {
    if (!db)
        return {};
    try {
        const configs = {};
        for (const row of (0, server_1.getAllRateLimits)(db)) {
            configs[`${row.agent_id}:${row.provider}`] = {
                maxRequests: row.max_requests,
                windowSeconds: row.window_seconds,
            };
        }
        return configs;
    }
    catch (err) {
        log.error("Failed to load rate limits from database", { err: String(err) });
        return {};
    }
}
416
/**
 * Start the AgentGazer proxy HTTP server.
 *
 * Incoming requests are forwarded to an upstream LLM provider (resolved
 * from the x-target-url header, a /{provider}/ path prefix, the Host
 * header, or path patterns), with per-agent policy and rate-limit
 * enforcement, optional API-key injection, model override, and usage
 * metric extraction into an EventBuffer.
 *
 * @param {object} options - port, agentId, endpoint, flushInterval,
 *   maxBufferSize, providerKeys, db, apiKey, and legacy rateLimits.
 * @returns {{ server: object, shutdown: () => Promise<void> }} the
 *   listening http.Server plus a graceful shutdown function.
 */
function startProxy(options) {
    const port = options.port ?? DEFAULT_PORT;
    const agentId = options.agentId;
    const endpoint = options.endpoint ?? DEFAULT_ENDPOINT;
    const flushInterval = options.flushInterval ?? DEFAULT_FLUSH_INTERVAL;
    const maxBufferSize = options.maxBufferSize ?? DEFAULT_MAX_BUFFER_SIZE;
    const providerKeys = options.providerKeys ?? {};
    const db = options.db;
    // Initialize rate limiter - prefer database, fall back to options for backward compatibility/testing
    let initialRateLimits = {};
    if (db) {
        initialRateLimits = loadRateLimitsFromDb(db);
    }
    else if (options.rateLimits) {
        // Convert legacy format (provider -> config) to new format (agentId:provider -> config)
        for (const [provider, config] of Object.entries(options.rateLimits)) {
            initialRateLimits[`${agentId}:${provider}`] = config;
        }
    }
    const rateLimiter = new rate_limiter_js_1.RateLimiter(initialRateLimits);
    // Set up periodic refresh of rate limits from database
    let rateLimitRefreshTimer = null;
    if (db) {
        rateLimitRefreshTimer = setInterval(() => {
            const configs = loadRateLimitsFromDb(db);
            rateLimiter.updateConfigs(configs);
        }, RATE_LIMIT_REFRESH_INTERVAL_MS);
        // unref so the refresh timer never keeps the process alive on its own
        rateLimitRefreshTimer.unref();
    }
    const startTime = Date.now();
    const eventBuffer = new event_buffer_js_1.EventBuffer({
        apiKey: options.apiKey,
        endpoint,
        flushInterval,
        maxBufferSize,
    });
    eventBuffer.start();
    const server = http.createServer((req, res) => {
        // Fire-and-forget: handleRequest reports all errors via the response.
        void handleRequest(req, res);
    });
    /**
     * Handle one proxied request end-to-end: resolve agent + upstream target,
     * enforce policy and rate limits, forward, then extract usage metrics.
     */
    async function handleRequest(req, res) {
        const method = req.method ?? "GET";
        const path = req.url ?? "/";
        // Health check endpoint
        if (method === "GET" && path === "/health") {
            sendJson(res, 200, {
                status: "ok",
                agent_id: agentId,
                uptime_ms: Date.now() - startTime,
            });
            return;
        }
        // Agent identification priority: header > path (/agents/{id}/...) > default
        // NOTE(review): a repeated x-agent-id header yields a string[] here —
        // confirm callers send it at most once.
        let effectiveAgentId = req.headers["x-agent-id"];
        let workingPath = path;
        // Check for /agents/{id}/... path pattern if no header
        if (!effectiveAgentId) {
            const agentPathResult = (0, shared_1.parseAgentPath)(path);
            if (agentPathResult) {
                effectiveAgentId = agentPathResult.agentId;
                workingPath = agentPathResult.remainingPath;
                log.info(`[PROXY] Agent ID from path: ${effectiveAgentId}`);
            }
        }
        // Fall back to default agent ID
        if (!effectiveAgentId) {
            effectiveAgentId = agentId;
        }
        // Proxy logic: use x-target-url header if provided, otherwise auto-detect
        // provider from the Host header or request path.
        let targetBase = req.headers["x-target-url"];
        // Validate x-target-url to prevent SSRF
        if (targetBase) {
            try {
                const parsed = new URL(targetBase);
                if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
                    sendJson(res, 400, { error: "x-target-url must use http or https protocol" });
                    return;
                }
            }
            catch {
                sendJson(res, 400, { error: "x-target-url must be a valid URL" });
                return;
            }
        }
        // Path prefix routing: /{provider}/... -> provider base URL + remaining path
        let pathPrefixProvider = null;
        let effectivePath = workingPath;
        log.info(`[PROXY] ${method} ${path} (working path: ${workingPath}, agent: ${effectiveAgentId})`);
        // Header log redacts anything whose name mentions "key" or "auth".
        log.info(`[PROXY] Headers: ${JSON.stringify(Object.fromEntries(Object.entries(req.headers).filter(([k]) => !k.toLowerCase().includes('key') && !k.toLowerCase().includes('auth'))))}`);
        if (!targetBase) {
            const prefixResult = (0, shared_1.parsePathPrefix)(workingPath);
            if (prefixResult) {
                const baseUrl = (0, shared_1.getProviderBaseUrl)(prefixResult.provider);
                if (baseUrl) {
                    targetBase = baseUrl;
                    effectivePath = prefixResult.remainingPath;
                    pathPrefixProvider = prefixResult.provider;
                    log.info(`[PROXY] Detected provider: ${prefixResult.provider}, forwarding to: ${baseUrl}${effectivePath}`);
                }
            }
        }
        if (!targetBase) {
            // Try to detect provider from the Host header (e.g. api.openai.com)
            const host = req.headers["host"] ?? "";
            const hostUrl = `https://${host}${effectivePath}`;
            const detectedProvider = (0, shared_1.detectProvider)(hostUrl);
            if (detectedProvider !== "unknown") {
                targetBase = (0, shared_1.getProviderBaseUrl)(detectedProvider) ?? undefined;
            }
            // Fallback: try to detect from path patterns alone
            if (!targetBase) {
                const pathProvider = (0, shared_1.detectProvider)(`https://placeholder${effectivePath}`);
                if (pathProvider !== "unknown") {
                    targetBase = (0, shared_1.getProviderBaseUrl)(pathProvider) ?? undefined;
                }
            }
            if (!targetBase) {
                sendJson(res, 400, {
                    error: "Could not determine upstream provider. Use path prefix routing (e.g. /openai/v1/...), set the Host header to a known provider (e.g. api.openai.com), or provide x-target-url header.",
                });
                return;
            }
        }
        // Build target URL: combine base with the effective path (prefix stripped if used)
        const targetUrl = targetBase.replace(/\/+$/, "") + effectivePath;
        // Detect provider early for policy enforcement response format
        const earlyProvider = pathPrefixProvider ?? (0, shared_1.detectProvider)(targetUrl);
        // Policy check: verify agent is allowed to make requests
        const policyResult = checkAgentPolicy(db, effectiveAgentId);
        if (!policyResult.allowed && policyResult.reason && policyResult.message) {
            log.info(`[PROXY] Request blocked for agent "${effectiveAgentId}": ${policyResult.reason}`);
            // Record blocked event
            recordBlockedEvent(db, effectiveAgentId, earlyProvider, policyResult.reason, policyResult.message);
            // Return a fake LLM response that indicates the block
            // (HTTP 200 on purpose, so LLM clients surface the message instead of erroring)
            const blockedResponse = generateBlockedResponse(earlyProvider, policyResult.reason, policyResult.message);
            sendJson(res, 200, blockedResponse);
            return;
        }
        log.info(`[PROXY] Target URL: ${targetUrl}`);
        // Read the full request body
        let requestBody;
        try {
            requestBody = await readRequestBody(req);
        }
        catch (err) {
            if (err instanceof Error && err.message === "Request body too large") {
                sendJson(res, 413, { error: `Request body too large (max ${MAX_REQUEST_BODY_SIZE / 1024 / 1024}MB)` });
            }
            else {
                sendJson(res, 502, { error: "Failed to read request body" });
            }
            return;
        }
        // Strict detection (hostname-only): used for key injection and rate limiting.
        // Path prefix is definitively trusted (we resolved the provider ourselves).
        const detectedProviderStrict = pathPrefixProvider
            ?? (0, shared_1.detectProviderByHostname)(targetUrl);
        // Model override: check if we should rewrite the model in request body
        let requestedModel = null;
        let actualModel = null;
        let modifiedRequestBody = requestBody;
        if (detectedProviderStrict !== "unknown") {
            try {
                const bodyJson = JSON.parse(requestBody.toString("utf-8"));
                if (bodyJson.model) {
                    requestedModel = bodyJson.model;
                    const modelOverride = getModelOverride(db, effectiveAgentId, detectedProviderStrict);
                    if (modelOverride) {
                        log.info(`[PROXY] Model override: ${requestedModel} → ${modelOverride}`);
                        bodyJson.model = modelOverride;
                        actualModel = modelOverride;
                        modifiedRequestBody = Buffer.from(JSON.stringify(bodyJson), "utf-8");
                    }
                    else {
                        // NOTE(review): actualModel is assigned but never read after
                        // this point — confirm whether it was meant to feed the events.
                        actualModel = requestedModel;
                    }
                }
            }
            catch {
                // Not JSON or no model field - continue without modification
            }
        }
        // Lenient detection (hostname + path fallback): used for metric extraction.
        let detectedProviderForMetrics = pathPrefixProvider
            ?? (0, shared_1.detectProvider)(targetUrl);
        if (detectedProviderForMetrics === "unknown") {
            detectedProviderForMetrics = (0, shared_1.detectProvider)(`https://placeholder${effectivePath}`);
        }
        // Warn when path matches a provider but hostname doesn't — key will NOT be injected.
        // Skip when path prefix was used (provider is already trusted).
        if (!pathPrefixProvider && detectedProviderStrict === "unknown" && detectedProviderForMetrics !== "unknown") {
            const providerKey = providerKeys[detectedProviderForMetrics];
            if (providerKey) {
                const expectedBase = (0, shared_1.getProviderBaseUrl)(detectedProviderForMetrics) ?? detectedProviderForMetrics;
                log.warn(`Path matches "${detectedProviderForMetrics}" but hostname does not — ` +
                    `API key NOT injected. Use x-target-url=${expectedBase} for key injection.`);
            }
        }
        // Rate limiting: check before forwarding (strict match only)
        if (detectedProviderStrict !== "unknown") {
            const rateLimitResult = rateLimiter.check(effectiveAgentId, detectedProviderStrict);
            if (!rateLimitResult.allowed) {
                const retryAfter = rateLimitResult.retryAfterSeconds ?? 60;
                const message = `Rate limit exceeded for agent "${effectiveAgentId}" on ${detectedProviderStrict}. Please retry after ${retryAfter} seconds.`;
                res.writeHead(429, {
                    "Content-Type": "application/json",
                    "Retry-After": String(retryAfter),
                });
                // Return provider-specific error format
                let errorBody;
                if (detectedProviderStrict === "anthropic") {
                    // Anthropic error format
                    errorBody = {
                        type: "error",
                        error: {
                            type: "rate_limit_error",
                            message,
                        },
                        retry_after_seconds: retryAfter,
                    };
                }
                else {
                    // OpenAI-style error format (used by most providers)
                    errorBody = {
                        error: {
                            message,
                            type: "rate_limit_error",
                            param: null,
                            code: "rate_limit_exceeded",
                        },
                        retry_after_seconds: retryAfter,
                    };
                }
                res.end(JSON.stringify(errorBody));
                // Record rate limit event
                const event = {
                    agent_id: effectiveAgentId,
                    event_type: "error",
                    provider: detectedProviderStrict,
                    model: null,
                    tokens_in: null,
                    tokens_out: null,
                    tokens_total: null,
                    cost_usd: null,
                    latency_ms: null,
                    status_code: 429,
                    source: "proxy",
                    timestamp: new Date().toISOString(),
                    tags: { rate_limited: "true" },
                };
                eventBuffer.add(event);
                return;
            }
        }
        // Build forwarded headers, removing proxy-specific ones
        const forwardHeaders = {};
        for (const [key, value] of Object.entries(req.headers)) {
            const lowerKey = key.toLowerCase();
            if (lowerKey === "x-target-url" ||
                lowerKey === "host" ||
                lowerKey === "connection" ||
                lowerKey === "content-length" // Let fetch recalculate after body modification
            ) {
                continue;
            }
            if (value !== undefined) {
                forwardHeaders[key] = Array.isArray(value) ? value.join(", ") : value;
            }
        }
        // Inject provider API key only for hostname-matched providers (strict).
        // Path-only matches are NOT trusted for key injection to prevent leakage.
        log.info(`[PROXY] Provider detection: strict=${detectedProviderStrict}, metrics=${detectedProviderForMetrics}`);
        if (detectedProviderStrict !== "unknown") {
            const providerKey = providerKeys[detectedProviderStrict];
            if (providerKey) {
                const authHeader = (0, shared_1.getProviderAuthHeader)(detectedProviderStrict, providerKey);
                if (authHeader) {
                    // Remove any existing auth header and inject the configured one
                    const existingAuthKey = Object.keys(forwardHeaders).find((k) => k.toLowerCase() === authHeader.name.toLowerCase());
                    if (existingAuthKey) {
                        log.info(`[PROXY] Replacing existing ${existingAuthKey} header with configured key`);
                        delete forwardHeaders[existingAuthKey];
                    }
                    forwardHeaders[authHeader.name] = authHeader.value;
                    log.info(`[PROXY] Injected ${authHeader.name} header for ${detectedProviderStrict}`);
                }
            }
            else {
                log.warn(`[PROXY] No API key configured for provider: ${detectedProviderStrict}`);
            }
        }
        const requestStart = Date.now();
        let providerResponse;
        try {
            providerResponse = await fetch(targetUrl, {
                method,
                headers: forwardHeaders,
                body: method !== "GET" && method !== "HEAD"
                    ? new Uint8Array(modifiedRequestBody)
                    : undefined,
                signal: AbortSignal.timeout(UPSTREAM_TIMEOUT_MS),
            });
        }
        catch (error) {
            const message = error instanceof Error ? error.message : "Unknown fetch error";
            log.error(`[PROXY] Upstream request failed: ${message}`);
            sendJson(res, 502, { error: `Upstream request failed: ${message}` });
            return;
        }
        log.info(`[PROXY] Response: ${providerResponse.status} ${providerResponse.statusText}`);
        // Check if the response is an SSE stream
        const contentType = providerResponse.headers.get("content-type") ?? "";
        const isSSE = contentType.includes("text/event-stream");
        if (isSSE && providerResponse.body) {
            // ---------------------------------------------------------------
            // STREAMING PATH: pipe chunks through to client in real-time,
            // accumulate them for metric extraction after the stream ends.
            // ---------------------------------------------------------------
            const responseHeaders = {};
            providerResponse.headers.forEach((value, key) => {
                responseHeaders[key] = value;
            });
            res.writeHead(providerResponse.status, responseHeaders);
            const chunks = [];
            let accumulatedSize = 0;
            const reader = providerResponse.body.getReader();
            try {
                for (;;) {
                    const { done, value } = await reader.read();
                    if (done)
                        break;
                    const buf = Buffer.from(value);
                    // NOTE(review): res.write's backpressure signal is ignored —
                    // confirm large/slow clients cannot balloon memory here.
                    res.write(buf);
                    accumulatedSize += buf.length;
                    // Stop accumulating (but keep forwarding) past the cap so metric
                    // extraction cannot exhaust memory on huge streams.
                    if (accumulatedSize <= MAX_SSE_BUFFER_SIZE) {
                        chunks.push(buf);
                    }
                }
            }
            catch (error) {
                log.error("Stream read error", { err: error instanceof Error ? error.message : String(error) });
            }
            finally {
                res.end();
            }
            const latencyMs = Date.now() - requestStart;
            const fullBody = Buffer.concat(chunks);
            try {
                extractStreamingMetrics(detectedProviderForMetrics, providerResponse.status, fullBody, latencyMs, effectiveAgentId, requestedModel);
            }
            catch (error) {
                log.error("Streaming metric extraction error", { err: error instanceof Error ? error.message : String(error) });
            }
        }
        else {
            // ---------------------------------------------------------------
            // NON-STREAMING PATH: buffer full response, forward, extract.
            // ---------------------------------------------------------------
            let responseBodyBuffer;
            try {
                const arrayBuffer = await providerResponse.arrayBuffer();
                responseBodyBuffer = Buffer.from(arrayBuffer);
            }
            catch {
                sendJson(res, 502, {
                    error: "Failed to read upstream response body",
                });
                return;
            }
            const latencyMs = Date.now() - requestStart;
            // Forward status code and headers back to the client
            const responseHeaders = {};
            providerResponse.headers.forEach((value, key) => {
                // Skip transfer-encoding since we are sending the full body
                if (key.toLowerCase() === "transfer-encoding")
                    return;
                responseHeaders[key] = value;
            });
            res.writeHead(providerResponse.status, responseHeaders);
            res.end(responseBodyBuffer);
            // After response is sent, extract metrics asynchronously
            try {
                extractAndQueueMetrics(detectedProviderForMetrics, providerResponse.status, responseBodyBuffer, latencyMs, effectiveAgentId, requestedModel);
            }
            catch (error) {
                log.error("Metric extraction error", { err: error instanceof Error ? error.message : String(error) });
            }
        }
    }
    /**
     * Parse an accumulated SSE body, compute cost when model + token counts
     * are available, and queue an "llm_call" event (tagged streaming).
     * Skips silently when the provider is unknown or the stream had no
     * parseable data.
     */
    function extractStreamingMetrics(provider, statusCode, sseBody, latencyMs, effectiveAgentId, requestedModel) {
        if (provider === "unknown") {
            log.warn("Unrecognized provider - skipping streaming metric extraction");
            return;
        }
        const sseText = sseBody.toString("utf-8");
        const parsed = parseSSEResponse(provider, sseText, statusCode);
        if (!parsed) {
            log.warn(`No parseable SSE data for provider: ${provider} — skipping event`);
            return;
        }
        let costUsd = null;
        if (parsed.model && parsed.tokensIn != null && parsed.tokensOut != null) {
            costUsd = (0, shared_1.calculateCost)(parsed.model, parsed.tokensIn, parsed.tokensOut);
        }
        const event = {
            agent_id: effectiveAgentId,
            event_type: "llm_call",
            provider,
            model: parsed.model,
            requested_model: requestedModel,
            tokens_in: parsed.tokensIn,
            tokens_out: parsed.tokensOut,
            tokens_total: parsed.tokensTotal,
            cost_usd: costUsd,
            latency_ms: latencyMs,
            status_code: statusCode,
            source: "proxy",
            timestamp: new Date().toISOString(),
            tags: { streaming: "true" },
        };
        eventBuffer.add(event);
    }
    /**
     * Parse a buffered (non-streaming) JSON response via the shared
     * provider parsers, compute cost when possible, and queue an
     * "llm_call" event. Skips silently on unknown provider or non-JSON body.
     */
    function extractAndQueueMetrics(provider, statusCode, responseBody, latencyMs, effectiveAgentId, requestedModel) {
        if (provider === "unknown") {
            log.warn("Unrecognized provider - skipping metric extraction");
            return;
        }
        // Parse the response body as JSON
        let parsedBody;
        try {
            parsedBody = JSON.parse(responseBody.toString("utf-8"));
        }
        catch {
            log.warn(`Could not parse response body as JSON for ${provider} - skipping metric extraction`);
            return;
        }
        const parsed = (0, shared_1.parseProviderResponse)(provider, parsedBody, statusCode);
        if (!parsed) {
            log.warn(`No parser result for provider: ${provider}`);
            return;
        }
        // Calculate cost if we have the necessary token data
        let costUsd = null;
        if (parsed.model && parsed.tokensIn != null && parsed.tokensOut != null) {
            costUsd = (0, shared_1.calculateCost)(parsed.model, parsed.tokensIn, parsed.tokensOut);
        }
        const event = {
            agent_id: effectiveAgentId,
            event_type: "llm_call",
            provider,
            model: parsed.model,
            requested_model: requestedModel,
            tokens_in: parsed.tokensIn,
            tokens_out: parsed.tokensOut,
            tokens_total: parsed.tokensTotal,
            cost_usd: costUsd,
            latency_ms: latencyMs,
            status_code: statusCode,
            source: "proxy",
            timestamp: new Date().toISOString(),
            tags: {},
        };
        eventBuffer.add(event);
    }
    server.listen(port);
    /**
     * Gracefully stop the proxy: cancel the rate-limit refresh timer, flush
     * and stop the event buffer, then close the HTTP server. Resolves when
     * the server has fully closed; rejects if closing fails.
     */
    async function shutdown() {
        if (rateLimitRefreshTimer) {
            clearInterval(rateLimitRefreshTimer);
        }
        await eventBuffer.shutdown();
        return new Promise((resolve, reject) => {
            server.close((err) => {
                if (err)
                    reject(err);
                else
                    resolve();
            });
        });
    }
    return { server, shutdown };
}
898
+ //# sourceMappingURL=proxy-server.js.map