@agentgazer/proxy 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -5
- package/dist/__tests__/loop-detector.test.d.ts +2 -0
- package/dist/__tests__/loop-detector.test.d.ts.map +1 -0
- package/dist/__tests__/loop-detector.test.js +257 -0
- package/dist/__tests__/loop-detector.test.js.map +1 -0
- package/dist/__tests__/proxy-server.test.js +59 -32
- package/dist/__tests__/proxy-server.test.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js.map +1 -1
- package/dist/loop-detector.d.ts +109 -0
- package/dist/loop-detector.d.ts.map +1 -0
- package/dist/loop-detector.js +312 -0
- package/dist/loop-detector.js.map +1 -0
- package/dist/proxy-server.d.ts +7 -0
- package/dist/proxy-server.d.ts.map +1 -1
- package/dist/proxy-server.js +600 -284
- package/dist/proxy-server.js.map +1 -1
- package/dist/rate-limiter.d.ts +10 -0
- package/dist/rate-limiter.d.ts.map +1 -1
- package/dist/rate-limiter.js +43 -0
- package/dist/rate-limiter.js.map +1 -1
- package/package.json +3 -3
package/dist/proxy-server.js
CHANGED
|
@@ -60,6 +60,7 @@ function getModelOverride(db, agentId, provider) {
|
|
|
60
60
|
const log = (0, shared_1.createLogger)("proxy");
|
|
61
61
|
const event_buffer_js_1 = require("./event-buffer.js");
|
|
62
62
|
const rate_limiter_js_1 = require("./rate-limiter.js");
|
|
63
|
+
const loop_detector_js_1 = require("./loop-detector.js");
|
|
63
64
|
const DEFAULT_PORT = 4000;
|
|
64
65
|
const DEFAULT_ENDPOINT = "https://ingest.agentgazer.com/v1/events";
|
|
65
66
|
const DEFAULT_FLUSH_INTERVAL = 5000;
|
|
@@ -68,6 +69,8 @@ const MAX_REQUEST_BODY_SIZE = 10 * 1024 * 1024; // 10 MB
|
|
|
68
69
|
const MAX_SSE_BUFFER_SIZE = 50 * 1024 * 1024; // 50 MB
|
|
69
70
|
const UPSTREAM_TIMEOUT_MS = 120_000; // 2 minutes
|
|
70
71
|
const RATE_LIMIT_REFRESH_INTERVAL_MS = 30_000; // 30 seconds
|
|
72
|
+
const PROVIDER_KEYS_REFRESH_INTERVAL_MS = 10_000; // 10 seconds
|
|
73
|
+
const PROVIDER_SERVICE = "com.agentgazer.provider";
|
|
71
74
|
function readRequestBody(req) {
|
|
72
75
|
return new Promise((resolve, reject) => {
|
|
73
76
|
const chunks = [];
|
|
@@ -95,6 +98,111 @@ function sendJson(res, statusCode, body) {
|
|
|
95
98
|
res.end(payload);
|
|
96
99
|
}
|
|
97
100
|
// ---------------------------------------------------------------------------
|
|
101
|
+
// Request body normalization — remove/transform unsupported fields per provider
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
/**
 * Normalize an OpenAI-style request body for the target provider.
 *
 * Some providers reject fields that OpenAI accepts, so unsupported fields are
 * removed or renamed before the request is forwarded:
 *   - non-OpenAI providers: `max_completion_tokens` becomes `max_tokens`
 *     (unless `max_tokens` is already set), OpenAI-only top-level fields are
 *     dropped, and `strict` is stripped from every entry in `tools`;
 *   - OpenAI models whose name starts with "o1"/"o3": `max_tokens` becomes
 *     `max_completion_tokens` (unless the latter is already set);
 *   - "mistral" / "cohere": additional provider-specific fields are dropped,
 *     and for "cohere" `top_p` is renamed to `p` when `p` is not already set.
 *
 * The caller's `body` is never mutated: top-level changes are applied to a
 * shallow copy, and `tools` entries are copied before `strict` is removed.
 *
 * @param {string} provider - Provider name (e.g. "openai", "mistral", "cohere").
 * @param {object} body - Parsed JSON request body.
 * @param {{ debug: (message: string) => void }} log - Logger that receives a
 *   one-line summary when at least one change was made.
 * @returns {{ body: object, modified: boolean }} The normalized body and a flag
 *   telling whether it differs from the input.
 */
function normalizeRequestBody(provider, body, log) {
    const result = { ...body };
    const changes = [];
    let modified = false;
    // Fields that only OpenAI supports (top-level)
    const openaiOnlyFields = ["store", "metadata", "parallel_tool_calls", "stream_options"];
    // Remove every listed top-level field that is present and record the change.
    const dropFields = (fields) => {
        for (const field of fields) {
            if (field in result) {
                delete result[field];
                changes.push(`-${field}`);
                modified = true;
            }
        }
    };
    // max_completion_tokens -> max_tokens conversion for non-OpenAI providers
    if (provider !== "openai" && "max_completion_tokens" in result) {
        if (!("max_tokens" in result)) {
            result.max_tokens = result.max_completion_tokens;
            changes.push(`max_completion_tokens→max_tokens`);
        }
        delete result.max_completion_tokens;
        modified = true;
    }
    // OpenAI o1/o3 models require max_completion_tokens instead of max_tokens
    if (provider === "openai" && "max_tokens" in result) {
        // A non-string model must not throw here: the caller would otherwise
        // discard every modification (including a model override).
        const model = typeof result.model === "string" ? result.model : "";
        if (model.startsWith("o1") || model.startsWith("o3")) {
            if (!("max_completion_tokens" in result)) {
                result.max_completion_tokens = result.max_tokens;
                changes.push(`max_tokens→max_completion_tokens (${model})`);
            }
            delete result.max_tokens;
            modified = true;
        }
    }
    // Remove OpenAI-only fields for other providers
    if (provider !== "openai") {
        dropFields(openaiOnlyFields);
        // Remove 'strict' from within tools array (OpenAI-specific nested field).
        // Entries are copied before editing so the caller's objects stay intact.
        if (Array.isArray(result.tools)) {
            let toolsModified = false;
            const cleanedTools = result.tools.map((tool) => {
                // Malformed entries (null, strings, numbers) are forwarded untouched.
                if (tool === null || typeof tool !== "object") {
                    return tool;
                }
                let cleaned = tool;
                const fn = tool.function;
                if (fn && typeof fn === "object" && "strict" in fn) {
                    const fnCopy = { ...fn };
                    delete fnCopy.strict;
                    cleaned = { ...tool, function: fnCopy };
                    toolsModified = true;
                }
                // Also check top-level strict on tool
                if ("strict" in cleaned) {
                    if (cleaned === tool) {
                        cleaned = { ...tool };
                    }
                    delete cleaned.strict;
                    toolsModified = true;
                }
                return cleaned;
            });
            if (toolsModified) {
                result.tools = cleanedTools;
                changes.push("-tools[].strict");
                modified = true;
            }
        }
    }
    // Provider-specific handling
    switch (provider) {
        case "mistral": {
            // Mistral doesn't support these additional fields
            dropFields(["logprobs", "top_logprobs", "n", "user", "service_tier"]);
            break;
        }
        case "cohere": {
            // Cohere uses different field names and doesn't support some OpenAI fields
            // See: https://docs.cohere.com/reference/chat
            dropFields(["top_logprobs", "n", "user", "stream_options"]);
            // top_p → p for Cohere
            if ("top_p" in result && !("p" in result)) {
                result.p = result.top_p;
                delete result.top_p;
                changes.push("top_p→p");
                modified = true;
            }
            break;
        }
        default:
            break;
    }
    if (modified) {
        log.debug(`[PROXY] Normalized request body: ${changes.join(", ")}`);
    }
    return { body: result, modified };
}
|
|
205
|
+
// ---------------------------------------------------------------------------
|
|
98
206
|
// SSE streaming parsers — extract usage/model from provider-specific formats
|
|
99
207
|
// ---------------------------------------------------------------------------
|
|
100
208
|
function parseOpenAISSE(dataLines, statusCode) {
|
|
@@ -231,7 +339,6 @@ function parseSSEResponse(provider, sseText, statusCode) {
|
|
|
231
339
|
case "moonshot":
|
|
232
340
|
case "zhipu":
|
|
233
341
|
case "minimax":
|
|
234
|
-
case "baichuan":
|
|
235
342
|
case "yi":
|
|
236
343
|
return parseOpenAISSE(dataLines, statusCode);
|
|
237
344
|
case "anthropic":
|
|
@@ -298,6 +405,86 @@ function checkAgentPolicy(db, agentId) {
|
|
|
298
405
|
}
|
|
299
406
|
return { allowed: true };
|
|
300
407
|
}
|
|
408
|
+
const providerPolicyCache = {};
|
|
409
|
+
const PROVIDER_POLICY_CACHE_TTL_MS = 5_000; // 5 seconds (shorter for faster policy updates)
|
|
410
|
+
// Provider-level rate limiter (separate from agent rate limiter)
|
|
411
|
+
const providerRateLimiter = new rate_limiter_js_1.RateLimiter();
|
|
412
|
+
/**
 * Decide whether a request to `provider` may proceed.
 *
 * Provider settings are looked up through `getProviderSettings` and kept in
 * `providerPolicyCache` for `PROVIDER_POLICY_CACHE_TTL_MS`. A request is
 * refused when the provider's `active` flag is 0, or when both rate-limit
 * settings are set and `providerRateLimiter.checkAndRecord` rejects it.
 *
 * @param {object|null|undefined} db - Database handle; without one every request is allowed.
 * @param {string} provider - Provider name; "unknown" is always allowed.
 * @returns {{ allowed: boolean, reason?: string, message?: string }}
 */
function checkProviderPolicy(db, provider) {
    const permitted = () => ({ allowed: true });
    if (provider === "unknown" || !db) {
        return permitted();
    }
    // Use the cached settings while they are fresh; otherwise reload and re-cache.
    const entry = providerPolicyCache[provider];
    const isFresh = Boolean(entry) && entry.expiresAt > Date.now();
    let settings;
    if (isFresh) {
        settings = entry.settings;
    }
    else {
        settings = (0, server_1.getProviderSettings)(db, provider);
        providerPolicyCache[provider] = {
            settings: settings ?? null,
            expiresAt: Date.now() + PROVIDER_POLICY_CACHE_TTL_MS,
        };
    }
    // No settings means default (active, no rate limit)
    if (!settings) {
        return permitted();
    }
    if (settings.active === 0) {
        return {
            allowed: false,
            reason: "provider_deactivated",
            message: `Provider "${provider}" is currently deactivated`,
        };
    }
    // Rate limiting applies only when both settings are truthy.
    const { rate_limit_max_requests: maxRequests, rate_limit_window_seconds: windowSeconds } = settings;
    if (!maxRequests || !windowSeconds) {
        return permitted();
    }
    // The provider name is passed for both key arguments of the limiter.
    if (providerRateLimiter.checkAndRecord(provider, provider, maxRequests, windowSeconds)) {
        return permitted();
    }
    const retryAfter = providerRateLimiter.getRetryAfter(provider, provider);
    log.info(`[PROXY] Provider ${provider} rate limited, retry after ${retryAfter}s`);
    return {
        allowed: false,
        reason: "provider_rate_limited",
        message: `Provider "${provider}" rate limit exceeded. Retry after ${retryAfter} seconds.`,
    };
}
|
|
457
|
+
const killSwitchConfigCache = {};
|
|
458
|
+
const KILL_SWITCH_CACHE_TTL_MS = 30_000; // 30 seconds
|
|
459
|
+
/**
 * Resolve the kill-switch (loop detection) configuration for an agent.
 *
 * Without a database the built-in defaults are returned (disabled, window of
 * 20, threshold 10). Otherwise the agent policy is read via `getAgentPolicy`,
 * pushed to the loop detector with `setConfig`, and cached in
 * `killSwitchConfigCache` for `KILL_SWITCH_CACHE_TTL_MS`.
 *
 * @param {object|null|undefined} db - Database handle.
 * @param {string} agentId - Agent whose configuration is requested.
 * @returns {{ enabled: boolean, windowSize: number, threshold: number }}
 */
function getKillSwitchConfig(db, agentId) {
    if (!db) {
        return { enabled: false, windowSize: 20, threshold: 10.0 };
    }
    // Serve from cache while the entry has not expired.
    const hit = killSwitchConfigCache[agentId];
    if (hit && hit.expiresAt > Date.now()) {
        return hit.config;
    }
    // Cache miss: read the policy and fall back to defaults for unset fields.
    const policy = (0, server_1.getAgentPolicy)(db, agentId);
    const enabled = policy?.kill_switch_enabled === 1;
    const windowSize = policy?.kill_switch_window_size ?? 20;
    const threshold = policy?.kill_switch_threshold ?? 10.0;
    const config = { enabled, windowSize, threshold };
    // Hand the freshly loaded config to the loop detector before caching it.
    loop_detector_js_1.loopDetector.setConfig(agentId, config);
    killSwitchConfigCache[agentId] = {
        config,
        expiresAt: Date.now() + KILL_SWITCH_CACHE_TTL_MS,
    };
    return config;
}
|
|
301
488
|
/**
|
|
302
489
|
* Generate a blocked response in OpenAI format.
|
|
303
490
|
*/
|
|
@@ -356,6 +543,32 @@ function generateBlockedResponse(provider, reason, message) {
|
|
|
356
543
|
// Default to OpenAI format (used by most providers)
|
|
357
544
|
return generateOpenAIBlockedResponse(reason, message);
|
|
358
545
|
}
|
|
546
|
+
/**
 * Build the JSON body of a provider rate-limit error.
 *
 * "anthropic" gets an Anthropic-shaped error object; every other provider
 * gets the OpenAI-shaped one. Both carry `retry_after_seconds`.
 *
 * @param {string} provider - Provider name; selects the error format and appears in the message.
 * @param {string} agentId - Accepted for interface compatibility; not used in the response.
 * @param {number} retryAfterSeconds - Seconds the client should wait before retrying.
 * @returns {object} Error payload in the provider's format.
 */
function generateRateLimitResponse(provider, agentId, retryAfterSeconds) {
    const message = `Rate limit exceeded for provider "${provider}". Please retry after ${retryAfterSeconds} seconds.`;
    const isAnthropic = provider === "anthropic";
    // Key order is kept as-is so the serialized JSON stays the same.
    const payload = isAnthropic
        ? { type: "error", error: { type: "rate_limit_error", message } }
        : { error: { message, type: "rate_limit_error", param: null, code: "rate_limit_exceeded" } };
    payload.retry_after_seconds = retryAfterSeconds;
    return payload;
}
|
|
359
572
|
/**
|
|
360
573
|
* Record a blocked event to the database.
|
|
361
574
|
*/
|
|
@@ -419,8 +632,9 @@ function startProxy(options) {
|
|
|
419
632
|
const endpoint = options.endpoint ?? DEFAULT_ENDPOINT;
|
|
420
633
|
const flushInterval = options.flushInterval ?? DEFAULT_FLUSH_INTERVAL;
|
|
421
634
|
const maxBufferSize = options.maxBufferSize ?? DEFAULT_MAX_BUFFER_SIZE;
|
|
422
|
-
|
|
635
|
+
let providerKeys = options.providerKeys ?? {};
|
|
423
636
|
const db = options.db;
|
|
637
|
+
const secretStore = options.secretStore;
|
|
424
638
|
// Initialize rate limiter - prefer database, fall back to options for backward compatibility/testing
|
|
425
639
|
let initialRateLimits = {};
|
|
426
640
|
if (db) {
|
|
@@ -442,6 +656,27 @@ function startProxy(options) {
|
|
|
442
656
|
}, RATE_LIMIT_REFRESH_INTERVAL_MS);
|
|
443
657
|
rateLimitRefreshTimer.unref();
|
|
444
658
|
}
|
|
659
|
+
// Set up periodic refresh of provider keys from secret store
|
|
660
|
+
let providerKeysRefreshTimer = null;
|
|
661
|
+
if (secretStore) {
|
|
662
|
+
providerKeysRefreshTimer = setInterval(async () => {
|
|
663
|
+
try {
|
|
664
|
+
const accounts = await secretStore.list(PROVIDER_SERVICE);
|
|
665
|
+
const newKeys = {};
|
|
666
|
+
for (const account of accounts) {
|
|
667
|
+
const value = await secretStore.get(PROVIDER_SERVICE, account);
|
|
668
|
+
if (value) {
|
|
669
|
+
newKeys[account] = value;
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
providerKeys = newKeys;
|
|
673
|
+
}
|
|
674
|
+
catch (err) {
|
|
675
|
+
log.error("Failed to refresh provider keys", { err: String(err) });
|
|
676
|
+
}
|
|
677
|
+
}, PROVIDER_KEYS_REFRESH_INTERVAL_MS);
|
|
678
|
+
providerKeysRefreshTimer.unref();
|
|
679
|
+
}
|
|
445
680
|
const startTime = Date.now();
|
|
446
681
|
const eventBuffer = new event_buffer_js_1.EventBuffer({
|
|
447
682
|
apiKey: options.apiKey,
|
|
@@ -465,95 +700,208 @@ function startProxy(options) {
|
|
|
465
700
|
});
|
|
466
701
|
return;
|
|
467
702
|
}
|
|
468
|
-
//
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
//
|
|
486
|
-
|
|
487
|
-
//
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
703
|
+
// Internal endpoint: Clear loop detector window for an agent
|
|
704
|
+
// POST /internal/agents/:id/clear-window
|
|
705
|
+
const clearWindowMatch = path.match(/^\/internal\/agents\/([^/]+)\/clear-window$/);
|
|
706
|
+
if (method === "POST" && clearWindowMatch) {
|
|
707
|
+
const targetAgentId = decodeURIComponent(clearWindowMatch[1]);
|
|
708
|
+
// Security: Only allow from localhost
|
|
709
|
+
const remoteAddr = req.socket.remoteAddress;
|
|
710
|
+
const isLocalhost = remoteAddr === "127.0.0.1" || remoteAddr === "::1" || remoteAddr === "::ffff:127.0.0.1";
|
|
711
|
+
if (!isLocalhost) {
|
|
712
|
+
sendJson(res, 403, { error: "This endpoint is only accessible from localhost" });
|
|
713
|
+
return;
|
|
714
|
+
}
|
|
715
|
+
loop_detector_js_1.loopDetector.clearAgent(targetAgentId);
|
|
716
|
+
log.info(`[PROXY] Cleared loop detector window for agent "${targetAgentId}"`);
|
|
717
|
+
sendJson(res, 200, { success: true, agent_id: targetAgentId });
|
|
718
|
+
return;
|
|
719
|
+
}
|
|
720
|
+
// Simplified routing: POST /agents/:agent/:provider[/...]
|
|
721
|
+
// For most providers, trailing path is ignored and we use the fixed chat endpoint.
|
|
722
|
+
// For providers with path-based routing (e.g., Google), we preserve the trailing path.
|
|
723
|
+
const simplifiedRouteMatch = path.match(/^\/agents\/([^/]+)\/([^/]+)(\/.*)?$/);
|
|
724
|
+
if (method === "POST" && simplifiedRouteMatch) {
|
|
725
|
+
const routeAgentId = decodeURIComponent(simplifiedRouteMatch[1]);
|
|
726
|
+
const routeProvider = simplifiedRouteMatch[2].toLowerCase();
|
|
727
|
+
const trailingPath = simplifiedRouteMatch[3] || "";
|
|
728
|
+
// Validate provider
|
|
729
|
+
if (!shared_1.KNOWN_PROVIDER_NAMES.includes(routeProvider)) {
|
|
730
|
+
sendJson(res, 400, { error: `Unknown provider: ${routeProvider}` });
|
|
731
|
+
return;
|
|
732
|
+
}
|
|
733
|
+
let targetUrl;
|
|
734
|
+
if ((0, shared_1.providerUsesPathRouting)(routeProvider) && trailingPath) {
|
|
735
|
+
// Path-based routing: append trailing path to root URL
|
|
736
|
+
const rootUrl = (0, shared_1.getProviderRootUrl)(routeProvider);
|
|
737
|
+
if (!rootUrl) {
|
|
738
|
+
sendJson(res, 400, { error: `No root URL configured for provider: ${routeProvider}` });
|
|
493
739
|
return;
|
|
494
740
|
}
|
|
741
|
+
targetUrl = rootUrl + trailingPath;
|
|
742
|
+
// For Google native API, add key as query parameter
|
|
743
|
+
if (routeProvider === "google" && providerKeys["google"]) {
|
|
744
|
+
const separator = targetUrl.includes("?") ? "&" : "?";
|
|
745
|
+
targetUrl = `${targetUrl}${separator}key=${providerKeys["google"]}`;
|
|
746
|
+
}
|
|
495
747
|
}
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
748
|
+
else {
|
|
749
|
+
// Fixed endpoint routing
|
|
750
|
+
const chatEndpoint = (0, shared_1.getProviderChatEndpoint)(routeProvider);
|
|
751
|
+
if (!chatEndpoint) {
|
|
752
|
+
sendJson(res, 400, { error: `No chat endpoint configured for provider: ${routeProvider}` });
|
|
753
|
+
return;
|
|
754
|
+
}
|
|
755
|
+
targetUrl = chatEndpoint;
|
|
499
756
|
}
|
|
757
|
+
log.info(`[PROXY] Simplified route: agent=${routeAgentId}, provider=${routeProvider}`);
|
|
758
|
+
log.info(`[PROXY] Forwarding to: ${targetUrl}`);
|
|
759
|
+
// For path-based routing (e.g., Google native API), we use different auth
|
|
760
|
+
const useNativeApi = (0, shared_1.providerUsesPathRouting)(routeProvider) && !!trailingPath;
|
|
761
|
+
// Handle the simplified route request
|
|
762
|
+
await handleSimplifiedRoute(req, res, routeAgentId, routeProvider, targetUrl, useNativeApi);
|
|
763
|
+
return;
|
|
500
764
|
}
|
|
501
|
-
//
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
pathPrefixProvider = prefixResult.provider;
|
|
514
|
-
log.info(`[PROXY] Detected provider: ${prefixResult.provider}, forwarding to: ${baseUrl}${effectivePath}`);
|
|
765
|
+
// Legacy routing: /:provider/... -> treat as /agents/default/:provider
|
|
766
|
+
// This maintains backward compatibility with old SDK configurations
|
|
767
|
+
const legacyProviderMatch = path.match(/^\/([^/]+)/);
|
|
768
|
+
if (method === "POST" && legacyProviderMatch) {
|
|
769
|
+
const legacyProvider = legacyProviderMatch[1].toLowerCase();
|
|
770
|
+
if (shared_1.KNOWN_PROVIDER_NAMES.includes(legacyProvider)) {
|
|
771
|
+
const chatEndpoint = (0, shared_1.getProviderChatEndpoint)(legacyProvider);
|
|
772
|
+
if (chatEndpoint) {
|
|
773
|
+
log.info(`[PROXY] Legacy route /${legacyProvider}/... -> agents/default/${legacyProvider}`);
|
|
774
|
+
log.info(`[PROXY] Forwarding to: ${chatEndpoint}`);
|
|
775
|
+
await handleSimplifiedRoute(req, res, "default", legacyProvider, chatEndpoint, false);
|
|
776
|
+
return;
|
|
515
777
|
}
|
|
516
778
|
}
|
|
517
779
|
}
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
780
|
+
// All other requests: return error with usage instructions
|
|
781
|
+
sendJson(res, 400, {
|
|
782
|
+
error: "Invalid route. Use POST /agents/:agent/:provider for LLM requests.",
|
|
783
|
+
usage: {
|
|
784
|
+
endpoint: "POST /agents/{agent_name}/{provider}",
|
|
785
|
+
example: "POST /agents/my-agent/openai",
|
|
786
|
+
providers: shared_1.KNOWN_PROVIDER_NAMES,
|
|
787
|
+
sdk_config: {
|
|
788
|
+
openai: "new OpenAI({ baseURL: 'http://localhost:4000/agents/my-agent/openai' })",
|
|
789
|
+
anthropic: "new Anthropic({ baseURL: 'http://localhost:4000/agents/my-agent/anthropic' })",
|
|
790
|
+
},
|
|
791
|
+
},
|
|
792
|
+
});
|
|
793
|
+
}
|
|
794
|
+
/**
 * Derive usage metrics from a buffered SSE (streaming) response and queue an
 * "llm_call" event tagged `streaming: "true"`.
 *
 * Nothing is queued when the provider is "unknown" or the SSE text yields no
 * parse result. The SSE text is also passed to the loop detector.
 *
 * @param {string} provider - Provider name.
 * @param {number} statusCode - Upstream HTTP status code.
 * @param {Buffer} sseBody - Raw SSE response bytes.
 * @param {number} latencyMs - Request latency in milliseconds.
 * @param {string} effectiveAgentId - Agent the event is attributed to.
 * @param {string|null} requestedModel - Model named in the client's request.
 * @returns {void}
 */
function extractStreamingMetrics(provider, statusCode, sseBody, latencyMs, effectiveAgentId, requestedModel) {
    if (provider === "unknown") {
        log.warn("Unrecognized provider - skipping streaming metric extraction");
        return;
    }
    const sseText = sseBody.toString("utf-8");
    const usage = parseSSEResponse(provider, sseText, statusCode);
    if (!usage) {
        log.warn(`No parseable SSE data for provider: ${provider} — skipping event`);
        return;
    }
    const { model, tokensIn, tokensOut, tokensTotal } = usage;
    // Cost needs a model name and both token counts; otherwise it stays null.
    const canPrice = Boolean(model) && tokensIn != null && tokensOut != null;
    const costUsd = canPrice ? (0, shared_1.calculateCost)(model, tokensIn, tokensOut) : null;
    // Record response for loop detection
    loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, sseText);
    eventBuffer.add({
        agent_id: effectiveAgentId,
        event_type: "llm_call",
        provider,
        model,
        requested_model: requestedModel,
        tokens_in: tokensIn,
        tokens_out: tokensOut,
        tokens_total: tokensTotal,
        cost_usd: costUsd,
        latency_ms: latencyMs,
        status_code: statusCode,
        source: "proxy",
        timestamp: new Date().toISOString(),
        tags: { streaming: "true" },
    });
}
|
|
829
|
+
/**
 * Derive usage metrics from a complete (non-streaming) JSON response and
 * queue an "llm_call" event.
 *
 * Nothing is queued when the provider is "unknown", the body is not valid
 * JSON, or `parseProviderResponse` returns no result. The response text is
 * also passed to the loop detector.
 *
 * @param {string} provider - Provider name.
 * @param {number} statusCode - Upstream HTTP status code.
 * @param {Buffer} responseBody - Raw response bytes.
 * @param {number} latencyMs - Request latency in milliseconds.
 * @param {string} effectiveAgentId - Agent the event is attributed to.
 * @param {string|null} requestedModel - Model named in the client's request.
 * @returns {void}
 */
function extractAndQueueMetrics(provider, statusCode, responseBody, latencyMs, effectiveAgentId, requestedModel) {
    if (provider === "unknown") {
        log.warn("Unrecognized provider - skipping metric extraction");
        return;
    }
    // Decode once; the same text feeds both JSON parsing and loop detection.
    const responseText = responseBody.toString("utf-8");
    // Parse the response body as JSON
    let parsedBody;
    try {
        parsedBody = JSON.parse(responseText);
    }
    catch {
        log.warn(`Could not parse response body as JSON for ${provider} - skipping metric extraction`);
        return;
    }
    const parsed = (0, shared_1.parseProviderResponse)(provider, parsedBody, statusCode);
    if (!parsed) {
        log.warn(`No parser result for provider: ${provider}`);
        return;
    }
    // Calculate cost if we have the necessary token data
    let costUsd = null;
    if (parsed.model && parsed.tokensIn != null && parsed.tokensOut != null) {
        costUsd = (0, shared_1.calculateCost)(parsed.model, parsed.tokensIn, parsed.tokensOut);
    }
    // Record response for loop detection
    loop_detector_js_1.loopDetector.recordResponse(effectiveAgentId, responseText);
    const event = {
        agent_id: effectiveAgentId,
        event_type: "llm_call",
        provider,
        model: parsed.model,
        requested_model: requestedModel,
        tokens_in: parsed.tokensIn,
        tokens_out: parsed.tokensOut,
        tokens_total: parsed.tokensTotal,
        cost_usd: costUsd,
        latency_ms: latencyMs,
        status_code: statusCode,
        source: "proxy",
        timestamp: new Date().toISOString(),
        tags: {},
    };
    eventBuffer.add(event);
}
|
|
873
|
+
/**
|
|
874
|
+
* Handle simplified route: POST /agents/:agent/:provider
|
|
875
|
+
* All path construction is done internally - user just provides agent and provider.
|
|
876
|
+
*/
|
|
877
|
+
async function handleSimplifiedRoute(req, res, effectiveAgentId, provider, targetUrl, useNativeApi = false) {
|
|
878
|
+
// Provider policy check
|
|
879
|
+
const providerPolicyResult = checkProviderPolicy(db, provider);
|
|
880
|
+
if (!providerPolicyResult.allowed && providerPolicyResult.reason && providerPolicyResult.message) {
|
|
881
|
+
log.info(`[PROXY] Request blocked for provider "${provider}": ${providerPolicyResult.reason}`);
|
|
882
|
+
recordBlockedEvent(db, effectiveAgentId, provider, providerPolicyResult.reason, providerPolicyResult.message);
|
|
883
|
+
if (providerPolicyResult.reason === "provider_rate_limited") {
|
|
884
|
+
const retryAfter = providerRateLimiter.getRetryAfter(provider, provider);
|
|
885
|
+
const rateLimitResponse = generateRateLimitResponse(provider, effectiveAgentId, retryAfter);
|
|
886
|
+
res.setHeader("Retry-After", String(retryAfter));
|
|
887
|
+
sendJson(res, 429, rateLimitResponse);
|
|
532
888
|
}
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
});
|
|
537
|
-
return;
|
|
889
|
+
else {
|
|
890
|
+
const blockedResponse = generateBlockedResponse(provider, providerPolicyResult.reason, providerPolicyResult.message);
|
|
891
|
+
sendJson(res, 200, blockedResponse);
|
|
538
892
|
}
|
|
893
|
+
return;
|
|
539
894
|
}
|
|
540
|
-
//
|
|
541
|
-
const targetUrl = targetBase.replace(/\/+$/, "") + effectivePath;
|
|
542
|
-
// Detect provider early for policy enforcement response format
|
|
543
|
-
const earlyProvider = pathPrefixProvider ?? (0, shared_1.detectProvider)(targetUrl);
|
|
544
|
-
// Policy check: verify agent is allowed to make requests
|
|
895
|
+
// Agent policy check
|
|
545
896
|
const policyResult = checkAgentPolicy(db, effectiveAgentId);
|
|
546
897
|
if (!policyResult.allowed && policyResult.reason && policyResult.message) {
|
|
547
898
|
log.info(`[PROXY] Request blocked for agent "${effectiveAgentId}": ${policyResult.reason}`);
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
// Return a fake LLM response that indicates the block
|
|
551
|
-
const blockedResponse = generateBlockedResponse(earlyProvider, policyResult.reason, policyResult.message);
|
|
899
|
+
recordBlockedEvent(db, effectiveAgentId, provider, policyResult.reason, policyResult.message);
|
|
900
|
+
const blockedResponse = generateBlockedResponse(provider, policyResult.reason, policyResult.message);
|
|
552
901
|
sendJson(res, 200, blockedResponse);
|
|
553
902
|
return;
|
|
554
903
|
}
|
|
555
|
-
|
|
556
|
-
// Read the full request body
|
|
904
|
+
// Read request body
|
|
557
905
|
let requestBody;
|
|
558
906
|
try {
|
|
559
907
|
requestBody = await readRequestBody(req);
|
|
@@ -567,153 +915,191 @@ function startProxy(options) {
|
|
|
567
915
|
}
|
|
568
916
|
return;
|
|
569
917
|
}
|
|
570
|
-
//
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
?? (0, shared_1.detectProviderByHostname)(targetUrl);
|
|
574
|
-
// Model override: check if we should rewrite the model in request body
|
|
575
|
-
let requestedModel = null;
|
|
576
|
-
let actualModel = null;
|
|
577
|
-
let modifiedRequestBody = requestBody;
|
|
578
|
-
if (detectedProviderStrict !== "unknown") {
|
|
918
|
+
// Kill Switch check
|
|
919
|
+
const killSwitchConfig = getKillSwitchConfig(db, effectiveAgentId);
|
|
920
|
+
if (killSwitchConfig.enabled) {
|
|
579
921
|
try {
|
|
580
922
|
const bodyJson = JSON.parse(requestBody.toString("utf-8"));
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
923
|
+
const { promptHash, toolCalls } = loop_detector_js_1.loopDetector.recordRequest(effectiveAgentId, bodyJson);
|
|
924
|
+
const loopCheck = loop_detector_js_1.loopDetector.checkLoop(effectiveAgentId, promptHash, toolCalls);
|
|
925
|
+
if (loopCheck.isLoop) {
|
|
926
|
+
log.warn(`[PROXY] Kill Switch triggered for agent "${effectiveAgentId}": score=${loopCheck.score.toFixed(2)}`);
|
|
927
|
+
const message = `Agent loop detected (score: ${loopCheck.score.toFixed(1)}). Agent deactivated to prevent runaway costs.`;
|
|
928
|
+
if (db) {
|
|
929
|
+
try {
|
|
930
|
+
(0, server_1.updateAgentPolicy)(db, effectiveAgentId, { active: false, deactivated_by: "kill_switch" });
|
|
931
|
+
log.info(`[PROXY] Agent "${effectiveAgentId}" deactivated by Kill Switch`);
|
|
932
|
+
}
|
|
933
|
+
catch (err) {
|
|
934
|
+
log.error("Failed to deactivate agent", { err: String(err) });
|
|
935
|
+
}
|
|
589
936
|
}
|
|
590
|
-
|
|
591
|
-
|
|
937
|
+
recordBlockedEvent(db, effectiveAgentId, provider, "loop_detected", message);
|
|
938
|
+
if (db) {
|
|
939
|
+
try {
|
|
940
|
+
const killSwitchEvent = {
|
|
941
|
+
agent_id: effectiveAgentId,
|
|
942
|
+
event_type: "kill_switch",
|
|
943
|
+
provider,
|
|
944
|
+
model: null,
|
|
945
|
+
tokens_in: null,
|
|
946
|
+
tokens_out: null,
|
|
947
|
+
tokens_total: null,
|
|
948
|
+
cost_usd: null,
|
|
949
|
+
latency_ms: null,
|
|
950
|
+
status_code: 200,
|
|
951
|
+
source: "proxy",
|
|
952
|
+
timestamp: new Date().toISOString(),
|
|
953
|
+
tags: {
|
|
954
|
+
loop_score: loopCheck.score,
|
|
955
|
+
similar_prompts: loopCheck.details.similarPrompts,
|
|
956
|
+
similar_responses: loopCheck.details.similarResponses,
|
|
957
|
+
repeated_tool_calls: loopCheck.details.repeatedToolCalls,
|
|
958
|
+
action: "deactivated",
|
|
959
|
+
},
|
|
960
|
+
};
|
|
961
|
+
(0, server_1.insertEvents)(db, [killSwitchEvent]);
|
|
962
|
+
// Fire kill_switch alert for Telegram/webhook/email notifications
|
|
963
|
+
const killSwitchData = {
|
|
964
|
+
agent_id: effectiveAgentId,
|
|
965
|
+
score: loopCheck.score,
|
|
966
|
+
window_size: killSwitchConfig.windowSize,
|
|
967
|
+
threshold: killSwitchConfig.threshold,
|
|
968
|
+
details: loopCheck.details,
|
|
969
|
+
};
|
|
970
|
+
void (0, server_1.fireKillSwitchAlert)(db, killSwitchData);
|
|
971
|
+
}
|
|
972
|
+
catch (err) {
|
|
973
|
+
log.error("Failed to record kill_switch event", { err: String(err) });
|
|
974
|
+
}
|
|
592
975
|
}
|
|
976
|
+
const blockedResponse = generateBlockedResponse(provider, "inactive", message);
|
|
977
|
+
sendJson(res, 200, blockedResponse);
|
|
978
|
+
return;
|
|
593
979
|
}
|
|
594
980
|
}
|
|
595
981
|
catch {
|
|
596
|
-
// Not JSON
|
|
597
|
-
}
|
|
598
|
-
}
|
|
599
|
-
//
|
|
600
|
-
let
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
}
|
|
614
|
-
}
|
|
615
|
-
// Rate limiting: check before forwarding (strict match only)
|
|
616
|
-
if (detectedProviderStrict !== "unknown") {
|
|
617
|
-
const rateLimitResult = rateLimiter.check(effectiveAgentId, detectedProviderStrict);
|
|
618
|
-
if (!rateLimitResult.allowed) {
|
|
619
|
-
const retryAfter = rateLimitResult.retryAfterSeconds ?? 60;
|
|
620
|
-
const message = `Rate limit exceeded for agent "${effectiveAgentId}" on ${detectedProviderStrict}. Please retry after ${retryAfter} seconds.`;
|
|
621
|
-
res.writeHead(429, {
|
|
622
|
-
"Content-Type": "application/json",
|
|
623
|
-
"Retry-After": String(retryAfter),
|
|
624
|
-
});
|
|
625
|
-
// Return provider-specific error format
|
|
626
|
-
let errorBody;
|
|
627
|
-
if (detectedProviderStrict === "anthropic") {
|
|
628
|
-
// Anthropic error format
|
|
629
|
-
errorBody = {
|
|
630
|
-
type: "error",
|
|
631
|
-
error: {
|
|
632
|
-
type: "rate_limit_error",
|
|
633
|
-
message,
|
|
634
|
-
},
|
|
635
|
-
retry_after_seconds: retryAfter,
|
|
636
|
-
};
|
|
637
|
-
}
|
|
638
|
-
else {
|
|
639
|
-
// OpenAI-style error format (used by most providers)
|
|
640
|
-
errorBody = {
|
|
641
|
-
error: {
|
|
642
|
-
message,
|
|
643
|
-
type: "rate_limit_error",
|
|
644
|
-
param: null,
|
|
645
|
-
code: "rate_limit_exceeded",
|
|
646
|
-
},
|
|
647
|
-
retry_after_seconds: retryAfter,
|
|
648
|
-
};
|
|
982
|
+
// Not JSON body - skip loop detection
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
// Model override and request normalization
|
|
986
|
+
let requestedModel = null;
|
|
987
|
+
let modifiedRequestBody = requestBody;
|
|
988
|
+
try {
|
|
989
|
+
let bodyJson = JSON.parse(requestBody.toString("utf-8"));
|
|
990
|
+
let bodyModified = false;
|
|
991
|
+
// Extract and optionally override model
|
|
992
|
+
if (bodyJson.model) {
|
|
993
|
+
requestedModel = bodyJson.model;
|
|
994
|
+
const modelOverride = getModelOverride(db, effectiveAgentId, provider);
|
|
995
|
+
if (modelOverride) {
|
|
996
|
+
log.info(`[PROXY] Model override: ${requestedModel} → ${modelOverride}`);
|
|
997
|
+
bodyJson.model = modelOverride;
|
|
998
|
+
bodyModified = true;
|
|
649
999
|
}
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
tokens_total: null,
|
|
660
|
-
cost_usd: null,
|
|
661
|
-
latency_ms: null,
|
|
662
|
-
status_code: 429,
|
|
663
|
-
source: "proxy",
|
|
664
|
-
timestamp: new Date().toISOString(),
|
|
665
|
-
tags: { rate_limited: "true" },
|
|
666
|
-
};
|
|
667
|
-
eventBuffer.add(event);
|
|
668
|
-
return;
|
|
1000
|
+
}
|
|
1001
|
+
// Normalize request body for provider compatibility
|
|
1002
|
+
const normalized = normalizeRequestBody(provider, bodyJson, log);
|
|
1003
|
+
if (normalized.modified) {
|
|
1004
|
+
bodyJson = normalized.body;
|
|
1005
|
+
bodyModified = true;
|
|
1006
|
+
}
|
|
1007
|
+
if (bodyModified) {
|
|
1008
|
+
modifiedRequestBody = Buffer.from(JSON.stringify(bodyJson), "utf-8");
|
|
669
1009
|
}
|
|
670
1010
|
}
|
|
671
|
-
|
|
1011
|
+
catch {
|
|
1012
|
+
// Not JSON or parse error - forward as-is
|
|
1013
|
+
}
|
|
1014
|
+
// Rate limiting check
|
|
1015
|
+
const rateLimitResult = rateLimiter.check(effectiveAgentId, provider);
|
|
1016
|
+
if (!rateLimitResult.allowed) {
|
|
1017
|
+
const retryAfter = rateLimitResult.retryAfterSeconds ?? 60;
|
|
1018
|
+
const message = `Rate limit exceeded for agent "${effectiveAgentId}" on ${provider}. Please retry after ${retryAfter} seconds.`;
|
|
1019
|
+
res.writeHead(429, { "Content-Type": "application/json", "Retry-After": String(retryAfter) });
|
|
1020
|
+
const errorBody = provider === "anthropic"
|
|
1021
|
+
? { type: "error", error: { type: "rate_limit_error", message }, retry_after_seconds: retryAfter }
|
|
1022
|
+
: { error: { message, type: "rate_limit_error", param: null, code: "rate_limit_exceeded" }, retry_after_seconds: retryAfter };
|
|
1023
|
+
res.end(JSON.stringify(errorBody));
|
|
1024
|
+
const event = {
|
|
1025
|
+
agent_id: effectiveAgentId,
|
|
1026
|
+
event_type: "error",
|
|
1027
|
+
provider,
|
|
1028
|
+
model: null,
|
|
1029
|
+
tokens_in: null,
|
|
1030
|
+
tokens_out: null,
|
|
1031
|
+
tokens_total: null,
|
|
1032
|
+
cost_usd: null,
|
|
1033
|
+
latency_ms: null,
|
|
1034
|
+
status_code: 429,
|
|
1035
|
+
source: "proxy",
|
|
1036
|
+
timestamp: new Date().toISOString(),
|
|
1037
|
+
tags: { rate_limited: "true" },
|
|
1038
|
+
};
|
|
1039
|
+
eventBuffer.add(event);
|
|
1040
|
+
return;
|
|
1041
|
+
}
|
|
1042
|
+
// Build headers
|
|
672
1043
|
const forwardHeaders = {};
|
|
673
1044
|
for (const [key, value] of Object.entries(req.headers)) {
|
|
674
1045
|
const lowerKey = key.toLowerCase();
|
|
675
|
-
if (lowerKey === "x-target-url" ||
|
|
676
|
-
lowerKey === "host" ||
|
|
677
|
-
lowerKey === "connection" ||
|
|
678
|
-
lowerKey === "content-length" // Let fetch recalculate after body modification
|
|
679
|
-
) {
|
|
1046
|
+
if (lowerKey === "x-target-url" || lowerKey === "host" || lowerKey === "connection" || lowerKey === "content-length") {
|
|
680
1047
|
continue;
|
|
681
1048
|
}
|
|
682
1049
|
if (value !== undefined) {
|
|
683
1050
|
forwardHeaders[key] = Array.isArray(value) ? value.join(", ") : value;
|
|
684
1051
|
}
|
|
685
1052
|
}
|
|
686
|
-
// Inject
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
1053
|
+
// Inject API key
|
|
1054
|
+
const providerKey = providerKeys[provider];
|
|
1055
|
+
if (providerKey) {
|
|
1056
|
+
const authHeader = (0, shared_1.getProviderAuthHeader)(provider, providerKey, useNativeApi);
|
|
1057
|
+
if (authHeader) {
|
|
1058
|
+
const existingAuthKey = Object.keys(forwardHeaders).find(k => k.toLowerCase() === authHeader.name.toLowerCase());
|
|
1059
|
+
if (existingAuthKey)
|
|
1060
|
+
delete forwardHeaders[existingAuthKey];
|
|
1061
|
+
forwardHeaders[authHeader.name] = authHeader.value;
|
|
1062
|
+
log.info(`[PROXY] Injected ${authHeader.name} header for ${provider}${useNativeApi ? " (native API)" : ""}`);
|
|
1063
|
+
}
|
|
1064
|
+
}
|
|
1065
|
+
else {
|
|
1066
|
+
log.warn(`[PROXY] No API key configured for provider: ${provider}`);
|
|
1067
|
+
}
|
|
1068
|
+
// Add provider-specific required headers
|
|
1069
|
+
if (provider === "anthropic") {
|
|
1070
|
+
// Anthropic requires anthropic-version header
|
|
1071
|
+
if (!forwardHeaders["anthropic-version"]) {
|
|
1072
|
+
forwardHeaders["anthropic-version"] = "2023-06-01";
|
|
1073
|
+
log.info(`[PROXY] Added anthropic-version header`);
|
|
1074
|
+
}
|
|
1075
|
+
}
|
|
1076
|
+
// Debug logging for request details (mask sensitive headers)
|
|
1077
|
+
const maskedHeaders = {};
|
|
1078
|
+
const sensitiveHeaders = ["authorization", "x-api-key", "x-goog-api-key", "api-key"];
|
|
1079
|
+
for (const [key, value] of Object.entries(forwardHeaders)) {
|
|
1080
|
+
if (sensitiveHeaders.includes(key.toLowerCase())) {
|
|
1081
|
+
// Show first 8 chars + masked rest
|
|
1082
|
+
maskedHeaders[key] = value.length > 12 ? `${value.slice(0, 8)}...****` : "****";
|
|
703
1083
|
}
|
|
704
1084
|
else {
|
|
705
|
-
|
|
1085
|
+
maskedHeaders[key] = value;
|
|
706
1086
|
}
|
|
707
1087
|
}
|
|
1088
|
+
log.debug(`[PROXY] Request headers: ${JSON.stringify(maskedHeaders)}`);
|
|
1089
|
+
try {
|
|
1090
|
+
const bodyPreview = modifiedRequestBody.toString("utf-8").slice(0, 2000);
|
|
1091
|
+
log.debug(`[PROXY] Request body: ${bodyPreview}${modifiedRequestBody.length > 2000 ? "... (truncated)" : ""}`);
|
|
1092
|
+
}
|
|
1093
|
+
catch {
|
|
1094
|
+
log.debug(`[PROXY] Request body: (binary, ${modifiedRequestBody.length} bytes)`);
|
|
1095
|
+
}
|
|
708
1096
|
const requestStart = Date.now();
|
|
709
1097
|
let providerResponse;
|
|
710
1098
|
try {
|
|
711
1099
|
providerResponse = await fetch(targetUrl, {
|
|
712
|
-
method,
|
|
1100
|
+
method: "POST",
|
|
713
1101
|
headers: forwardHeaders,
|
|
714
|
-
body:
|
|
715
|
-
? new Uint8Array(modifiedRequestBody)
|
|
716
|
-
: undefined,
|
|
1102
|
+
body: new Uint8Array(modifiedRequestBody),
|
|
717
1103
|
signal: AbortSignal.timeout(UPSTREAM_TIMEOUT_MS),
|
|
718
1104
|
});
|
|
719
1105
|
}
|
|
@@ -724,14 +1110,10 @@ function startProxy(options) {
|
|
|
724
1110
|
return;
|
|
725
1111
|
}
|
|
726
1112
|
log.info(`[PROXY] Response: ${providerResponse.status} ${providerResponse.statusText}`);
|
|
727
|
-
// Check if the response is an SSE stream
|
|
728
1113
|
const contentType = providerResponse.headers.get("content-type") ?? "";
|
|
729
1114
|
const isSSE = contentType.includes("text/event-stream");
|
|
730
1115
|
if (isSSE && providerResponse.body) {
|
|
731
|
-
//
|
|
732
|
-
// STREAMING PATH: pipe chunks through to client in real-time,
|
|
733
|
-
// accumulate them for metric extraction after the stream ends.
|
|
734
|
-
// ---------------------------------------------------------------
|
|
1116
|
+
// Streaming response
|
|
735
1117
|
const responseHeaders = {};
|
|
736
1118
|
providerResponse.headers.forEach((value, key) => {
|
|
737
1119
|
responseHeaders[key] = value;
|
|
@@ -762,127 +1144,61 @@ function startProxy(options) {
|
|
|
762
1144
|
const latencyMs = Date.now() - requestStart;
|
|
763
1145
|
const fullBody = Buffer.concat(chunks);
|
|
764
1146
|
try {
|
|
765
|
-
extractStreamingMetrics(
|
|
1147
|
+
extractStreamingMetrics(provider, providerResponse.status, fullBody, latencyMs, effectiveAgentId, requestedModel);
|
|
766
1148
|
}
|
|
767
1149
|
catch (error) {
|
|
768
1150
|
log.error("Streaming metric extraction error", { err: error instanceof Error ? error.message : String(error) });
|
|
769
1151
|
}
|
|
770
1152
|
}
|
|
771
1153
|
else {
|
|
772
|
-
//
|
|
773
|
-
// NON-STREAMING PATH: buffer full response, forward, extract.
|
|
774
|
-
// ---------------------------------------------------------------
|
|
1154
|
+
// Non-streaming response
|
|
775
1155
|
let responseBodyBuffer;
|
|
776
1156
|
try {
|
|
777
1157
|
const arrayBuffer = await providerResponse.arrayBuffer();
|
|
778
1158
|
responseBodyBuffer = Buffer.from(arrayBuffer);
|
|
779
1159
|
}
|
|
780
1160
|
catch {
|
|
781
|
-
sendJson(res, 502, {
|
|
782
|
-
error: "Failed to read upstream response body",
|
|
783
|
-
});
|
|
1161
|
+
sendJson(res, 502, { error: "Failed to read upstream response body" });
|
|
784
1162
|
return;
|
|
785
1163
|
}
|
|
786
1164
|
const latencyMs = Date.now() - requestStart;
|
|
787
|
-
// Forward status code and headers back to the client
|
|
788
1165
|
const responseHeaders = {};
|
|
789
1166
|
providerResponse.headers.forEach((value, key) => {
|
|
790
|
-
// Skip transfer-encoding since we are sending the full body
|
|
791
1167
|
if (key.toLowerCase() === "transfer-encoding")
|
|
792
1168
|
return;
|
|
793
1169
|
responseHeaders[key] = value;
|
|
794
1170
|
});
|
|
1171
|
+
// Debug log error responses
|
|
1172
|
+
if (providerResponse.status >= 400) {
|
|
1173
|
+
try {
|
|
1174
|
+
const errorBody = responseBodyBuffer.toString("utf-8").slice(0, 2000);
|
|
1175
|
+
log.debug(`[PROXY] Error response body: ${errorBody}${responseBodyBuffer.length > 2000 ? "... (truncated)" : ""}`);
|
|
1176
|
+
}
|
|
1177
|
+
catch {
|
|
1178
|
+
log.debug(`[PROXY] Error response body: (binary, ${responseBodyBuffer.length} bytes)`);
|
|
1179
|
+
}
|
|
1180
|
+
}
|
|
795
1181
|
res.writeHead(providerResponse.status, responseHeaders);
|
|
796
1182
|
res.end(responseBodyBuffer);
|
|
797
|
-
// After response is sent, extract metrics asynchronously
|
|
798
1183
|
try {
|
|
799
|
-
extractAndQueueMetrics(
|
|
1184
|
+
extractAndQueueMetrics(provider, providerResponse.status, responseBodyBuffer, latencyMs, effectiveAgentId, requestedModel);
|
|
800
1185
|
}
|
|
801
1186
|
catch (error) {
|
|
802
1187
|
log.error("Metric extraction error", { err: error instanceof Error ? error.message : String(error) });
|
|
803
1188
|
}
|
|
804
1189
|
}
|
|
805
1190
|
}
|
|
806
|
-
function extractStreamingMetrics(provider, statusCode, sseBody, latencyMs, effectiveAgentId, requestedModel) {
|
|
807
|
-
if (provider === "unknown") {
|
|
808
|
-
log.warn("Unrecognized provider - skipping streaming metric extraction");
|
|
809
|
-
return;
|
|
810
|
-
}
|
|
811
|
-
const sseText = sseBody.toString("utf-8");
|
|
812
|
-
const parsed = parseSSEResponse(provider, sseText, statusCode);
|
|
813
|
-
if (!parsed) {
|
|
814
|
-
log.warn(`No parseable SSE data for provider: ${provider} — skipping event`);
|
|
815
|
-
return;
|
|
816
|
-
}
|
|
817
|
-
let costUsd = null;
|
|
818
|
-
if (parsed.model && parsed.tokensIn != null && parsed.tokensOut != null) {
|
|
819
|
-
costUsd = (0, shared_1.calculateCost)(parsed.model, parsed.tokensIn, parsed.tokensOut);
|
|
820
|
-
}
|
|
821
|
-
const event = {
|
|
822
|
-
agent_id: effectiveAgentId,
|
|
823
|
-
event_type: "llm_call",
|
|
824
|
-
provider,
|
|
825
|
-
model: parsed.model,
|
|
826
|
-
requested_model: requestedModel,
|
|
827
|
-
tokens_in: parsed.tokensIn,
|
|
828
|
-
tokens_out: parsed.tokensOut,
|
|
829
|
-
tokens_total: parsed.tokensTotal,
|
|
830
|
-
cost_usd: costUsd,
|
|
831
|
-
latency_ms: latencyMs,
|
|
832
|
-
status_code: statusCode,
|
|
833
|
-
source: "proxy",
|
|
834
|
-
timestamp: new Date().toISOString(),
|
|
835
|
-
tags: { streaming: "true" },
|
|
836
|
-
};
|
|
837
|
-
eventBuffer.add(event);
|
|
838
|
-
}
|
|
839
|
-
function extractAndQueueMetrics(provider, statusCode, responseBody, latencyMs, effectiveAgentId, requestedModel) {
|
|
840
|
-
if (provider === "unknown") {
|
|
841
|
-
log.warn("Unrecognized provider - skipping metric extraction");
|
|
842
|
-
return;
|
|
843
|
-
}
|
|
844
|
-
// Parse the response body as JSON
|
|
845
|
-
let parsedBody;
|
|
846
|
-
try {
|
|
847
|
-
parsedBody = JSON.parse(responseBody.toString("utf-8"));
|
|
848
|
-
}
|
|
849
|
-
catch {
|
|
850
|
-
log.warn(`Could not parse response body as JSON for ${provider} - skipping metric extraction`);
|
|
851
|
-
return;
|
|
852
|
-
}
|
|
853
|
-
const parsed = (0, shared_1.parseProviderResponse)(provider, parsedBody, statusCode);
|
|
854
|
-
if (!parsed) {
|
|
855
|
-
log.warn(`No parser result for provider: ${provider}`);
|
|
856
|
-
return;
|
|
857
|
-
}
|
|
858
|
-
// Calculate cost if we have the necessary token data
|
|
859
|
-
let costUsd = null;
|
|
860
|
-
if (parsed.model && parsed.tokensIn != null && parsed.tokensOut != null) {
|
|
861
|
-
costUsd = (0, shared_1.calculateCost)(parsed.model, parsed.tokensIn, parsed.tokensOut);
|
|
862
|
-
}
|
|
863
|
-
const event = {
|
|
864
|
-
agent_id: effectiveAgentId,
|
|
865
|
-
event_type: "llm_call",
|
|
866
|
-
provider,
|
|
867
|
-
model: parsed.model,
|
|
868
|
-
requested_model: requestedModel,
|
|
869
|
-
tokens_in: parsed.tokensIn,
|
|
870
|
-
tokens_out: parsed.tokensOut,
|
|
871
|
-
tokens_total: parsed.tokensTotal,
|
|
872
|
-
cost_usd: costUsd,
|
|
873
|
-
latency_ms: latencyMs,
|
|
874
|
-
status_code: statusCode,
|
|
875
|
-
source: "proxy",
|
|
876
|
-
timestamp: new Date().toISOString(),
|
|
877
|
-
tags: {},
|
|
878
|
-
};
|
|
879
|
-
eventBuffer.add(event);
|
|
880
|
-
}
|
|
881
1191
|
server.listen(port);
|
|
1192
|
+
// Start loop detector cleanup timer (cleans inactive agents every hour)
|
|
1193
|
+
loop_detector_js_1.loopDetector.startCleanup();
|
|
882
1194
|
async function shutdown() {
|
|
883
1195
|
if (rateLimitRefreshTimer) {
|
|
884
1196
|
clearInterval(rateLimitRefreshTimer);
|
|
885
1197
|
}
|
|
1198
|
+
if (providerKeysRefreshTimer) {
|
|
1199
|
+
clearInterval(providerKeysRefreshTimer);
|
|
1200
|
+
}
|
|
1201
|
+
loop_detector_js_1.loopDetector.stopCleanup();
|
|
886
1202
|
await eventBuffer.shutdown();
|
|
887
1203
|
return new Promise((resolve, reject) => {
|
|
888
1204
|
server.close((err) => {
|