@juspay/neurolink 9.41.0 → 9.42.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +7 -1
- package/dist/auth/anthropicOAuth.d.ts +18 -3
- package/dist/auth/anthropicOAuth.js +149 -4
- package/dist/auth/providers/firebase.js +5 -1
- package/dist/auth/providers/jwt.js +5 -1
- package/dist/auth/providers/workos.js +5 -1
- package/dist/auth/sessionManager.d.ts +1 -1
- package/dist/auth/sessionManager.js +58 -27
- package/dist/browser/neurolink.min.js +354 -334
- package/dist/cli/commands/mcp.d.ts +6 -0
- package/dist/cli/commands/mcp.js +188 -181
- package/dist/cli/commands/proxy.d.ts +2 -1
- package/dist/cli/commands/proxy.js +713 -431
- package/dist/cli/commands/task.js +3 -0
- package/dist/cli/factories/commandFactory.d.ts +2 -0
- package/dist/cli/factories/commandFactory.js +38 -0
- package/dist/cli/parser.js +4 -3
- package/dist/client/aiSdkAdapter.js +3 -0
- package/dist/client/streamingClient.js +30 -10
- package/dist/core/baseProvider.d.ts +6 -1
- package/dist/core/baseProvider.js +208 -230
- package/dist/core/factory.d.ts +3 -0
- package/dist/core/factory.js +138 -188
- package/dist/core/modules/GenerationHandler.js +3 -2
- package/dist/core/redisConversationMemoryManager.js +7 -3
- package/dist/evaluation/BatchEvaluator.js +4 -1
- package/dist/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/evaluation/pipeline/evaluationPipeline.js +24 -9
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
- package/dist/lib/auth/anthropicOAuth.js +149 -4
- package/dist/lib/auth/providers/firebase.js +5 -1
- package/dist/lib/auth/providers/jwt.js +5 -1
- package/dist/lib/auth/providers/workos.js +5 -1
- package/dist/lib/auth/sessionManager.d.ts +1 -1
- package/dist/lib/auth/sessionManager.js +58 -27
- package/dist/lib/client/aiSdkAdapter.js +3 -0
- package/dist/lib/client/streamingClient.js +30 -10
- package/dist/lib/core/baseProvider.d.ts +6 -1
- package/dist/lib/core/baseProvider.js +208 -230
- package/dist/lib/core/factory.d.ts +3 -0
- package/dist/lib/core/factory.js +138 -188
- package/dist/lib/core/modules/GenerationHandler.js +3 -2
- package/dist/lib/core/redisConversationMemoryManager.js +7 -3
- package/dist/lib/evaluation/BatchEvaluator.js +4 -1
- package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +24 -9
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/mcp/toolRegistry.d.ts +2 -0
- package/dist/lib/mcp/toolRegistry.js +32 -31
- package/dist/lib/neurolink.d.ts +41 -2
- package/dist/lib/neurolink.js +1616 -1681
- package/dist/lib/observability/otelBridge.d.ts +2 -2
- package/dist/lib/observability/otelBridge.js +12 -3
- package/dist/lib/providers/amazonBedrock.js +2 -4
- package/dist/lib/providers/anthropic.d.ts +9 -5
- package/dist/lib/providers/anthropic.js +19 -14
- package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/lib/providers/anthropicBaseProvider.js +5 -4
- package/dist/lib/providers/azureOpenai.d.ts +1 -1
- package/dist/lib/providers/azureOpenai.js +5 -4
- package/dist/lib/providers/googleAiStudio.js +30 -6
- package/dist/lib/providers/googleVertex.d.ts +10 -0
- package/dist/lib/providers/googleVertex.js +437 -423
- package/dist/lib/providers/huggingFace.d.ts +3 -3
- package/dist/lib/providers/huggingFace.js +6 -8
- package/dist/lib/providers/litellm.d.ts +1 -0
- package/dist/lib/providers/litellm.js +76 -55
- package/dist/lib/providers/mistral.js +2 -1
- package/dist/lib/providers/ollama.js +93 -23
- package/dist/lib/providers/openAI.d.ts +2 -0
- package/dist/lib/providers/openAI.js +141 -141
- package/dist/lib/providers/openRouter.js +2 -1
- package/dist/lib/providers/openaiCompatible.d.ts +4 -4
- package/dist/lib/providers/openaiCompatible.js +4 -4
- package/dist/lib/proxy/claudeFormat.d.ts +3 -2
- package/dist/lib/proxy/claudeFormat.js +27 -14
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/lib/proxy/modelRouter.js +3 -0
- package/dist/lib/proxy/oauthFetch.d.ts +1 -1
- package/dist/lib/proxy/oauthFetch.js +289 -316
- package/dist/lib/proxy/proxyConfig.js +46 -24
- package/dist/lib/proxy/proxyEnv.d.ts +19 -0
- package/dist/lib/proxy/proxyEnv.js +73 -0
- package/dist/lib/proxy/proxyFetch.js +291 -217
- package/dist/lib/proxy/proxyTracer.d.ts +133 -0
- package/dist/lib/proxy/proxyTracer.js +645 -0
- package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/lib/proxy/rawStreamCapture.js +83 -0
- package/dist/lib/proxy/requestLogger.d.ts +32 -5
- package/dist/lib/proxy/requestLogger.js +503 -47
- package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
- package/dist/lib/proxy/sseInterceptor.js +427 -0
- package/dist/lib/proxy/usageStats.d.ts +4 -3
- package/dist/lib/proxy/usageStats.js +25 -12
- package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/lib/rag/chunking/markdownChunker.js +15 -6
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +17 -3
- package/dist/lib/server/routes/claudeProxyRoutes.js +3032 -1349
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/lib/services/server/ai/observability/instrumentation.js +337 -161
- package/dist/lib/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/lib/tasks/backends/bullmqBackend.js +35 -22
- package/dist/lib/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/lib/tasks/store/redisTaskStore.js +54 -39
- package/dist/lib/tasks/taskManager.d.ts +5 -0
- package/dist/lib/tasks/taskManager.js +158 -30
- package/dist/lib/telemetry/index.d.ts +2 -1
- package/dist/lib/telemetry/index.js +2 -1
- package/dist/lib/telemetry/telemetryService.d.ts +3 -0
- package/dist/lib/telemetry/telemetryService.js +69 -5
- package/dist/lib/types/cli.d.ts +10 -0
- package/dist/lib/types/proxyTypes.d.ts +160 -5
- package/dist/lib/types/streamTypes.d.ts +25 -3
- package/dist/lib/utils/messageBuilder.js +3 -2
- package/dist/lib/utils/providerHealth.d.ts +19 -0
- package/dist/lib/utils/providerHealth.js +279 -33
- package/dist/lib/utils/providerUtils.js +17 -22
- package/dist/lib/utils/toolChoice.d.ts +4 -0
- package/dist/lib/utils/toolChoice.js +7 -0
- package/dist/mcp/toolRegistry.d.ts +2 -0
- package/dist/mcp/toolRegistry.js +32 -31
- package/dist/neurolink.d.ts +41 -2
- package/dist/neurolink.js +1616 -1681
- package/dist/observability/otelBridge.d.ts +2 -2
- package/dist/observability/otelBridge.js +12 -3
- package/dist/providers/amazonBedrock.js +2 -4
- package/dist/providers/anthropic.d.ts +9 -5
- package/dist/providers/anthropic.js +19 -14
- package/dist/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/providers/anthropicBaseProvider.js +5 -4
- package/dist/providers/azureOpenai.d.ts +1 -1
- package/dist/providers/azureOpenai.js +5 -4
- package/dist/providers/googleAiStudio.js +30 -6
- package/dist/providers/googleVertex.d.ts +10 -0
- package/dist/providers/googleVertex.js +437 -423
- package/dist/providers/huggingFace.d.ts +3 -3
- package/dist/providers/huggingFace.js +6 -7
- package/dist/providers/litellm.d.ts +1 -0
- package/dist/providers/litellm.js +76 -55
- package/dist/providers/mistral.js +2 -1
- package/dist/providers/ollama.js +93 -23
- package/dist/providers/openAI.d.ts +2 -0
- package/dist/providers/openAI.js +141 -141
- package/dist/providers/openRouter.js +2 -1
- package/dist/providers/openaiCompatible.d.ts +4 -4
- package/dist/providers/openaiCompatible.js +4 -3
- package/dist/proxy/claudeFormat.d.ts +3 -2
- package/dist/proxy/claudeFormat.js +27 -14
- package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/proxy/modelRouter.js +3 -0
- package/dist/proxy/oauthFetch.d.ts +1 -1
- package/dist/proxy/oauthFetch.js +289 -316
- package/dist/proxy/proxyConfig.js +46 -24
- package/dist/proxy/proxyEnv.d.ts +19 -0
- package/dist/proxy/proxyEnv.js +72 -0
- package/dist/proxy/proxyFetch.js +291 -217
- package/dist/proxy/proxyTracer.d.ts +133 -0
- package/dist/proxy/proxyTracer.js +644 -0
- package/dist/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/proxy/rawStreamCapture.js +82 -0
- package/dist/proxy/requestLogger.d.ts +32 -5
- package/dist/proxy/requestLogger.js +503 -47
- package/dist/proxy/sseInterceptor.d.ts +97 -0
- package/dist/proxy/sseInterceptor.js +426 -0
- package/dist/proxy/usageStats.d.ts +4 -3
- package/dist/proxy/usageStats.js +25 -12
- package/dist/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/rag/chunking/markdownChunker.js +15 -6
- package/dist/server/routes/claudeProxyRoutes.d.ts +17 -3
- package/dist/server/routes/claudeProxyRoutes.js +3032 -1349
- package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/services/server/ai/observability/instrumentation.js +337 -161
- package/dist/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/tasks/backends/bullmqBackend.js +35 -22
- package/dist/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/tasks/store/redisTaskStore.js +54 -39
- package/dist/tasks/taskManager.d.ts +5 -0
- package/dist/tasks/taskManager.js +158 -30
- package/dist/telemetry/index.d.ts +2 -1
- package/dist/telemetry/index.js +2 -1
- package/dist/telemetry/telemetryService.d.ts +3 -0
- package/dist/telemetry/telemetryService.js +69 -5
- package/dist/types/cli.d.ts +10 -0
- package/dist/types/proxyTypes.d.ts +160 -5
- package/dist/types/streamTypes.d.ts +25 -3
- package/dist/utils/messageBuilder.js +3 -2
- package/dist/utils/providerHealth.d.ts +19 -0
- package/dist/utils/providerHealth.js +279 -33
- package/dist/utils/providerUtils.js +18 -22
- package/dist/utils/toolChoice.d.ts +4 -0
- package/dist/utils/toolChoice.js +6 -0
- package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
- package/docs/changelog.md +252 -0
- package/package.json +19 -2
- package/scripts/observability/check-proxy-telemetry.mjs +235 -0
- package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
- package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
- package/scripts/observability/manage-local-openobserve.sh +215 -0
- package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
- package/scripts/observability/proxy-observability.env.example +23 -0
|
@@ -9,20 +9,24 @@
|
|
|
9
9
|
* provider/model pairs (e.g. "claude-sonnet-4-20250514" -> vertex/gemini-2.5-pro).
|
|
10
10
|
* Without a router, models are passed through to the Anthropic provider.
|
|
11
11
|
*/
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
12
|
+
import { randomUUID } from "node:crypto";
|
|
13
|
+
import { access, mkdir, readFile, rename, writeFile } from "node:fs/promises";
|
|
14
14
|
import { homedir } from "node:os";
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
17
|
-
import { recordRequest, recordSuccess, recordError, recordCooldown, } from "../../proxy/usageStats.js";
|
|
18
|
-
import { logRequest, logFullRequestResponse, logStreamError, } from "../../proxy/requestLogger.js";
|
|
15
|
+
import { join } from "node:path";
|
|
16
|
+
import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
|
|
19
17
|
import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js";
|
|
20
|
-
import {
|
|
18
|
+
import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js";
|
|
19
|
+
import { ProxyTracer } from "../../proxy/proxyTracer.js";
|
|
20
|
+
import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
|
|
21
|
+
import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
|
|
22
|
+
import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
|
|
23
|
+
import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
|
|
24
|
+
import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
|
|
25
|
+
import { logger } from "../../utils/logger.js";
|
|
26
|
+
import { ProviderHealthChecker } from "../../utils/providerHealth.js";
|
|
21
27
|
// ---------------------------------------------------------------------------
|
|
22
28
|
// Helpers
|
|
23
29
|
// ---------------------------------------------------------------------------
|
|
24
|
-
/** Header names whose values must be masked in debug logs. */
|
|
25
|
-
const SENSITIVE_HEADERS = new Set(["authorization", "x-api-key"]);
|
|
26
30
|
/** Headers that must never be forwarded upstream to Anthropic. */
|
|
27
31
|
const BLOCKED_UPSTREAM_HEADERS = new Set([
|
|
28
32
|
"cookie",
|
|
@@ -32,22 +36,6 @@ const BLOCKED_UPSTREAM_HEADERS = new Set([
|
|
|
32
36
|
"content-length",
|
|
33
37
|
"transfer-encoding",
|
|
34
38
|
]);
|
|
35
|
-
/** Return a shallow copy of `headers` with sensitive values redacted. */
|
|
36
|
-
function redactSensitiveHeaders(headers) {
|
|
37
|
-
const redacted = {};
|
|
38
|
-
for (const [key, value] of Object.entries(headers)) {
|
|
39
|
-
if (SENSITIVE_HEADERS.has(key.toLowerCase()) && value.length > 8) {
|
|
40
|
-
redacted[key] = value.substring(0, 8) + "...";
|
|
41
|
-
}
|
|
42
|
-
else if (SENSITIVE_HEADERS.has(key.toLowerCase())) {
|
|
43
|
-
redacted[key] = "***";
|
|
44
|
-
}
|
|
45
|
-
else {
|
|
46
|
-
redacted[key] = value;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
return redacted;
|
|
50
|
-
}
|
|
51
39
|
// ---------------------------------------------------------------------------
|
|
52
40
|
// Module-level state
|
|
53
41
|
// ---------------------------------------------------------------------------
|
|
@@ -83,65 +71,196 @@ function advancePrimaryIfCurrent(accountKey, enabledCount, primaryAccountKey) {
|
|
|
83
71
|
}
|
|
84
72
|
primaryAccountIndex = (primaryAccountIndex + 1) % enabledCount;
|
|
85
73
|
}
|
|
86
|
-
// ---------------------------------------------------------------------------
|
|
87
|
-
// OAuth polyfill helpers (extracted to reduce block nesting)
|
|
88
|
-
// ---------------------------------------------------------------------------
|
|
89
74
|
/**
 * In-memory cache of header snapshots. Entries are stored under the
 * sanitized account label and carry the snapshot plus a `loadedAt` timestamp.
 */
const snapshotCache = new Map();
/** Maximum age of a cache entry before the snapshot is re-read from disk. */
const SNAPSHOT_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
/**
 * Allowlist of lower-cased request header names that may be copied into a
 * persisted snapshot (consulted by buildSnapshotHeaders).
 */
const SNAPSHOT_STABLE_HEADERS = new Set([
    "accept",
    "accept-encoding",
    "accept-language",
    "anthropic-beta",
    "anthropic-dangerous-direct-browser-access",
    "anthropic-version",
    "sec-fetch-mode",
    "user-agent",
    "x-app",
    "x-stainless-arch",
    "x-stainless-lang",
    "x-stainless-os",
    "x-stainless-package-version",
    "x-stainless-retry-count",
    "x-stainless-runtime",
    "x-stainless-runtime-version",
    "x-stainless-timeout",
    "x-subscription-tier",
]);
/**
 * Beta flag identifiers.
 * NOTE(review): presumably the `anthropic-beta` values sent for OAuth
 * requests from non-Claude-Code clients — usage is not visible here; confirm.
 */
const NON_CLAUDE_OAUTH_BETAS = [
    "oauth-2025-04-20",
    "claude-code-20250219",
    "fine-grained-tool-streaming-2025-05-14",
];
|
|
101
|
+
/**
 * Sanitize an account label so it can be embedded in a file name / cache key.
 * Every character other than ASCII letters, digits, ".", "_", "@" and "-"
 * is replaced with "_".
 *
 * @param {string} accountLabel - Raw account label.
 * @returns {string} The sanitized label.
 */
function getSnapshotSafeLabel(accountLabel) {
    const disallowedCharacters = /[^a-zA-Z0-9._@-]/g;
    const sanitized = accountLabel.replace(disallowedCharacters, "_");
    return sanitized;
}
|
|
104
|
+
/**
 * Resolve the on-disk location of an account's header snapshot:
 * `<home>/.neurolink/header-snapshots/anthropic_<safe label>.json`.
 *
 * @param {string} accountLabel - Raw account label (sanitized internally).
 * @returns {string} Path of the snapshot file.
 */
function getSnapshotPath(accountLabel) {
    const snapshotDir = join(homedir(), ".neurolink", "header-snapshots");
    const fileName = `anthropic_${getSnapshotSafeLabel(accountLabel)}.json`;
    return join(snapshotDir, fileName);
}
|
|
107
|
+
/**
 * Fill in request headers from a stored snapshot, mutating `headers`.
 *
 * A snapshot header is copied (under its lower-cased name) only when its
 * value is a string, the request does not already carry a truthy value for
 * it, it is not in BLOCKED_UPSTREAM_HEADERS, and it is not one of the
 * credential / session headers that are never replayed.
 *
 * @param {Record<string, string>} headers - Outgoing headers, modified in place.
 * @param {{ headers?: Record<string, unknown> } | null | undefined} snapshot
 * @returns {void}
 */
function applySnapshotHeaders(headers, snapshot) {
    const storedHeaders = snapshot?.headers;
    if (!storedHeaders) {
        return;
    }
    for (const [name, storedValue] of Object.entries(storedHeaders)) {
        const lower = name.toLowerCase();
        if (typeof storedValue !== "string") {
            continue;
        }
        // Values already present on the request always win.
        if (headers[lower]) {
            continue;
        }
        if (BLOCKED_UPSTREAM_HEADERS.has(lower)) {
            continue;
        }
        // Credentials and the per-session id must never come from a snapshot.
        if (lower === "authorization" ||
            lower === "x-api-key" ||
            lower === "x-claude-code-session-id") {
            continue;
        }
        headers[lower] = storedValue;
    }
}
|
|
123
|
+
/**
 * Load the persisted header/body snapshot for an account.
 *
 * A cached copy younger than SNAPSHOT_CACHE_TTL_MS is returned directly.
 * Otherwise the snapshot file is read, normalized, cached and returned.
 * Loading is best-effort: a missing file, unreadable file, invalid JSON or
 * an effectively empty snapshot all yield `null` rather than an error.
 *
 * @param {string} accountLabel - Raw account label.
 * @returns {Promise<object | null>} Normalized snapshot
 *   (`accountKey`, `capturedAt`, `source`, `headers`, optional `body`) or null.
 */
async function loadClaudeSnapshot(accountLabel) {
    const isPlainObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
    try {
        const safeLabel = getSnapshotSafeLabel(accountLabel);
        const cached = snapshotCache.get(safeLabel);
        if (cached && Date.now() - cached.loadedAt < SNAPSHOT_CACHE_TTL_MS) {
            return cached.snapshot;
        }
        // No separate existence check: readFile rejects for a missing file and
        // the outer catch turns that into `null`, avoiding a second syscall
        // and a check-then-use race.
        const snapshot = JSON.parse(await readFile(getSnapshotPath(accountLabel), "utf8"));
        if (!isPlainObject(snapshot)) {
            return null;
        }
        const normalized = {
            accountKey: typeof snapshot.accountKey === "string"
                ? snapshot.accountKey
                : `anthropic:${accountLabel}`,
            capturedAt: typeof snapshot.capturedAt === "string"
                ? snapshot.capturedAt
                : new Date(0).toISOString(),
            source: "claude-code",
            // Only accept real key/value maps. A string or array here would
            // otherwise be enumerated by index and produce junk headers/body
            // fields when the snapshot is applied or merged.
            headers: isPlainObject(snapshot.headers) ? snapshot.headers : {},
            ...(isPlainObject(snapshot.body) ? { body: snapshot.body } : {}),
        };
        if (Object.keys(normalized.headers).length === 0 &&
            Object.keys(normalized.body ?? {}).length === 0) {
            return null;
        }
        snapshotCache.set(safeLabel, {
            snapshot: normalized,
            loadedAt: Date.now(),
        });
        return normalized;
    }
    catch {
        // Deliberate best-effort: any failure means "no snapshot available".
        return null;
    }
}
|
|
166
|
+
/**
 * Produce the header map to store in a snapshot.
 *
 * Starts from a copy of `existingHeaders` and overlays every string-valued
 * request header whose lower-cased name is in SNAPSHOT_STABLE_HEADERS and is
 * neither blocked for upstream forwarding nor a credential / session header.
 * Neither argument is mutated.
 *
 * @param {Record<string, unknown>} headers - Incoming request headers.
 * @param {Record<string, string> | null | undefined} existingHeaders
 * @returns {Record<string, string>} Merged header map keyed by lower-cased name.
 */
function buildSnapshotHeaders(headers, existingHeaders) {
    const merged = { ...(existingHeaders ?? {}) };
    const isCapturable = (lower) => SNAPSHOT_STABLE_HEADERS.has(lower) &&
        !BLOCKED_UPSTREAM_HEADERS.has(lower) &&
        lower !== "authorization" &&
        lower !== "x-api-key" &&
        lower !== "x-claude-code-session-id";
    Object.entries(headers).forEach(([name, headerValue]) => {
        if (typeof headerValue !== "string") {
            return;
        }
        const lower = name.toLowerCase();
        if (isCapturable(lower)) {
            merged[lower] = headerValue;
        }
    });
    return merged;
}
|
|
181
|
+
/**
 * Pull the snapshot-worthy fields out of a parsed request body.
 *
 * Looks at `metadata.user_id` (via parseClaudeCodeUserId) and at the system
 * prompt for a text block containing "x-anthropic-billing-header" and one
 * containing "Claude Agent SDK".
 *
 * @param {unknown} body - Parsed JSON request body.
 * @returns {{ metadataUserId?: string, sessionId?: string,
 *             billingHeader?: string, agentBlock?: string } | undefined}
 *   The fields that were found, or undefined when the body is not an object
 *   or none of them is present.
 */
function extractSnapshotBody(body) {
    if (!body || typeof body !== "object") {
        return undefined;
    }
    const identity = parseClaudeCodeUserId(body.metadata?.user_id);
    // Normalise `system` to a list of blocks (string -> single text block).
    let systemBlocks = [];
    if (Array.isArray(body.system)) {
        systemBlocks = body.system;
    }
    else if (typeof body.system === "string") {
        systemBlocks = [{ type: "text", text: body.system }];
    }
    const findTextContaining = (marker) => systemBlocks.find((block) => typeof block?.text === "string" && block.text.includes(marker))?.text;
    const billingHeader = findTextContaining("x-anthropic-billing-header");
    const agentBlock = findTextContaining("Claude Agent SDK");
    if (!identity && !billingHeader && !agentBlock) {
        return undefined;
    }
    const extracted = {};
    if (identity) {
        extracted.metadataUserId = identity.metadataUserId;
        extracted.sessionId = identity.sessionId;
    }
    if (billingHeader) {
        extracted.billingHeader = billingHeader;
    }
    if (agentBlock) {
        extracted.agentBlock = agentBlock;
    }
    return extracted;
}
|
|
206
|
+
/**
 * Heuristic: does this request look like it came from Claude Code?
 *
 * True when the request has a string `x-claude-code-session-id` header, a
 * `user-agent` starting with "claude-cli/", or when the extracted body fields
 * include a metadata user id, a billing header or an agent block.
 *
 * @param {Record<string, string | undefined>} headers - Request headers.
 * @param {{ metadataUserId?: string, billingHeader?: string,
 *           agentBlock?: string } | undefined} snapshotBody
 * @returns {boolean}
 */
function isLikelyClaudeClient(headers, snapshotBody) {
    if (typeof headers["x-claude-code-session-id"] === "string") {
        return true;
    }
    if (headers["user-agent"]?.startsWith("claude-cli/")) {
        return true;
    }
    return Boolean(snapshotBody?.metadataUserId ||
        snapshotBody?.billingHeader ||
        snapshotBody?.agentBlock);
}
|
|
213
|
+
/**
 * Serialize a value to JSON with object keys sorted at every nesting level,
 * so that two structurally equal values always produce the same string.
 */
function canonicalSnapshotJson(value) {
    return JSON.stringify(value, (_key, nested) => {
        if (nested === null || typeof nested !== "object" || Array.isArray(nested)) {
            return nested;
        }
        return Object.fromEntries(Object.keys(nested)
            .sort()
            .map((key) => [key, nested[key]]));
    });
}
/**
 * Decide whether a freshly built snapshot carries the same data as the
 * stored one, comparing only `headers` and `body` (a missing field is
 * treated as an empty object; `accountKey`/`capturedAt` are ignored).
 *
 * The comparison is independent of property order, so snapshots that differ
 * only in key ordering are considered equal and do not trigger a rewrite.
 *
 * @param {{ headers?: object, body?: object } | null | undefined} existing
 * @param {{ headers?: object, body?: object }} next
 * @returns {boolean} False when there is no existing snapshot or the data differs.
 */
function snapshotsMatch(existing, next) {
    if (!existing) {
        return false;
    }
    const sameHeaders = canonicalSnapshotJson(existing.headers ?? {}) ===
        canonicalSnapshotJson(next.headers ?? {});
    if (!sameHeaders) {
        return false;
    }
    return (canonicalSnapshotJson(existing.body ?? {}) ===
        canonicalSnapshotJson(next.body ?? {}));
}
|
|
221
|
+
/**
 * Write a snapshot to disk and refresh the in-memory cache entry.
 *
 * The JSON is first written to a uniquely named temporary file (mode 0o600)
 * next to the target and then renamed over it, so readers never observe a
 * partially written snapshot file. Filesystem errors propagate to the caller.
 *
 * @param {string} accountLabel - Raw account label.
 * @param {object} snapshot - Snapshot object to store.
 * @returns {Promise<void>}
 */
async function persistClaudeSnapshot(accountLabel, snapshot) {
    const targetPath = getSnapshotPath(accountLabel);
    const snapshotDir = join(homedir(), ".neurolink", "header-snapshots");
    await mkdir(snapshotDir, { recursive: true });
    // pid + UUID keeps concurrent writers from sharing a temp file.
    const stagingPath = `${targetPath}.${process.pid}.${randomUUID()}.tmp`;
    const serialized = JSON.stringify(snapshot, null, 2);
    await writeFile(stagingPath, serialized, { mode: 0o600 });
    await rename(stagingPath, targetPath);
    const cacheEntry = { snapshot, loadedAt: Date.now() };
    snapshotCache.set(getSnapshotSafeLabel(accountLabel), cacheEntry);
}
|
|
233
|
+
/**
 * Update the stored snapshot for an account from the current request when
 * that request looks like it came from Claude Code.
 *
 * The existing snapshot is returned unchanged when the body is not valid
 * JSON, when the request does not look like a Claude client, or when the
 * merged snapshot equals the stored one. Otherwise the merged snapshot is
 * persisted and returned.
 *
 * @param {string} accountLabel - Raw account label (selects the snapshot file).
 * @param {string} accountKey - Account key recorded in the snapshot.
 * @param {Record<string, string | undefined>} headers - Request headers.
 * @param {string} bodyStr - Raw JSON request body.
 * @returns {Promise<object | null>} The snapshot now in effect (may be null).
 */
async function maybeRefreshClaudeSnapshot(accountLabel, accountKey, headers, bodyStr) {
    const existing = await loadClaudeSnapshot(accountLabel);
    let requestBody;
    try {
        requestBody = JSON.parse(bodyStr);
    }
    catch {
        return existing;
    }
    const observedBody = extractSnapshotBody(requestBody);
    if (!isLikelyClaudeClient(headers, observedBody)) {
        return existing;
    }
    // Newly observed body fields override stored ones; an explicit session-id
    // header overrides both.
    const mergedBody = {
        ...(existing?.body ?? {}),
        ...(observedBody ?? {}),
    };
    const headerSessionId = headers["x-claude-code-session-id"];
    if (typeof headerSessionId === "string") {
        mergedBody.sessionId = headerSessionId;
    }
    const candidate = {
        accountKey,
        capturedAt: new Date().toISOString(),
        source: "claude-code",
        headers: buildSnapshotHeaders(headers, existing?.headers),
        body: mergedBody,
    };
    if (snapshotsMatch(existing, candidate)) {
        return existing;
    }
    await persistClaudeSnapshot(accountLabel, candidate);
    return candidate;
}
|
|
146
265
|
/**
|
|
147
266
|
* Polyfill the request body for OAuth accounts.
|
|
@@ -149,59 +268,78 @@ async function applyHeaderSnapshot(headers, accountLabel) {
|
|
|
149
268
|
* into the body. Non-CC clients (Curator, custom apps) don't send these —
|
|
150
269
|
* Anthropic rejects without them.
|
|
151
270
|
*/
|
|
152
|
-
function polyfillOAuthBody(bodyStr, accountToken) {
|
|
271
|
+
/**
 * Add the Claude-Code-shaped billing block, agent block and
 * `metadata.user_id` to a request body for OAuth accounts.
 *
 * @param {string} bodyStr - Raw JSON request body.
 * @param {string} accountToken - Account token; its first 20 characters are
 *   passed to getOrCreateClaudeCodeIdentity.
 * @param {{ body?: { agentBlock?: string, billingHeader?: string,
 *           metadataUserId?: string, sessionId?: string } } | null} [snapshot]
 *   Stored snapshot used as a fallback source for the injected values.
 * @param {string} [preferredSessionId] - Session id to prefer over the
 *   snapshot's.
 * @returns {{ bodyStr: string, sessionId?: string }} The rewritten body and
 *   the session id of the resolved identity. If anything fails (for example
 *   the body is not valid JSON) the original `bodyStr` is returned alone.
 */
function polyfillOAuthBody(bodyStr, accountToken, snapshot, preferredSessionId) {
    try {
        const parsed = JSON.parse(bodyStr);
        // Null-safe marker test: a malformed (null / non-object) system entry
        // must not throw, because the catch below would then skip the whole
        // polyfill, including the metadata.user_id injection.
        const hasMarker = (block, marker) => typeof block?.text === "string" && block.text.includes(marker);
        const agentBlock = {
            type: "text",
            text: snapshot?.body?.agentBlock ||
                "You are a Claude agent, built on Anthropic's Claude Agent SDK.",
        };
        // Normalise system to array and APPEND billing + agent blocks.
        // IMPORTANT: We append (not prepend) to preserve the client's cache
        // prefix chain. Anthropic's prompt caching uses prefix matching — if we
        // insert anything before the client's system blocks, we invalidate all
        // cached content (tools, system prompt, message history).
        //
        // Claude Code sends a billing block with a `cch=<hash>` value that changes
        // on every request. We fix this by:
        //   1. Removing the client's billing block from its current position
        //   2. Stabilizing it while keeping the official Claude Code shape
        //   3. Appending it at the END so the cacheable system blocks stay
        //      at the front of the prefix chain
        if (parsed.system) {
            if (typeof parsed.system === "string") {
                parsed.system = [{ type: "text", text: parsed.system }];
            }
            if (Array.isArray(parsed.system)) {
                // Find existing billing/agent blocks wherever the client
                // placed them (typically at system[0]).
                const billingIdx = parsed.system.findIndex((b) => hasMarker(b, "x-anthropic-billing-header"));
                const agentIdx = parsed.system.findIndex((b) => hasMarker(b, "Claude Agent SDK"));
                // The billing block MUST be deterministic (no random values);
                // the client's own header text is preferred, then the snapshot's.
                const billingBlock = {
                    type: "text",
                    text: buildStableClaudeCodeBillingHeader(parsed.system[billingIdx]?.text ?? snapshot?.body?.billingHeader),
                };
                // Remove in reverse index order so indices stay valid. The Set
                // de-duplicates the case where one block contains both
                // markers; splicing the same index twice would delete an
                // unrelated neighbouring block.
                const indicesToRemove = [...new Set([billingIdx, agentIdx])]
                    .filter((i) => i >= 0)
                    .sort((a, b) => b - a);
                for (const idx of indicesToRemove) {
                    parsed.system.splice(idx, 1);
                }
                // Always append a deterministic billing block at the end.
                parsed.system = [...parsed.system, billingBlock, agentBlock];
            }
        }
        else {
            // No client system prompt: the injected blocks are the whole prompt.
            const billingBlock = {
                type: "text",
                text: buildStableClaudeCodeBillingHeader(snapshot?.body?.billingHeader),
            };
            parsed.system = [billingBlock, agentBlock];
        }
        // Inject Claude-Code-shaped metadata.user_id (required for OAuth).
        const tokenPrefix = accountToken.substring(0, Math.min(20, accountToken.length));
        const identity = getOrCreateClaudeCodeIdentity(tokenPrefix, {
            existingUserId: parsed.metadata?.user_id ?? snapshot?.body?.metadataUserId,
            preferredSessionId: preferredSessionId ?? snapshot?.body?.sessionId,
        });
        parsed.metadata = {
            ...parsed.metadata,
            user_id: identity.metadataUserId,
        };
        return { bodyStr: JSON.stringify(parsed), sessionId: identity.sessionId };
    }
    catch {
        return { bodyStr }; // JSON parse failed — use original body
    }
}
|
|
207
345
|
// ---------------------------------------------------------------------------
|
|
@@ -256,6 +394,2617 @@ async function tryLoadLegacyAccount(creds, legacyCredPath) {
|
|
|
256
394
|
persistTarget: { credPath: legacyCredPath },
|
|
257
395
|
};
|
|
258
396
|
}
|
|
397
|
+
/**
 * Entry point for Claude-format requests that are served through
 * translation to another provider/model.
 *
 * Sets the tracer (when present) to "full" mode, parses the Claude request,
 * builds the ordered list of translation attempts for the routed
 * provider/model, and delegates to the streaming or JSON handler depending
 * on `body.stream`.
 *
 * @param {object} args - `{ ctx, body, route, modelRouter, tracer,
 *   requestStartTime, logProxyBody }`.
 * @returns {Promise<unknown>} Whatever the delegated handler resolves to.
 */
async function handleTranslatedClaudeRequest(args) {
    const { ctx, body, route, modelRouter, tracer, requestStartTime, logProxyBody, } = args;
    tracer?.setMode("full");
    const parsed = parseClaudeRequest(body);
    const routedTarget = { provider: route.provider, model: route.model };
    const attempts = buildProxyTranslationAttempts(routedTarget, modelRouter, parsed);
    // Arguments common to both handlers; only the JSON path logs the body.
    const sharedArgs = { ctx, body, attempts, parsed, tracer, requestStartTime };
    return body.stream
        ? handleTranslatedClaudeStreamRequest(sharedArgs)
        : handleTranslatedClaudeJsonRequest({ ...sharedArgs, logProxyBody });
}
|
|
425
|
+
/**
 * Serves a streaming Claude-format request by translating it through the
 * fallback providers listed in `attempts`, tried in order.
 *
 * The Response is returned immediately; every provider call happens inside the
 * ReadableStream's `start` callback. Serializer start frames are emitted first,
 * a `: keep-alive` SSE comment is enqueued every 15 seconds while work is in
 * progress, and text deltas / tool-use frames / finish frames follow for the
 * first attempt that yields output. If no attempt succeeds, a single SSE
 * `error` event is emitted instead.
 *
 * Cancellation: once the consumer cancels the stream, no further provider
 * attempts are started, and a provider stream that was opened while the cancel
 * arrived is closed.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `ctx.neurolink.stream(options)` opens a provider stream.
 * @param {object} args.body - Client request body; `body.model` is read.
 * @param {Array<object>} args.attempts - Ordered attempts; `label`, `provider` and `model` are read.
 * @param {object} args.parsed - Parsed request handed to `buildProxyFallbackOptions`.
 * @param {object} [args.tracer] - Optional tracer; receives model substitution, error and end calls.
 * @param {number} args.requestStartTime - Epoch milliseconds used to compute the traced duration.
 * @returns {Promise<Response>} A `text/event-stream` response backed by the translation stream.
 */
async function handleTranslatedClaudeStreamRequest(args) {
    const { ctx, body, attempts, parsed, tracer, requestStartTime } = args;
    const serializer = new ClaudeStreamSerializer(body.model, 0);
    const KEEPALIVE_INTERVAL_MS = 15_000;
    const encoder = new TextEncoder();
    let translationKeepAliveTimer;
    let translationCancelled = false;
    let translationSucceeded = false;
    let translatedModel;
    let finalStreamError = "No translation providers succeeded";
    let upstreamIterator;
    // Best-effort close of the provider stream that is currently being consumed.
    const releaseUpstream = () => {
        if (upstreamIterator?.return) {
            upstreamIterator.return(undefined).catch((cancelErr) => {
                logger.debug(`[proxy] upstream cancel error: ${cancelErr instanceof Error ? cancelErr.message : String(cancelErr)}`);
            });
        }
    };
    const translationStream = new ReadableStream({
        async start(controller) {
            const enqueueFrames = (frames) => {
                for (const frame of frames) {
                    controller.enqueue(encoder.encode(frame));
                }
            };
            // Logs the last recorded failure and reports it to the client as an SSE error event.
            const enqueueStreamError = () => {
                logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`);
                const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`;
                controller.enqueue(encoder.encode(errorEvent));
            };
            enqueueFrames(serializer.start());
            translationKeepAliveTimer = setInterval(() => {
                try {
                    controller.enqueue(encoder.encode(": keep-alive\n\n"));
                }
                catch {
                    // Controller already closed.
                }
            }, KEEPALIVE_INTERVAL_MS);
            try {
                for (let attemptIndex = 0; attemptIndex < attempts.length; attemptIndex++) {
                    if (translationCancelled) {
                        // The client is gone; never start another provider call.
                        return;
                    }
                    const attempt = attempts[attemptIndex];
                    if (attemptIndex > 0) {
                        logger.always(`[proxy] fallback → ${attempt.label}`);
                    }
                    let collectedText = "";
                    try {
                        const options = buildProxyFallbackOptions(parsed, attempt.provider
                            ? {
                                provider: attempt.provider,
                                model: attempt.model,
                            }
                            : {});
                        const streamResult = await ctx.neurolink.stream(options);
                        const iterable = streamResult.stream;
                        upstreamIterator = iterable[Symbol.asyncIterator]();
                        if (translationCancelled) {
                            // The cancel arrived while the provider stream was being opened,
                            // so cancel() never saw this iterator; close it here.
                            releaseUpstream();
                            return;
                        }
                        while (true) {
                            if (translationCancelled) {
                                break;
                            }
                            const { value: chunk, done } = await upstreamIterator.next();
                            if (done || translationCancelled) {
                                break;
                            }
                            const text = extractText(chunk);
                            if (text) {
                                collectedText += text;
                                enqueueFrames(serializer.pushDelta(text));
                            }
                        }
                        const toolCalls = streamResult.toolCalls ?? [];
                        if (!hasTranslatedOutput(collectedText, toolCalls)) {
                            if (translationCancelled) {
                                // An interrupted attempt is not an empty one: do not blame the
                                // provider and do not fall through to the next attempt.
                                return;
                            }
                            finalStreamError = `Translated provider ${attempt.label} returned no content or tool calls`;
                            logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`);
                            continue;
                        }
                        if (!translationCancelled && toolCalls.length) {
                            for (const toolCall of toolCalls) {
                                const toolName = toolCall.toolName ??
                                    toolCall.name ??
                                    "unknown";
                                enqueueFrames(serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall)));
                            }
                        }
                        if (!translationCancelled) {
                            const reason = streamResult.finishReason ?? "end_turn";
                            const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
                            enqueueFrames(serializer.finish(resolvedUsage.output, reason));
                        }
                        translatedModel = streamResult.model;
                        translationSucceeded = true;
                        return;
                    }
                    catch (streamErr) {
                        if (translationCancelled) {
                            return;
                        }
                        finalStreamError =
                            streamErr instanceof Error
                                ? streamErr.message
                                : String(streamErr);
                        if (collectedText.trim().length > 0) {
                            // Text has already reached the client, so switching providers
                            // would splice two answers together; report the failure instead.
                            enqueueStreamError();
                            return;
                        }
                        logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${finalStreamError}`);
                    }
                }
                if (!translationCancelled) {
                    enqueueStreamError();
                }
            }
            finally {
                if (translationKeepAliveTimer) {
                    clearInterval(translationKeepAliveTimer);
                }
                if (!translationCancelled) {
                    controller.close();
                }
                if (tracer && translatedModel && translatedModel !== body.model) {
                    tracer.setModelSubstitution(body.model, translatedModel);
                }
                if (!translationSucceeded) {
                    tracer?.setError("generation_error", finalStreamError.slice(0, 500));
                }
                // The trace always ends with 200, including when an error event was emitted.
                tracer?.end(200, Date.now() - requestStartTime);
            }
        },
        cancel() {
            translationCancelled = true;
            if (translationKeepAliveTimer) {
                clearInterval(translationKeepAliveTimer);
                translationKeepAliveTimer = undefined;
            }
            releaseUpstream();
        },
    });
    return new Response(translationStream, {
        headers: {
            "content-type": "text/event-stream",
            "cache-control": "no-cache",
            connection: "keep-alive",
        },
    });
}
|
|
569
|
+
/**
 * Serves a non-streaming Claude-format request by translating it through the
 * fallback providers listed in `attempts`, tried in order.
 *
 * Each attempt's provider stream is drained completely. The first attempt that
 * yields text or tool calls (per `hasTranslatedOutput`) is serialized with
 * `serializeClaudeResponse`, logged through `logProxyBody`, and returned.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `ctx.neurolink.stream(options)` opens a provider stream.
 * @param {object} args.body - Client request body; `body.model` is read.
 * @param {Array<object>} args.attempts - Ordered attempts; `label`, `provider` and `model` are read.
 * @param {object} args.parsed - Parsed request handed to `buildProxyFallbackOptions`.
 * @param {object} [args.tracer] - Optional tracer; ended with 200 on success only.
 * @param {number} args.requestStartTime - Epoch milliseconds used for duration fields.
 * @param {Function} args.logProxyBody - Receives the `client_response` log entry.
 * @returns {Promise<object>} The serialized Claude response of the first successful attempt.
 * @throws {Error} Carrying the last attempt's failure message when every attempt fails.
 */
async function handleTranslatedClaudeJsonRequest(args) {
    const { ctx, body, attempts, parsed, tracer, requestStartTime, logProxyBody } = args;
    let lastAttemptError = "No translation providers succeeded";
    for (const [position, candidate] of attempts.entries()) {
        if (position > 0) {
            logger.always(`[proxy] fallback → ${candidate.label}`);
        }
        try {
            const override = candidate.provider
                ? { provider: candidate.provider, model: candidate.model }
                : {};
            const providerResult = await ctx.neurolink.stream(buildProxyFallbackOptions(parsed, override));
            // Drain the whole provider stream; only the text portions are kept.
            let collectedText = "";
            for await (const piece of providerResult.stream) {
                const pieceText = extractText(piece);
                if (pieceText) {
                    collectedText += pieceText;
                }
            }
            if (!hasTranslatedOutput(collectedText, providerResult.toolCalls)) {
                lastAttemptError = `Translated provider ${candidate.label} returned no content or tool calls`;
                logger.debug(`[proxy] translation attempt ${candidate.label} returned no content or tool calls`);
                continue;
            }
            const usage = providerResult.usage
                ? extractUsageFromStreamResult(providerResult.usage)
                : undefined;
            const internal = {
                content: collectedText,
                model: providerResult.model,
                finishReason: providerResult.finishReason ?? "end_turn",
                reasoning: undefined,
                usage,
                toolCalls: providerResult.toolCalls,
            };
            const servedModel = providerResult.model;
            if (tracer && servedModel && servedModel !== body.model) {
                tracer.setModelSubstitution(body.model, servedModel);
            }
            tracer?.end(200, Date.now() - requestStartTime);
            const clientResponse = serializeClaudeResponse(internal, body.model);
            const serialized = JSON.stringify(clientResponse);
            logProxyBody({
                phase: "client_response",
                headers: { "content-type": "application/json" },
                body: serialized,
                bodySize: Buffer.byteLength(serialized, "utf8"),
                contentType: "application/json",
                responseStatus: 200,
                durationMs: Date.now() - requestStartTime,
            });
            return clientResponse;
        }
        catch (attemptError) {
            // Remember why this attempt failed, then move on to the next one.
            lastAttemptError = attemptError instanceof Error
                ? attemptError.message
                : String(attemptError);
            logger.debug(`[proxy] translation attempt ${candidate.label} failed: ${lastAttemptError}`);
        }
    }
    throw new Error(lastAttemptError);
}
|
|
634
|
+
/**
 * Forwards the client's request body unchanged to the Anthropic messages
 * endpoint using the client's own headers (minus those listed in
 * `BLOCKED_UPSTREAM_HEADERS`), then hands the upstream response to the
 * streaming or JSON passthrough handler.
 *
 * Outcomes:
 *  - fetch throws: a 502 Claude error body is logged and returned;
 *  - upstream answers non-OK: the upstream error text is logged and returned
 *    (parsed as JSON when possible, otherwise wrapped with `buildClaudeError`);
 *  - otherwise: delegated to `handleClaudePassthroughStreamResponse` when the
 *    client asked for streaming and a body exists, else to
 *    `handleClaudePassthroughJsonResponse`.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `headers`, `requestId`, `method` and `path` are read.
 * @param {object} args.body - Parsed client request; `tools`, `model` and `stream` are read.
 * @param {string} args.clientRequestBody - Raw request text sent upstream as-is.
 * @param {object} [args.tracer] - Optional tracer.
 * @param {number} args.requestStartTime - Epoch milliseconds used for duration fields.
 * @param {Function} args.logProxyBody - Body logger invoked once per phase.
 * @returns {Promise<object|Response>} Error body, or whatever the delegated handler returns.
 */
async function handleClaudePassthroughRequest(args) {
    const { ctx, body, clientRequestBody, tracer, requestStartTime, logProxyBody } = args;
    const upstreamUrl = "https://api.anthropic.com/v1/messages?beta=true";
    const passthroughTags = { account: "passthrough", accountType: "passthrough" };
    tracer?.setMode("passthrough-cli");
    const bodyStr = clientRequestBody;
    const toolCount = Array.isArray(body.tools) ? body.tools.length : 0;

    // Copy every non-blocked, non-empty client header.
    const upstreamHeaders = {};
    Object.entries(ctx.headers).forEach(([headerName, headerValue]) => {
        const blocked = BLOCKED_UPSTREAM_HEADERS.has(headerName.toLowerCase());
        if (blocked || !headerValue) {
            return;
        }
        upstreamHeaders[headerName] = headerValue;
    });
    if (!upstreamHeaders["content-type"]) {
        upstreamHeaders["content-type"] = "application/json";
    }

    const upstreamSpan = tracer?.startUpstreamAttempt({
        account: "passthrough",
        attempt: 1,
        polyfillHeaders: false,
        polyfillBody: false,
        upstreamUrl,
    });
    tracer?.logUpstreamRequestHeaders(upstreamHeaders);
    tracer?.logUpstreamRequestBody(bodyStr);
    logProxyBody({
        phase: "upstream_request",
        headers: upstreamHeaders,
        body: bodyStr,
        bodySize: Buffer.byteLength(bodyStr, "utf8"),
        contentType: upstreamHeaders["content-type"] ?? "application/json",
        ...passthroughTags,
        attempt: 1,
    });

    let response;
    try {
        response = await fetch(upstreamUrl, {
            method: "POST",
            headers: upstreamHeaders,
            body: bodyStr,
            signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
        });
    }
    catch (fetchErr) {
        // Network-level failure (including the fetch timeout): answer with 502.
        const errMsg = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
        tracer?.setError("network_error", errMsg);
        upstreamSpan?.end();
        tracer?.end(502, Date.now() - requestStartTime);
        logRequest({
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            method: ctx.method,
            path: ctx.path,
            model: body.model,
            stream: body.stream ?? false,
            toolCount,
            ...passthroughTags,
            responseStatus: 502,
            responseTimeMs: Date.now() - requestStartTime,
            errorType: "network_error",
            errorMessage: errMsg,
        });
        const errorBody = buildClaudeError(502, `Passthrough fetch failed: ${errMsg}`);
        const errorBodyText = JSON.stringify(errorBody);
        logProxyBody({
            phase: "client_response",
            headers: { "content-type": "application/json" },
            body: errorBodyText,
            bodySize: Buffer.byteLength(errorBodyText, "utf8"),
            contentType: "application/json",
            ...passthroughTags,
            attempt: 1,
            responseStatus: 502,
            durationMs: Date.now() - requestStartTime,
        });
        return errorBody;
    }

    const upstreamResponseHeaders = {};
    response.headers.forEach((headerValue, headerName) => {
        upstreamResponseHeaders[headerName] = headerValue;
    });
    tracer?.logUpstreamResponseHeaders(upstreamResponseHeaders);

    if (!response.ok) {
        const errorText = await response.text();
        tracer?.logUpstreamResponseBody(errorText);
        // The same error text is recorded as both what upstream sent and what the client gets.
        const logErrorPhase = (phase) => logProxyBody({
            phase,
            headers: upstreamResponseHeaders,
            body: errorText,
            bodySize: Buffer.byteLength(errorText, "utf8"),
            contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
            ...passthroughTags,
            attempt: 1,
            responseStatus: response.status,
            durationMs: Date.now() - requestStartTime,
        });
        logErrorPhase("upstream_response");
        logErrorPhase("client_response");
        upstreamSpan?.end();
        tracer?.setError("api_error", errorText.slice(0, 500));
        tracer?.end(response.status, Date.now() - requestStartTime);
        try {
            return JSON.parse(errorText);
        }
        catch {
            return buildClaudeError(response.status, errorText);
        }
    }

    const downstreamArgs = {
        ctx,
        body,
        bodyStr,
        response,
        tracer,
        requestStartTime,
        toolCount,
        upstreamSpan,
        upstreamResponseHeaders,
        logProxyBody,
    };
    const wantsStream = body.stream && response.body;
    return wantsStream
        ? handleClaudePassthroughStreamResponse(downstreamArgs)
        : handleClaudePassthroughJsonResponse(downstreamArgs);
}
|
|
781
|
+
/**
 * Relays a successful streaming passthrough response to the client while
 * capturing it for logging and tracing.
 *
 * The upstream body is piped through a raw capture stream (always) and, when a
 * tracer is present, through an SSE interceptor first. Once the stream has
 * been fully consumed, usage, rate-limit utilization, body sizes and the
 * request/body log entries are recorded in the background. The returned
 * Response does not wait for that bookkeeping.
 *
 * Request size is reported in UTF-8 bytes so that it is comparable with the
 * interceptor's `totalBytesReceived`.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `requestId`, `method` and `path` are logged.
 * @param {object} args.body - Parsed client request; `model` is logged.
 * @param {string} args.bodyStr - Request text that was sent upstream.
 * @param {Response} args.response - Upstream response; its body is relayed.
 * @param {object} [args.tracer] - Optional tracer; enables the SSE interceptor.
 * @param {number} args.requestStartTime - Epoch milliseconds used for duration fields.
 * @param {number} args.toolCount - Number of tools in the request, for the request log.
 * @param {object} [args.upstreamSpan] - Upstream attempt span, ended when the stream settles.
 * @param {object} args.upstreamResponseHeaders - Plain-object copy of the upstream headers.
 * @param {Function} args.logProxyBody - Body logger invoked once per phase.
 * @returns {Promise<Response>} Response streaming the upstream body with the upstream status and headers.
 * @throws {Error} When the upstream response has no body.
 */
async function handleClaudePassthroughStreamResponse(args) {
    const { ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody, } = args;
    const responseHeaders = { ...upstreamResponseHeaders };
    const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture();
    const responseBody = response.body;
    if (!responseBody) {
        throw new Error("Expected passthrough stream response body");
    }
    let streamSource = responseBody;
    if (tracer) {
        try {
            const { stream: interceptor, telemetry } = createSSEInterceptor({
                captureRawText: true,
            });
            streamSource = streamSource.pipeThrough(interceptor);
            const capturedTracer = tracer;
            const capturedUpstreamSpan = upstreamSpan;
            const capturedResponse = response;
            // Bytes, not UTF-16 code units: this is reported next to totalBytesReceived.
            const capturedRequestBytes = Buffer.byteLength(bodyStr, "utf8");
            Promise.all([telemetry, clientCapture])
                .then(([data, clientBody]) => {
                const tokenUsage = {
                    inputTokens: data.usage.inputTokens,
                    outputTokens: data.usage.outputTokens,
                    cacheCreationTokens: data.usage.cacheCreationInputTokens,
                    cacheReadTokens: data.usage.cacheReadInputTokens,
                };
                capturedTracer.setUsage({ ...tokenUsage });
                capturedTracer.logStreamEvents(data.events);
                const rateLimit5h = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
                const rateLimit7d = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
                const has5h = !Number.isNaN(rateLimit5h);
                const has7d = !Number.isNaN(rateLimit7d);
                if (has5h || has7d) {
                    // Re-report usage with whichever utilization headers were parseable.
                    const usageUpdate = { ...tokenUsage };
                    if (has5h) {
                        usageUpdate.rateLimitAfter5h = rateLimit5h;
                    }
                    if (has7d) {
                        usageUpdate.rateLimitAfter7d = rateLimit7d;
                    }
                    capturedTracer.setUsage(usageUpdate);
                }
                capturedTracer.logUpstreamResponseBody(data.rawText ?? "");
                capturedTracer.recordMetrics();
                capturedTracer.recordBodySizes(capturedRequestBytes, data.totalBytesReceived);
                capturedUpstreamSpan?.end();
                capturedTracer.end(200, Date.now() - requestStartTime);
                const traceCtx = capturedTracer.getTraceContext();
                logRequest({
                    timestamp: new Date().toISOString(),
                    requestId: ctx.requestId,
                    method: ctx.method,
                    path: ctx.path,
                    model: body.model,
                    stream: true,
                    toolCount,
                    account: "passthrough",
                    accountType: "passthrough",
                    responseStatus: 200,
                    responseTimeMs: Date.now() - requestStartTime,
                    inputTokens: data.usage.inputTokens,
                    outputTokens: data.usage.outputTokens,
                    cacheCreationTokens: data.usage.cacheCreationInputTokens,
                    cacheReadTokens: data.usage.cacheReadInputTokens,
                    traceId: traceCtx.traceId,
                    spanId: traceCtx.spanId,
                });
                logProxyBody({
                    phase: "upstream_response",
                    headers: responseHeaders,
                    body: data.rawText ?? "",
                    bodySize: data.totalBytesReceived,
                    contentType: responseHeaders["content-type"] ?? "text/event-stream",
                    account: "passthrough",
                    accountType: "passthrough",
                    attempt: 1,
                    responseStatus: 200,
                    durationMs: Date.now() - requestStartTime,
                });
                logProxyBody({
                    phase: "client_response",
                    headers: responseHeaders,
                    body: clientBody.text,
                    bodySize: clientBody.totalBytes,
                    contentType: responseHeaders["content-type"] ?? "text/event-stream",
                    account: "passthrough",
                    accountType: "passthrough",
                    attempt: 1,
                    responseStatus: 200,
                    durationMs: Date.now() - requestStartTime,
                });
            })
                .catch((error) => {
                capturedTracer.setError("stream_error", error instanceof Error ? error.message : String(error));
                capturedUpstreamSpan?.end();
                capturedTracer.end(500, Date.now() - requestStartTime);
                const traceCtx = capturedTracer.getTraceContext();
                logRequest({
                    timestamp: new Date().toISOString(),
                    requestId: ctx.requestId,
                    method: ctx.method,
                    path: ctx.path,
                    model: body.model,
                    stream: true,
                    toolCount,
                    account: "passthrough",
                    accountType: "passthrough",
                    responseStatus: 500,
                    responseTimeMs: Date.now() - requestStartTime,
                    errorType: "stream_error",
                    errorMessage: error instanceof Error ? error.message : String(error),
                    traceId: traceCtx.traceId,
                    spanId: traceCtx.spanId,
                });
            });
        }
        catch {
            // Streaming capture is best-effort.
        }
    }
    else {
        // Without a tracer there is no interceptor, so the client capture
        // doubles as the record of the upstream body.
        clientCapture
            .then((clientBody) => {
            logProxyBody({
                phase: "upstream_response",
                headers: responseHeaders,
                body: clientBody.text,
                bodySize: clientBody.totalBytes,
                contentType: responseHeaders["content-type"] ?? "text/event-stream",
                account: "passthrough",
                accountType: "passthrough",
                attempt: 1,
                responseStatus: 200,
                durationMs: Date.now() - requestStartTime,
            });
            logProxyBody({
                phase: "client_response",
                headers: responseHeaders,
                body: clientBody.text,
                bodySize: clientBody.totalBytes,
                contentType: responseHeaders["content-type"] ?? "text/event-stream",
                account: "passthrough",
                accountType: "passthrough",
                attempt: 1,
                responseStatus: 200,
                durationMs: Date.now() - requestStartTime,
            });
        })
            .catch(() => {
            // Non-fatal
        });
    }
    const clientStream = streamSource.pipeThrough(clientCaptureStream);
    return new Response(clientStream, {
        status: response.status,
        headers: responseHeaders,
    });
}
|
|
942
|
+
/**
 * Relays a successful non-streaming passthrough response to the client and
 * records it for logging and tracing.
 *
 * The upstream text is logged as both the upstream and the client response,
 * then parsed as JSON. With a tracer and an object payload, token usage (plus
 * rate-limit utilization when the headers parse as numbers), metrics and body
 * sizes are recorded before the trace is ended; otherwise only the span/trace
 * are ended and a plain request log entry is written.
 *
 * Body sizes are reported in UTF-8 bytes: the request text that was sent
 * upstream and the response text that was received, without re-serializing
 * the parsed payload just to measure it.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `requestId`, `method` and `path` are logged.
 * @param {object} args.body - Parsed client request; `model` is logged.
 * @param {string} args.bodyStr - Request text that was sent upstream.
 * @param {Response} args.response - Upstream response; its text is read once.
 * @param {object} [args.tracer] - Optional tracer.
 * @param {number} args.requestStartTime - Epoch milliseconds used for duration fields.
 * @param {number} args.toolCount - Number of tools in the request, for the request log.
 * @param {object} [args.upstreamSpan] - Upstream attempt span, ended here.
 * @param {object} args.upstreamResponseHeaders - Plain-object copy of the upstream headers.
 * @param {Function} args.logProxyBody - Body logger invoked once per phase.
 * @returns {Promise<*>} The parsed upstream JSON payload.
 * @throws {SyntaxError} When the upstream body is not valid JSON.
 */
async function handleClaudePassthroughJsonResponse(args) {
    const { ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody, } = args;
    const responseText = await response.text();
    tracer?.logUpstreamResponseBody(responseText);
    logProxyBody({
        phase: "upstream_response",
        headers: upstreamResponseHeaders,
        body: responseText,
        bodySize: Buffer.byteLength(responseText, "utf8"),
        contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
        account: "passthrough",
        accountType: "passthrough",
        attempt: 1,
        responseStatus: response.status,
        durationMs: Date.now() - requestStartTime,
    });
    logProxyBody({
        phase: "client_response",
        headers: upstreamResponseHeaders,
        body: responseText,
        bodySize: Buffer.byteLength(responseText, "utf8"),
        contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
        account: "passthrough",
        accountType: "passthrough",
        attempt: 1,
        responseStatus: response.status,
        durationMs: Date.now() - requestStartTime,
    });
    const responseJson = JSON.parse(responseText);
    if (tracer && responseJson && typeof responseJson === "object") {
        const usage = responseJson.usage;
        if (usage) {
            const tokenUsage = {
                inputTokens: usage.input_tokens ?? 0,
                outputTokens: usage.output_tokens ?? 0,
                cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
                cacheReadTokens: usage.cache_read_input_tokens ?? 0,
            };
            tracer.setUsage({ ...tokenUsage });
            const rateLimit5h = parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ??
                "");
            const rateLimit7d = parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ??
                "");
            const has5h = !Number.isNaN(rateLimit5h);
            const has7d = !Number.isNaN(rateLimit7d);
            if (has5h || has7d) {
                // Re-report usage with whichever utilization headers were parseable.
                const usageWithRates = { ...tokenUsage };
                if (has5h) {
                    usageWithRates.rateLimitAfter5h = rateLimit5h;
                }
                if (has7d) {
                    usageWithRates.rateLimitAfter7d = rateLimit7d;
                }
                tracer.setUsage(usageWithRates);
            }
        }
        tracer.recordMetrics();
        // Byte counts of what went over the wire, not UTF-16 string lengths.
        tracer.recordBodySizes(Buffer.byteLength(bodyStr, "utf8"), Buffer.byteLength(responseText, "utf8"));
        upstreamSpan?.end();
        tracer.end(response.status, Date.now() - requestStartTime);
        const traceCtx = tracer.getTraceContext();
        logRequest({
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            method: ctx.method,
            path: ctx.path,
            model: body.model,
            stream: false,
            toolCount,
            account: "passthrough",
            accountType: "passthrough",
            responseStatus: response.status,
            responseTimeMs: Date.now() - requestStartTime,
            inputTokens: usage?.input_tokens,
            outputTokens: usage?.output_tokens,
            cacheCreationTokens: usage?.cache_creation_input_tokens,
            cacheReadTokens: usage?.cache_read_input_tokens,
            traceId: traceCtx.traceId,
            spanId: traceCtx.spanId,
        });
    }
    else {
        upstreamSpan?.end();
        tracer?.end(response.status, Date.now() - requestStartTime);
        logRequest({
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            method: ctx.method,
            path: ctx.path,
            model: body.model,
            stream: false,
            toolCount,
            account: "passthrough",
            accountType: "passthrough",
            responseStatus: response.status,
            responseTimeMs: Date.now() - requestStartTime,
        });
    }
    return responseJson;
}
|
|
1045
|
+
/**
 * Collects the Anthropic accounts usable for this request and prepares the
 * shared request material for the upstream attempts.
 *
 * Sources, in order: every `anthropic:`-prefixed entry in the token store
 * (expired entries are refreshed, or disabled when that is impossible), then
 * the legacy credentials file if the store produced nothing, then the
 * `ANTHROPIC_API_KEY` environment variable if still nothing.
 *
 * Runtime state is reset for any account whose tokens changed since the last
 * call, permanently disabled accounts are filtered out, and the remaining
 * accounts are rotated so that the account at `primaryAccountIndex` comes
 * first (the index advances only for the "round-robin" strategy).
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `headers` is read.
 * @param {object} args.body - Parsed client request.
 * @param {object} [args.tracer] - Optional tracer; ended with 401 on credential failures.
 * @param {number} args.requestStartTime - Epoch milliseconds used for traced durations.
 * @param {string} args.accountStrategy - Selection strategy; "round-robin" is the only value tested here.
 * @param {Function} args.buildLoggedClaudeError - Builds the error response for credential failures.
 * @returns {Promise<object>} `{ response }` with a 401 error when no usable account exists; otherwise
 *   the account lists plus `bodyStr`, `requestStart`, `toolCount`, `url`, `clientHeaders` and
 *   `isClaudeClientRequest`.
 */
async function loadClaudeProxyAccounts(args) {
    const { ctx, body, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError } = args;
    const fs = await import("fs");
    const os = await import("os");
    const accounts = [];
    const legacyCredPath = `${os.homedir()}/.neurolink/anthropic-credentials.json`;
    const { tokenStore } = await import("../../auth/tokenStore.js");
    const labelOf = (storeKey) => storeKey.split(":")[1] ?? storeKey;

    // Expired entries are pruned once per process, on the first call.
    if (!startupPruneDone) {
        await tokenStore.pruneExpired();
        startupPruneDone = true;
    }

    const compoundKeys = await tokenStore.listByPrefix("anthropic:");
    for (const key of compoundKeys) {
        if (await tokenStore.isDisabled(key)) {
            const runtime = getOrCreateRuntimeState(key);
            const storedTokens = await tokenStore.loadTokens(key);
            const hasTrackedTokens = runtime.lastToken !== undefined && runtime.lastToken !== "";
            const tokenChanged = storedTokens &&
                hasTrackedTokens &&
                (runtime.lastToken !== storedTokens.accessToken ||
                    runtime.lastRefreshToken !== storedTokens.refreshToken);
            if (!tokenChanged) {
                logger.debug(`[proxy] skipping disabled account=${labelOf(key)}`);
                runtime.permanentlyDisabled = true;
                continue;
            }
            // Stored tokens differ from the ones last seen: give the account a fresh start.
            await tokenStore.markEnabled(key);
            logger.always(`[proxy] account=${labelOf(key)} re-enabled (credentials changed)`);
            Object.assign(runtime, {
                permanentlyDisabled: false,
                coolingUntil: undefined,
                backoffLevel: 0,
                consecutiveRefreshFailures: 0,
            });
        }

        const tokens = await tokenStore.loadTokens(key);
        if (!tokens) {
            continue;
        }
        let { accessToken, refreshToken: refreshTok, expiresAt } = tokens;
        const isExpired = Boolean(expiresAt) && expiresAt < Date.now();
        if (isExpired) {
            const label = labelOf(key);
            const runtime = getOrCreateRuntimeState(key);
            if (runtime.permanentlyDisabled) {
                continue;
            }
            if (!refreshTok) {
                logger.always(`[proxy] skipping account=${label} (expired, no refresh token)`);
                await disableAccountUntilReauth({ key, label, token: accessToken, type: "oauth" }, runtime);
                continue;
            }
            // refreshToken() writes the new credentials back onto the object it is given.
            const tempAccount = {
                token: accessToken,
                refreshToken: refreshTok,
                expiresAt,
                label,
            };
            const refreshed = await refreshToken(tempAccount);
            if (!refreshed.success) {
                logger.always(`[proxy] skipping account=${label} (expired, refresh failed: ${refreshed.error?.slice(0, 200) ?? "unknown"})`);
                await disableAccountUntilReauth({ key, label, token: accessToken, type: "oauth" }, runtime);
                continue;
            }
            accessToken = tempAccount.token;
            refreshTok = tempAccount.refreshToken;
            expiresAt = tempAccount.expiresAt;
            await tokenStore.saveTokens(key, {
                accessToken,
                refreshToken: refreshTok,
                expiresAt: expiresAt ?? Date.now() + 3600_000,
                tokenType: "Bearer",
            });
            logger.always(`[proxy] refreshed expired account=${labelOf(key)} at startup`);
        }

        const accountType = tokens.tokenType === "Bearer" ? "oauth" : "api_key";
        accounts.push({
            key,
            label: labelOf(key),
            token: accessToken,
            refreshToken: refreshTok,
            expiresAt,
            type: accountType,
            persistTarget: { providerKey: key },
        });
    }

    if (accounts.length === 0) {
        try {
            const creds = JSON.parse(fs.readFileSync(legacyCredPath, "utf8"));
            const legacyAccount = await tryLoadLegacyAccount(creds, legacyCredPath);
            if (legacyAccount) {
                accounts.push(legacyAccount);
            }
        }
        catch {
            // file absent or invalid
        }
    }

    if (process.env.ANTHROPIC_API_KEY && accounts.length === 0) {
        accounts.push({
            key: "anthropic:env",
            label: "env",
            token: process.env.ANTHROPIC_API_KEY,
            type: "api_key",
        });
    }

    if (accounts.length === 0) {
        tracer?.setError("authentication_error", "No Anthropic credentials found");
        tracer?.end(401, Date.now() - requestStartTime);
        return {
            response: buildLoggedClaudeError(401, "No Anthropic credentials found"),
        };
    }

    // Sync runtime state with the tokens just loaded; changed tokens clear every penalty.
    for (const account of accounts) {
        const state = getOrCreateRuntimeState(account.key);
        const sameTokens = state.lastToken === account.token &&
            state.lastRefreshToken === account.refreshToken;
        if (!sameTokens) {
            if (state.permanentlyDisabled) {
                logger.always(`[proxy] account=${account.label} credentials changed, re-enabling`);
            }
            Object.assign(state, {
                coolingUntil: undefined,
                backoffLevel: 0,
                consecutiveRefreshFailures: 0,
                permanentlyDisabled: false,
            });
        }
        state.lastToken = account.token;
        state.lastRefreshToken = account.refreshToken;
    }

    const enabledAccounts = accounts.filter((account) => !getOrCreateRuntimeState(account.key).permanentlyDisabled);
    if (enabledAccounts.length === 0) {
        const reauthMsg = formatReauthMessage(accounts.map((account) => account.label));
        tracer?.setError("authentication_error", reauthMsg);
        tracer?.end(401, Date.now() - requestStartTime);
        return { response: buildLoggedClaudeError(401, reauthMsg) };
    }

    const enabledCount = enabledAccounts.length;
    const isRoundRobin = accountStrategy === "round-robin";
    if (isRoundRobin && enabledCount !== lastKnownAccountCount) {
        // The pool size changed, so restart the rotation from the first account.
        primaryAccountIndex = 0;
        lastKnownAccountCount = enabledCount;
    }
    let rotation = 0;
    if (enabledCount > 1) {
        rotation = primaryAccountIndex % enabledCount;
        if (isRoundRobin) {
            primaryAccountIndex = (primaryAccountIndex + 1) % enabledCount;
        }
    }
    const orderedAccounts = rotation > 0
        ? [...enabledAccounts.slice(rotation), ...enabledAccounts.slice(0, rotation)]
        : [...enabledAccounts];

    const normalizedAnthropicBody = normalizeClaudeRequestForAnthropic(body);
    const bodyStr = JSON.stringify(normalizedAnthropicBody);
    const requestStart = Date.now();
    const toolCount = Array.isArray(body.tools) ? body.tools.length : 0;
    const url = "https://api.anthropic.com/v1/messages?beta=true";
    const clientHeaders = ctx.headers ?? {};
    const clientSnapshotBody = extractSnapshotBody(body);
    const isClaudeClientRequest = isLikelyClaudeClient(clientHeaders, clientSnapshotBody);
    return {
        accounts,
        enabledAccounts,
        orderedAccounts,
        bodyStr,
        requestStart,
        toolCount,
        url,
        clientHeaders,
        isClaudeClientRequest,
    };
}
|
|
1220
|
+
/**
 * Sends a Claude-format request to a translated provider via
 * `ctx.neurolink.stream(options)` and converts the outcome back into the
 * Claude response shape.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context exposing `neurolink.stream`.
 * @param {object} args.body - Original Claude request body (`stream` and `model` are read).
 * @param {object} [args.tracer] - Optional tracer; ended with status 200.
 * @param {number} args.requestStartTime - Epoch milliseconds the request started at.
 * @param {Function} args.logProxyBody - Body logger (used on the non-streaming path only).
 * @param {Function} args.logFinalRequest - Final request logger.
 * @param {object} args.options - Options forwarded to `ctx.neurolink.stream`.
 * @param {string} args.providerLabel - Label used in logs and in the error message.
 * @returns {Promise<object>} An async generator of serializer frames when
 *   `body.stream` is truthy, otherwise the serialized Claude response object.
 * @throws {Error} When the provider produced neither text nor tool calls. On
 *   the streaming path the check runs inside the generator, so the error is
 *   raised while the caller consumes it, not from this function.
 */
async function executeClaudeFallbackTranslation(args) {
    const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest, options, providerLabel, } = args;
    const wantsStream = Boolean(body.stream);
    const translated = await ctx.neurolink.stream(options);
    const noOutputError = () => new Error(`Translated provider ${providerLabel} returned no content or tool calls`);
    if (wantsStream) {
        const serializer = new ClaudeStreamSerializer(body.model, 0);
        const sseGenerator = async function* () {
            yield* serializer.start();
            let streamedText = "";
            for await (const piece of translated.stream) {
                const delta = extractText(piece);
                if (!delta) {
                    continue;
                }
                streamedText += delta;
                yield* serializer.pushDelta(delta);
            }
            const toolCalls = translated.toolCalls ?? [];
            if (!hasTranslatedOutput(streamedText, toolCalls)) {
                throw noOutputError();
            }
            if (toolCalls.length) {
                for (const call of toolCalls) {
                    const toolName = call.toolName ?? call.name ?? "unknown";
                    yield* serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(call));
                }
            }
            const stopReason = translated.finishReason ?? "end_turn";
            const usage = extractUsageFromStreamResult(translated.usage);
            yield* serializer.finish(usage.output, stopReason);
        };
        // Success is recorded before the generator body runs; frames are
        // only produced once the caller starts iterating.
        tracer?.end(200, Date.now() - requestStartTime);
        recordFinalSuccess();
        logFinalRequest(200, "", providerLabel);
        return sseGenerator();
    }
    // Non-streaming: drain the provider stream into a single text buffer.
    let fullText = "";
    for await (const piece of translated.stream) {
        const delta = extractText(piece);
        if (delta) {
            fullText += delta;
        }
    }
    if (!hasTranslatedOutput(fullText, translated.toolCalls)) {
        throw noOutputError();
    }
    const internal = {
        content: fullText,
        model: translated.model,
        finishReason: translated.finishReason ?? "end_turn",
        reasoning: undefined,
        usage: translated.usage
            ? extractUsageFromStreamResult(translated.usage)
            : undefined,
        toolCalls: translated.toolCalls,
    };
    tracer?.end(200, Date.now() - requestStartTime);
    recordFinalSuccess();
    const clientResponse = serializeClaudeResponse(internal, body.model);
    logFinalRequest(200, "", providerLabel, undefined, undefined, {
        inputTokens: internal.usage?.input,
        outputTokens: internal.usage?.output,
    });
    const serialized = JSON.stringify(clientResponse);
    logProxyBody({
        phase: "client_response",
        headers: { "content-type": "application/json" },
        body: serialized,
        bodySize: Buffer.byteLength(serialized, "utf8"),
        contentType: "application/json",
        responseStatus: 200,
        durationMs: Date.now() - requestStartTime,
    });
    return clientResponse;
}
|
|
1304
|
+
/**
 * Walks the fallback chain from `modelRouter.getFallbackChain()` in order and
 * returns the result of the first target that translates successfully.
 *
 * A target is skipped when `shouldSkipTranslationTarget` rejects it for this
 * request or when the provider health check reports it unavailable. A target
 * that throws during translation is logged at debug level and the next one
 * is tried.
 *
 * @param {object} args - `ctx`, `body`, `modelRouter`, `tracer`,
 *   `requestStartTime`, `logProxyBody`, `logFinalRequest`.
 * @returns {Promise<object|null>} The translated response, or `null` when no
 *   target in the chain succeeded (or the chain is empty / no router given).
 */
async function tryConfiguredClaudeFallbackChain(args) {
    const { ctx, body, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest, } = args;
    const parsedRequest = parseClaudeRequest(body);
    const candidates = modelRouter?.getFallbackChain() ?? [];
    for (const { provider, model } of candidates) {
        const target = `${provider}/${model}`;
        if (shouldSkipTranslationTarget(provider, model, parsedRequest)) {
            logger.debug(`[proxy] skipping fallback ${target}: incompatible with request shape`);
            continue;
        }
        const { available, reason } = await ProviderHealthChecker.checkFallbackProviderAvailability(provider, model);
        if (!available) {
            logger.debug(`[proxy] skipping fallback ${target}: ${reason ?? "provider unavailable"}`);
            continue;
        }
        try {
            logger.always(`[proxy] fallback → ${target}`);
            const options = buildProxyFallbackOptions(parsedRequest, { provider, model });
            return await executeClaudeFallbackTranslation({
                ctx,
                body,
                tracer,
                requestStartTime,
                logProxyBody,
                logFinalRequest,
                options,
                providerLabel: provider,
            });
        }
        catch (fallbackErr) {
            const detail = fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr);
            logger.debug(`[proxy] fallback ${target} failed: ${detail}`);
        }
    }
    return null;
}
|
|
1341
|
+
/**
 * Last-resort fallback: translates the request with fallback options built
 * without an explicit provider/model override, labelled "auto-provider".
 *
 * @param {object} args - `ctx`, `body`, `tracer`, `requestStartTime`,
 *   `logProxyBody`, `logFinalRequest`.
 * @returns {Promise<object|null>} The translated response, or `null` when
 *   parsing, option building or translation threw (the error is logged at
 *   debug level, not rethrown).
 */
async function tryAutoClaudeFallback(args) {
    const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
    try {
        logger.always("[proxy] fallback → auto-provider");
        const options = buildProxyFallbackOptions(parseClaudeRequest(body));
        const translated = await executeClaudeFallbackTranslation({
            ctx,
            body,
            tracer,
            requestStartTime,
            logProxyBody,
            logFinalRequest,
            options,
            providerLabel: "auto-provider",
        });
        return translated;
    }
    catch (fallbackErr) {
        const detail = fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr);
        logger.debug(`[proxy] fallback auto-provider failed: ${detail}`);
        return null;
    }
}
|
|
1363
|
+
/**
 * Builds the final client-facing error once every Anthropic account attempt
 * has failed. Outcomes are checked in this priority order:
 *
 *   1. auth failure (and no rate limit seen)  -> 401
 *   2. upstream invalid-request failure       -> upstream status, upstream body
 *   3. no rate limit seen                     -> 502 (transient or generic)
 *   4. otherwise (rate limited)               -> 429 `Response` with `retry-after`
 *
 * @param {object} args - Failure flags (`authFailureMessage`,
 *   `invalidRequestFailure`, `sawNetworkError`, `sawTransientFailure`,
 *   `sawRateLimit`, `lastError`), the attempted `orderedAccounts`, plus
 *   `tracer`, `requestStartTime`, `buildLoggedClaudeError`, `logProxyBody`
 *   and `logFinalRequest`.
 * @returns {object|Response} Whatever `buildLoggedClaudeError` returns, the
 *   parsed upstream error body, or a 429 `Response`.
 */
function buildClaudeAnthropicFailureResponse(args) {
    const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
    const elapsedMs = () => Date.now() - requestStartTime;

    if (authFailureMessage && !sawRateLimit) {
        tracer?.setError("authentication_error", authFailureMessage);
        tracer?.end(401, elapsedMs());
        return buildLoggedClaudeError(401, authFailureMessage);
    }

    if (invalidRequestFailure) {
        tracer?.setError("invalid_request_error", summarizeErrorMessage(invalidRequestFailure.body));
        tracer?.end(invalidRequestFailure.status, elapsedMs());
        recordFinalError(invalidRequestFailure.status);
        try {
            // Relay the upstream error verbatim when its body is valid JSON.
            const upstreamError = JSON.parse(invalidRequestFailure.body);
            logFinalRequest(invalidRequestFailure.status, "", "final", "invalid_request_error", summarizeErrorMessage(invalidRequestFailure.body));
            const upstreamContentType = invalidRequestFailure.contentType ?? "application/json";
            logProxyBody({
                phase: "client_response",
                headers: {
                    "content-type": upstreamContentType,
                },
                body: invalidRequestFailure.body,
                bodySize: Buffer.byteLength(invalidRequestFailure.body, "utf8"),
                contentType: upstreamContentType,
                responseStatus: invalidRequestFailure.status,
                durationMs: elapsedMs(),
            });
            return upstreamError;
        }
        catch {
            return buildLoggedClaudeError(invalidRequestFailure.status, summarizeErrorMessage(invalidRequestFailure.body), "invalid_request_error");
        }
    }

    if (!sawRateLimit) {
        const isTransient = Boolean(sawNetworkError || sawTransientFailure);
        const lastErrorText = lastError instanceof Error
            ? lastError.message
            : String(lastError ?? "unknown");
        const msg = isTransient
            ? `All Anthropic accounts failed due to transient upstream/network errors. Last error: ${lastErrorText}`
            : `All Anthropic accounts failed. Last error: ${lastErrorText}`;
        tracer?.setError(isTransient ? "transient_error" : "all_accounts_failed", msg.slice(0, 500));
        tracer?.end(502, elapsedMs());
        return buildLoggedClaudeError(502, msg);
    }

    // Rate limited: advertise the soonest moment any attempted account
    // leaves its cooldown, defaulting to 60s when none has one recorded.
    let earliestRecovery = Infinity;
    for (const account of orderedAccounts) {
        const { coolingUntil } = getOrCreateRuntimeState(account.key);
        if (coolingUntil) {
            earliestRecovery = Math.min(earliestRecovery, coolingUntil);
        }
    }
    const retryAfterSec = Number.isFinite(earliestRecovery)
        ? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
        : 60;
    logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
    const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`, "overloaded_error");
    const retrySummary = `All accounts rate-limited. Retry in ${retryAfterSec}s.`;
    tracer?.setError("rate_limit_error", retrySummary);
    tracer?.end(429, elapsedMs());
    recordFinalError(429);
    logFinalRequest(429, "", "final", "rate_limit_error", retrySummary);
    const errorBodyText = JSON.stringify(errorBody);
    const rateLimitHeaders = {
        "content-type": "application/json",
        "retry-after": String(retryAfterSec),
    };
    logProxyBody({
        phase: "client_response",
        headers: { ...rateLimitHeaders },
        body: errorBodyText,
        bodySize: Buffer.byteLength(errorBodyText, "utf8"),
        contentType: "application/json",
        responseStatus: 429,
        durationMs: elapsedMs(),
    });
    return new Response(errorBodyText, {
        status: 429,
        headers: { ...rateLimitHeaders },
    });
}
|
|
1444
|
+
/**
 * Handles a successful upstream Anthropic response for one account: clears
 * the account's backoff/cooldown/refresh-failure counters, persists quota
 * headers on a best-effort basis, records the upstream response headers on
 * the tracer, then delegates to the streaming or JSON handler depending on
 * `body.stream`.
 *
 * @param {object} args - `ctx`, `body`, `account`, `accountState`,
 *   `response`, `tracer`, `requestStartTime`, `fetchStartMs`,
 *   `attemptNumber`, `finalBodyStr`, `upstreamSpan`, `logProxyBody`,
 *   `logFinalRequest`.
 * @returns {Promise<object>} The result object of the delegated handler.
 */
async function handleAnthropicSuccessfulResponse(args) {
    const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
    // A success wipes any penalty state accumulated for this account.
    Object.assign(accountState, {
        backoffLevel: 0,
        coolingUntil: undefined,
        consecutiveRefreshFailures: 0,
    });
    logger.always(`[proxy] ← ${response.status} account=${account.label}`);

    const quota = parseQuotaHeaders(response.headers);
    if (quota) {
        saveAccountQuota(account.label, quota).catch(() => {
            // Non-fatal: quota persistence is best-effort
        });
    }

    // Flatten the Headers object into a plain record for logging.
    const responseHeaders = {};
    response.headers.forEach((headerValue, headerName) => {
        responseHeaders[headerName] = headerValue;
    });
    tracer?.logUpstreamResponseHeaders(responseHeaders);

    const sharedArgs = {
        account,
        response,
        responseHeaders,
        tracer,
        requestStartTime,
        fetchStartMs,
        attemptNumber,
        finalBodyStr,
        upstreamSpan,
        logProxyBody,
        logFinalRequest,
    };
    return body.stream
        ? handleAnthropicStreamingSuccessResponse({ ctx, body, accountState, ...sharedArgs })
        : handleAnthropicJsonSuccessResponse(sharedArgs);
}
|
|
1493
|
+
/**
 * Handles a successful streaming upstream response.
 *
 * The first chunk is read eagerly so an empty stream can be detected while a
 * retry on another account is still possible. Otherwise the first chunk and
 * the rest of the upstream body are re-exposed as a new `ReadableStream`,
 * which is handed to `attachAnthropicSuccessStreamTelemetry`.
 *
 * @param {object} args - `ctx`, `body`, `account`, `accountState`,
 *   `response`, `responseHeaders`, `tracer`, `requestStartTime`,
 *   `fetchStartMs`, `attemptNumber`, `finalBodyStr`, `upstreamSpan`,
 *   `logProxyBody`, `logFinalRequest`.
 * @returns {Promise<{response: object}|{retryNextAccount: true}>}
 *   `{ response }` with a 502 Claude error when upstream had no body, or
 *   with the client `Response` on success; `{ retryNextAccount: true }` when
 *   the upstream stream was empty.
 */
async function handleAnthropicStreamingSuccessResponse(args) {
    const { ctx, body, account, accountState, response, responseHeaders, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
    if (!response.body) {
        upstreamSpan?.end();
        tracer?.setError("stream_error", "No response body from upstream");
        tracer?.end(502, Date.now() - requestStartTime);
        recordFinalError(502, account.label, account.type);
        logFinalRequest(502, account.label, account.type, "stream_error", "No response body from upstream");
        const clientError = buildClaudeError(502, "No response body from upstream");
        const clientErrorBody = JSON.stringify(clientError);
        logProxyBody({
            phase: "client_response",
            headers: { "content-type": "application/json" },
            body: clientErrorBody,
            bodySize: Buffer.byteLength(clientErrorBody, "utf8"),
            contentType: "application/json",
            account: account.label,
            accountType: account.type,
            attempt: attemptNumber,
            responseStatus: 502,
            durationMs: Date.now() - requestStartTime,
        });
        return { response: clientError };
    }
    const reader = response.body.getReader();
    // reader.cancel() returns a promise that rejects when the upstream
    // stream has already errored. The body is being discarded at that point,
    // so the rejection is swallowed instead of surfacing as an unhandled
    // promise rejection.
    const cancelUpstream = () => {
        Promise.resolve(reader.cancel()).catch(() => {
            // Non-fatal: upstream body is being discarded
        });
    };
    const firstChunk = await reader.read();
    if (firstChunk.done || !firstChunk.value || firstChunk.value.length === 0) {
        cancelUpstream();
        // Briefly cool the account down (10s) and let the caller move on.
        accountState.coolingUntil = Date.now() + 10_000;
        recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
        logger.always(`[proxy] ← empty stream from account=${account.label}, trying next`);
        tracer?.recordRetry(account.label, "empty_stream");
        upstreamSpan?.end();
        return { retryNextAccount: true };
    }
    // Guards against enqueue/close after the wrapper stream has finished,
    // errored, or been cancelled by the client.
    let mainStreamClosed = false;
    const remainingStream = new ReadableStream({
        start(controller) {
            // Replay the chunk consumed by the emptiness probe above.
            controller.enqueue(firstChunk.value);
        },
        async pull(controller) {
            if (mainStreamClosed) {
                return;
            }
            try {
                const { done, value } = await reader.read();
                // The client may have cancelled while the read was pending.
                if (mainStreamClosed) {
                    return;
                }
                if (done) {
                    mainStreamClosed = true;
                    controller.close();
                    return;
                }
                controller.enqueue(value);
            }
            catch (streamErr) {
                const errMsg = streamErr instanceof Error ? streamErr.message : String(streamErr);
                logger.always(`[proxy] mid-stream error account=${account.label}: ${errMsg}`);
                logStreamError({
                    timestamp: new Date().toISOString(),
                    requestId: ctx.requestId,
                    account: account.label,
                    model: body.model,
                    errorMessage: errMsg,
                    durationMs: Date.now() - fetchStartMs,
                });
                if (!mainStreamClosed) {
                    mainStreamClosed = true;
                    // Response headers were already sent, so the failure is
                    // reported in-band as an SSE `error` event before closing.
                    const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
                    controller.enqueue(new TextEncoder().encode(errorEvent));
                    controller.close();
                }
            }
        },
        cancel() {
            mainStreamClosed = true;
            cancelUpstream();
        },
    });
    const result = attachAnthropicSuccessStreamTelemetry({
        account,
        response,
        responseHeaders,
        remainingStream,
        tracer,
        requestStartTime,
        attemptNumber,
        finalBodyStr,
        upstreamSpan,
        logProxyBody,
        logFinalRequest,
    });
    return { response: result };
}
|
|
1588
|
+
/**
 * Wraps the upstream SSE stream with telemetry and body capture, and returns
 * the `Response` that is sent to the client.
 *
 * The stream is piped through an SSE interceptor (usage/events/raw text) and
 * then through a raw capture transform. Once both complete, usage, metrics
 * and the upstream/client bodies are logged. With a tracer, failures are
 * recorded as a 500 `stream_error`; without a tracer the request is still
 * recorded as a success (NOTE(review): presumably because telemetry is
 * best-effort there - confirm intent).
 *
 * @param {object} args - `account`, `response`, `responseHeaders`,
 *   `remainingStream`, `tracer`, `requestStartTime`, `attemptNumber`,
 *   `finalBodyStr`, `upstreamSpan`, `logProxyBody`, `logFinalRequest`.
 * @returns {Response} Streaming response with `text/event-stream` headers
 *   plus selected upstream rate-limit headers.
 */
function attachAnthropicSuccessStreamTelemetry(args) {
    const { account, response, responseHeaders, remainingStream, tracer, requestStartTime, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
    const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture();
    const accountLabel = account.label;
    const elapsedMs = () => Date.now() - requestStartTime;
    const describeError = (error) => (error instanceof Error ? error.message : String(error));
    // Maps the interceptor's usage record onto the field names used by the
    // tracer and the final request log.
    const toUsage = (usage) => ({
        inputTokens: usage.inputTokens,
        outputTokens: usage.outputTokens,
        cacheCreationTokens: usage.cacheCreationInputTokens,
        cacheReadTokens: usage.cacheReadInputTokens,
    });
    const logBody = (phase, text, size) => {
        logProxyBody({
            phase,
            headers: responseHeaders,
            body: text,
            bodySize: size,
            contentType: responseHeaders["content-type"] ?? "text/event-stream",
            account: accountLabel,
            accountType: account.type,
            attempt: attemptNumber,
            responseStatus: 200,
            durationMs: elapsedMs(),
        });
    };
    const logCapturedBodies = (data, clientBody) => {
        logBody("upstream_response", data.rawText ?? "", data.totalBytesReceived);
        logBody("client_response", clientBody.text, clientBody.totalBytes);
    };
    const readUtilization = (headerName) => Number.parseFloat(response.headers.get(headerName) ?? "");

    let streamSource = remainingStream;
    if (tracer) {
        try {
            const { stream: interceptor, telemetry } = createSSEInterceptor({
                captureRawText: true,
            });
            streamSource = streamSource.pipeThrough(interceptor);
            // Real UTF-8 byte count: String#length counts UTF-16 code units
            // and would disagree with the byte count reported for the
            // response (`totalBytesReceived`) on non-ASCII payloads.
            const requestBytes = Buffer.byteLength(finalBodyStr, "utf8");
            Promise.all([telemetry, clientCapture])
                .then(([data, clientBody]) => {
                const usage = toUsage(data.usage);
                tracer.setUsage({ ...usage });
                tracer.logStreamEvents(data.events);
                const rateLimit5h = readUtilization("anthropic-ratelimit-unified-5h-utilization");
                const rateLimit7d = readUtilization("anthropic-ratelimit-unified-7d-utilization");
                if (!Number.isNaN(rateLimit5h) || !Number.isNaN(rateLimit7d)) {
                    const usageWithRates = { ...usage };
                    if (!Number.isNaN(rateLimit5h)) {
                        usageWithRates.rateLimitAfter5h = rateLimit5h;
                    }
                    if (!Number.isNaN(rateLimit7d)) {
                        usageWithRates.rateLimitAfter7d = rateLimit7d;
                    }
                    tracer.setUsage(usageWithRates);
                }
                tracer.logUpstreamResponseBody(data.rawText ?? "");
                tracer.recordMetrics();
                tracer.recordBodySizes(requestBytes, data.totalBytesReceived);
                upstreamSpan?.end();
                tracer.end(200, elapsedMs());
                recordFinalSuccess(accountLabel, account.type);
                logFinalRequest(200, accountLabel, account.type, undefined, undefined, { ...usage });
                logCapturedBodies(data, clientBody);
            })
                .catch((error) => {
                tracer.setError("stream_error", describeError(error));
                upstreamSpan?.end();
                tracer.end(500, elapsedMs());
                recordFinalError(500, accountLabel, account.type);
                logFinalRequest(500, accountLabel, account.type, "stream_error", describeError(error));
            });
        }
        catch {
            // Interceptor attachment failed after stream setup; response handling continues.
        }
    }
    else {
        upstreamSpan?.end();
        try {
            const { stream: noTracerInterceptor, telemetry: noTracerTelemetry } = createSSEInterceptor({
                captureRawText: true,
            });
            streamSource = streamSource.pipeThrough(noTracerInterceptor);
            Promise.all([noTracerTelemetry, clientCapture])
                .then(([data, clientBody]) => {
                recordFinalSuccess(accountLabel, account.type);
                logFinalRequest(200, accountLabel, account.type, undefined, undefined, toUsage(data.usage));
                logCapturedBodies(data, clientBody);
            })
                .catch(() => {
                recordFinalSuccess(accountLabel, account.type);
                logFinalRequest(response.status, accountLabel, account.type);
            });
        }
        catch {
            // Interceptor could not be created: still log the client body
            // once the capture completes, and record the request now.
            clientCapture
                .then((clientBody) => {
                logBody("client_response", clientBody.text, clientBody.totalBytes);
            })
                .catch(() => {
                // Non-fatal
            });
            recordFinalSuccess(accountLabel, account.type);
            logFinalRequest(response.status, accountLabel, account.type);
        }
    }

    const clientStream = streamSource.pipeThrough(clientCaptureStream);
    const clientResponseHeaders = {
        "content-type": "text/event-stream",
        "cache-control": "no-cache",
        connection: "keep-alive",
    };
    // Forward only this allow-list of upstream headers to the client.
    const forwardedHeaderNames = [
        "retry-after",
        "anthropic-ratelimit-requests-remaining",
        "anthropic-ratelimit-requests-limit",
        "anthropic-ratelimit-tokens-remaining",
        "anthropic-ratelimit-tokens-limit",
    ];
    for (const headerName of forwardedHeaderNames) {
        const value = response.headers.get(headerName);
        if (value) {
            clientResponseHeaders[headerName] = value;
        }
    }
    return new Response(clientStream, {
        status: response.status,
        headers: clientResponseHeaders,
    });
}
|
|
1771
|
+
/**
 * Handles a successful non-streaming upstream response: reads the body,
 * logs it as both the upstream and the client response, parses it as JSON,
 * records usage/metrics on the tracer (when present) and logs the final
 * request.
 *
 * @param {object} args - `account`, `response`, `responseHeaders`, `tracer`,
 *   `requestStartTime`, `fetchStartMs`, `attemptNumber`, `finalBodyStr`,
 *   `upstreamSpan`, `logProxyBody`, `logFinalRequest`.
 * @returns {Promise<{response: *}>} The parsed upstream JSON body.
 * @throws {SyntaxError} When the upstream body is not valid JSON.
 */
async function handleAnthropicJsonSuccessResponse(args) {
    const { account, response, responseHeaders, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
    const responseText = await response.text();
    const responseBytes = Buffer.byteLength(responseText, "utf8");
    const contentType = responseHeaders["content-type"] ?? "application/json";
    tracer?.logUpstreamResponseBody(responseText);
    const logBody = (phase, durationMs) => {
        logProxyBody({
            phase,
            headers: responseHeaders,
            body: responseText,
            bodySize: responseBytes,
            contentType,
            account: account.label,
            accountType: account.type,
            attempt: attemptNumber,
            responseStatus: response.status,
            durationMs,
        });
    };
    // The upstream entry is timed from the fetch start, the client entry
    // from the start of the whole request.
    logBody("upstream_response", Date.now() - fetchStartMs);
    logBody("client_response", Date.now() - requestStartTime);

    const responseJson = JSON.parse(responseText);
    const isObjectPayload = Boolean(responseJson) && typeof responseJson === "object";
    const usage = isObjectPayload ? responseJson.usage : undefined;

    if (tracer && isObjectPayload) {
        if (usage) {
            const usageSummary = {
                inputTokens: usage.input_tokens ?? 0,
                outputTokens: usage.output_tokens ?? 0,
                cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
                cacheReadTokens: usage.cache_read_input_tokens ?? 0,
            };
            tracer.setUsage({ ...usageSummary });
            const rateLimit5h = Number.parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
            const rateLimit7d = Number.parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
            if (!Number.isNaN(rateLimit5h) || !Number.isNaN(rateLimit7d)) {
                const usageWithRates = { ...usageSummary };
                if (!Number.isNaN(rateLimit5h)) {
                    usageWithRates.rateLimitAfter5h = rateLimit5h;
                }
                if (!Number.isNaN(rateLimit7d)) {
                    usageWithRates.rateLimitAfter7d = rateLimit7d;
                }
                tracer.setUsage(usageWithRates);
            }
        }
        tracer.recordMetrics();
        // Report real UTF-8 byte counts of what was sent and received.
        // String#length counts UTF-16 code units, and re-serializing the
        // parsed JSON measures a different string than the one received.
        tracer.recordBodySizes(Buffer.byteLength(finalBodyStr, "utf8"), responseBytes);
        upstreamSpan?.end();
        tracer.end(response.status, Date.now() - requestStartTime);
    }
    else {
        upstreamSpan?.end();
    }

    recordFinalSuccess(account.label, account.type);
    logFinalRequest(response.status, account.label, account.type, undefined, undefined, {
        inputTokens: usage?.input_tokens,
        outputTokens: usage?.output_tokens,
        cacheCreationTokens: usage?.cache_creation_input_tokens,
        cacheReadTokens: usage?.cache_read_input_tokens,
    });
    return { response: responseJson };
}
|
|
1857
|
+
/**
 * Finalizes an OK upstream response that was obtained after an auth-refresh retry.
 *
 * Quota headers found on the response are persisted in the background (failures are
 * only logged at debug level). When the client asked for a stream and the upstream
 * response has a body, the body is re-exposed as an SSE stream wrapped in a
 * `Response`; otherwise the body is read as text, logged, parsed as JSON and returned.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `requestId` is used for stream-error logs.
 * @param {object} args.body - Client request body; `stream` and `model` are read.
 * @param {object} args.account - Account used for the retry (`label`, `type`).
 * @param {Response} args.retryResp - The successful upstream response.
 * @param {object} [args.tracer] - Optional tracer; all tracing is skipped when absent.
 * @param {number} args.requestStartTime - Epoch ms when the client request started.
 * @param {number} args.fetchStartMs - Epoch ms when the upstream fetch started.
 * @param {number} args.attemptNumber - Attempt index recorded in body-capture logs.
 * @param {string} args.finalBodyStr - Serialized upstream request body (for size metrics).
 * @param {object} [args.upstreamSpan] - Optional span ended once the response is handled.
 * @param {Function} args.logProxyBody - Body-capture logger.
 * @param {Function} args.logFinalRequest - Final-request logger.
 * @returns {Promise<Response|*>} A streaming `Response`, or the parsed JSON body.
 * @throws {SyntaxError} When a non-streaming upstream body is not valid JSON.
 */
async function handleAnthropicSuccessfulRetryResponse(args) {
    const { ctx, body, account, retryResp, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest, } = args;
    const retryQuota = parseQuotaHeaders(retryResp.headers);
    if (retryQuota) {
        // Fire-and-forget: quota persistence must never fail the request.
        saveAccountQuota(account.label, retryQuota).catch((error) => {
            logger.debug("[proxy] Failed to persist account quota after auth retry", {
                account: account.label,
                error: error instanceof Error ? error.message : String(error),
            });
        });
    }
    // Measured in bytes (not UTF-16 code units) so it is comparable with the
    // received-byte counts it is recorded next to.
    const requestBytes = Buffer.byteLength(finalBodyStr, "utf8");
    if (body.stream && retryResp.body) {
        const retryReader = retryResp.body.getReader();
        let retryStreamClosed = false;
        const retryStream = new ReadableStream({
            async pull(controller) {
                if (retryStreamClosed) {
                    return;
                }
                try {
                    const { done, value } = await retryReader.read();
                    // The consumer may have cancelled while the read was pending.
                    if (retryStreamClosed) {
                        return;
                    }
                    if (done) {
                        retryStreamClosed = true;
                        controller.close();
                        return;
                    }
                    controller.enqueue(value);
                }
                catch (streamErr) {
                    const errMsg = streamErr instanceof Error ? streamErr.message : String(streamErr);
                    logger.always(`[proxy] mid-stream error (auth-retry) account=${account.label}: ${errMsg}`);
                    logStreamError({
                        timestamp: new Date().toISOString(),
                        requestId: ctx.requestId,
                        account: account.label,
                        model: body.model,
                        errorMessage: errMsg,
                        durationMs: Date.now() - fetchStartMs,
                    });
                    if (!retryStreamClosed) {
                        retryStreamClosed = true;
                        // Surface the interruption to the client as an SSE error event
                        // instead of erroring the stream.
                        const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
                        controller.enqueue(new TextEncoder().encode(errorEvent));
                        controller.close();
                    }
                }
            },
            cancel(reason) {
                retryStreamClosed = true;
                // Return the promise so cancellation is awaited; a failed upstream
                // cancel is logged rather than left as an unhandled rejection.
                return retryReader.cancel(reason).catch((error) => {
                    logger.debug("[proxy] Failed to cancel upstream stream after auth retry", {
                        account: account.label,
                        error: error instanceof Error ? error.message : String(error),
                    });
                });
            },
        });
        let retryClientStream = retryStream;
        if (tracer) {
            try {
                const { stream: retryInterceptor, telemetry: retryTelemetry } = createSSEInterceptor();
                retryClientStream = retryStream.pipeThrough(retryInterceptor);
                retryTelemetry
                    .then((data) => {
                    tracer.setUsage({
                        inputTokens: data.usage.inputTokens,
                        outputTokens: data.usage.outputTokens,
                        cacheCreationTokens: data.usage.cacheCreationInputTokens,
                        cacheReadTokens: data.usage.cacheReadInputTokens,
                    });
                    tracer.logStreamEvents(data.events);
                    tracer.logUpstreamResponseHeaders(Object.fromEntries([...retryResp.headers.entries()]));
                    tracer.recordMetrics();
                    tracer.recordBodySizes(requestBytes, data.totalBytesReceived);
                    upstreamSpan?.end();
                    tracer.end(200, Date.now() - requestStartTime);
                    recordFinalSuccess(account.label, account.type);
                    logFinalRequest(200, account.label, account.type, undefined, undefined, {
                        inputTokens: data.usage.inputTokens,
                        outputTokens: data.usage.outputTokens,
                        cacheCreationTokens: data.usage.cacheCreationInputTokens,
                        cacheReadTokens: data.usage.cacheReadInputTokens,
                    });
                })
                    .catch((error) => {
                    const message = error instanceof Error ? error.message : String(error);
                    tracer.setError("stream_error", message);
                    upstreamSpan?.end();
                    tracer.end(500, Date.now() - requestStartTime);
                    recordFinalError(500, account.label, account.type);
                    logFinalRequest(500, account.label, account.type, "stream_error", message);
                });
            }
            catch {
                // Telemetry is best-effort: fall back to the uninstrumented stream.
                retryClientStream = retryStream;
            }
        }
        const responseHeaders = {
            "content-type": "text/event-stream",
            "cache-control": "no-cache",
            connection: "keep-alive",
        };
        // Forward only the rate-limit related upstream headers to the client.
        for (const headerName of [
            "retry-after",
            "anthropic-ratelimit-requests-remaining",
            "anthropic-ratelimit-requests-limit",
            "anthropic-ratelimit-tokens-remaining",
            "anthropic-ratelimit-tokens-limit",
        ]) {
            const value = retryResp.headers.get(headerName);
            if (value) {
                responseHeaders[headerName] = value;
            }
        }
        return new Response(retryClientStream, {
            status: retryResp.status,
            headers: responseHeaders,
        });
    }
    const retryRespHeaders = Object.fromEntries([...retryResp.headers.entries()]);
    const retryText = await retryResp.text();
    const responseBytes = Buffer.byteLength(retryText, "utf8");
    const contentType = retryRespHeaders["content-type"] ?? "application/json";
    tracer?.logUpstreamResponseHeaders(retryRespHeaders);
    tracer?.logUpstreamResponseBody(retryText);
    logProxyBody({
        phase: "upstream_response",
        headers: retryRespHeaders,
        body: retryText,
        bodySize: responseBytes,
        contentType,
        account: account.label,
        accountType: account.type,
        attempt: attemptNumber,
        responseStatus: retryResp.status,
        // Upstream phase is timed from the start of the upstream fetch.
        durationMs: Date.now() - fetchStartMs,
    });
    logProxyBody({
        phase: "client_response",
        headers: retryRespHeaders,
        body: retryText,
        bodySize: responseBytes,
        contentType,
        account: account.label,
        accountType: account.type,
        attempt: attemptNumber,
        responseStatus: retryResp.status,
        // Client phase is timed from the start of the whole request.
        durationMs: Date.now() - requestStartTime,
    });
    const retryJson = JSON.parse(retryText);
    // Usage is extracted whether or not a tracer exists so the final request log
    // always carries token counts when the upstream body provides them.
    const retryUsage = retryJson && typeof retryJson === "object" ? retryJson.usage : undefined;
    if (tracer) {
        if (retryUsage) {
            tracer.setUsage({
                inputTokens: retryUsage.input_tokens ?? 0,
                outputTokens: retryUsage.output_tokens ?? 0,
                cacheCreationTokens: retryUsage.cache_creation_input_tokens ?? 0,
                cacheReadTokens: retryUsage.cache_read_input_tokens ?? 0,
            });
        }
        tracer.recordMetrics();
        tracer.recordBodySizes(requestBytes, responseBytes);
    }
    upstreamSpan?.end();
    // Always close the tracer once one exists, even when the body is not a JSON object.
    tracer?.end(retryResp.status, Date.now() - requestStartTime);
    recordFinalSuccess(account.label, account.type);
    logFinalRequest(retryResp.status, account.label, account.type, undefined, undefined, {
        inputTokens: retryUsage?.input_tokens,
        outputTokens: retryUsage?.output_tokens,
        cacheCreationTokens: retryUsage?.cache_creation_input_tokens,
        cacheReadTokens: retryUsage?.cache_read_input_tokens,
    });
    return retryJson;
}
|
|
2037
|
+
/**
 * Handles an upstream auth failure by refreshing the account token and retrying
 * the upstream request, up to MAX_AUTH_RETRIES times.
 *
 * Per attempt: refresh the token (counting consecutive refresh failures and
 * disabling the account once MAX_CONSECUTIVE_REFRESH_FAILURES is reached),
 * persist refreshed tokens when the account has a persist target, then re-send
 * the request with the new bearer token. An OK retry is handed to
 * handleAnthropicSuccessfulRetryResponse. A 429 puts the account on cooldown, a
 * 401/402/403 triggers another refresh, a transient failure or network error stops
 * retrying, and any other status is returned to the client as a terminal error.
 *
 * Note: `headers.authorization` on the passed-in `headers` object is overwritten
 * with the refreshed token.
 *
 * @param {object} args - Per-attempt state; see the destructuring below for the fields read.
 * @returns {Promise<object>} Loop directive with `continueLoop`, the updated
 *   `lastError` / `authFailureMessage` / `sawRateLimit` / `sawTransientFailure` /
 *   `sawNetworkError` flags, `upstreamSpan`, and `response` when the request is finished.
 */
async function handleAnthropicAuthRetry(args) {
    const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
    recordAttemptError(account.label, account.type, 401);
    let currentLastError = lastError;
    let currentAuthFailureMessage = authFailureMessage;
    let currentSawRateLimit = sawRateLimit;
    let currentSawTransientFailure = sawTransientFailure;
    let currentSawNetworkError = sawNetworkError;
    let currentUpstreamSpan = upstreamSpan;
    let authRetrySucceeded = false;
    let authRetryError = "received 401 from Anthropic";
    // Builds the result object from the current loop state.
    const buildResult = (head, span) => ({
        ...head,
        lastError: currentLastError,
        authFailureMessage: currentAuthFailureMessage,
        sawRateLimit: currentSawRateLimit,
        sawTransientFailure: currentSawTransientFailure,
        sawNetworkError: currentSawNetworkError,
        upstreamSpan: span,
    });
    for (let authRetry = 0; authRetry < MAX_AUTH_RETRIES; authRetry++) {
        logger.always(`[proxy] ← 401 account=${account.label} refreshing (attempt ${authRetry + 1}/${MAX_AUTH_RETRIES})`);
        const refreshSucceeded = await refreshToken(account);
        if (!refreshSucceeded.success) {
            accountState.consecutiveRefreshFailures += 1;
            authRetryError = `refresh failed for account=${account.label} attempt ${authRetry + 1}/${MAX_AUTH_RETRIES}: ${refreshSucceeded.error?.slice(0, 200) ?? "unknown"}`;
            currentLastError = authRetryError;
            logger.always(`[proxy] ⚠ account=${account.label} refresh failed on attempt ${authRetry + 1}`);
            if (accountState.consecutiveRefreshFailures >=
                MAX_CONSECUTIVE_REFRESH_FAILURES) {
                await disableAccountUntilReauth(account, accountState);
                currentAuthFailureMessage = formatReauthMessage(account.label);
                break;
            }
            // No point sleeping after the final attempt.
            if (authRetry < MAX_AUTH_RETRIES - 1) {
                await sleep(2000);
            }
            continue;
        }
        if (account.persistTarget) {
            await persistTokens(account.persistTarget, account);
        }
        headers.authorization = `Bearer ${account.token}`;
        try {
            const retryResp = await fetch("https://api.anthropic.com/v1/messages?beta=true", {
                method: "POST",
                headers,
                body: buildUpstreamBody(account.token).bodyStr,
                signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
            });
            if (retryResp.ok) {
                authRetrySucceeded = true;
                accountState.consecutiveRefreshFailures = 0;
                accountState.backoffLevel = 0;
                accountState.coolingUntil = undefined;
                logger.always(`[proxy] ← 200 account=${account.label} (after ${authRetry + 1} refresh(es))`);
                // NOTE(review): an exception thrown here is handled by the catch below
                // as a network error — confirm that is the intended classification.
                const successResponse = await handleAnthropicSuccessfulRetryResponse({
                    ctx,
                    body,
                    account,
                    retryResp,
                    tracer,
                    requestStartTime,
                    fetchStartMs,
                    attemptNumber,
                    finalBodyStr,
                    upstreamSpan: currentUpstreamSpan,
                    logProxyBody,
                    logFinalRequest,
                });
                return buildResult({ response: successResponse, continueLoop: false }, undefined);
            }
            const retryStatus = retryResp.status;
            const retryBody = await retryResp.text();
            authRetryError = `retry ${authRetry + 1}/${MAX_AUTH_RETRIES} failed with status ${retryStatus}`;
            currentLastError = retryBody;
            logger.debug(`[proxy] retry ${authRetry + 1} failed: ${retryStatus} ${retryBody.substring(0, 120)}`);
            recordAttemptError(account.label, account.type, retryStatus);
            if (retryStatus === 429) {
                currentSawRateLimit = true;
                const retryAfter = retryResp.headers.get("retry-after");
                const parsedRetryAfter = Number.parseInt(retryAfter ?? "", 10);
                // Missing or non-numeric Retry-After falls back to a 60s cooldown.
                const cooldownMs = Number.isNaN(parsedRetryAfter)
                    ? 60_000
                    : Math.max(1, parsedRetryAfter) * 1000;
                accountState.coolingUntil = Date.now() + cooldownMs;
                advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
                recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
                break;
            }
            if (retryStatus === 401 || retryStatus === 402 || retryStatus === 403) {
                if (authRetry < MAX_AUTH_RETRIES - 1) {
                    await sleep(1000);
                }
                continue;
            }
            if (isTransientHttpFailure(retryStatus, retryBody)) {
                currentSawTransientFailure = true;
                break;
            }
            // Terminal upstream error: report it to the client as-is.
            const errorSummary = summarizeErrorMessage(retryBody);
            logAttempt(retryStatus, "api_error", errorSummary);
            recordFinalError(retryStatus, account.label, account.type);
            // Logged exactly once, before parsing, so a non-JSON body cannot
            // produce a duplicate final-request entry.
            logFinalRequest(retryStatus, account.label, account.type, "api_error", errorSummary);
            let terminalResponse;
            try {
                terminalResponse = JSON.parse(retryBody);
            }
            catch {
                // Body is not JSON: wrap the raw text in a Claude-style error payload.
                terminalResponse = buildClaudeError(retryStatus, retryBody);
            }
            return buildResult({ response: terminalResponse, continueLoop: false }, currentUpstreamSpan);
        }
        catch (retryFetchErr) {
            currentSawNetworkError = true;
            recordAttemptError(account.label, account.type, 502);
            const message = retryFetchErr instanceof Error
                ? retryFetchErr.message
                : String(retryFetchErr);
            authRetryError = `network error on retry ${authRetry + 1}: ${message}`;
            currentLastError = authRetryError;
            logger.debug(`[proxy] ${authRetryError}`);
            break;
        }
    }
    if (!authRetrySucceeded) {
        if (!accountState.permanentlyDisabled) {
            // Keep a still-active cooldown (e.g. one set by a 429 above); otherwise
            // apply the auth cooldown.
            if (!accountState.coolingUntil ||
                accountState.coolingUntil <= Date.now()) {
                accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
            }
            recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
        }
        currentLastError = authRetryError;
        logger.always(`[proxy] ⚠ account=${account.label} auth retries exhausted, cooldown=5min`);
        logAttempt(401, "authentication_error", authRetryError);
        tracer?.setError("authentication_error", authRetryError);
        tracer?.recordRetry(account.label, "auth_exhausted");
        currentUpstreamSpan?.end();
        currentUpstreamSpan = undefined;
    }
    return buildResult({ continueLoop: true }, currentUpstreamSpan);
}
|
|
2204
|
+
/**
 * Builds the client-facing payload for a terminal upstream error and logs it.
 *
 * The final request is logged once, then the payload sent to the client is
 * captured via `logProxyBody`. When `errBody` is valid JSON it is returned parsed
 * and logged verbatim with the upstream content type; otherwise the raw text is
 * wrapped with `buildClaudeError` and logged as `application/json`.
 *
 * @param {object} args
 * @param {number} args.responseStatus - HTTP status reported to the client.
 * @param {object} args.account - Account the attempt used (`label`, `type`).
 * @param {string} args.errBody - Raw upstream error body.
 * @param {object} args.errRespHeaders - Upstream response headers keyed by name.
 * @param {number} args.requestStartTime - Epoch ms when the client request started.
 * @param {number} args.attemptNumber - Attempt index recorded in the body capture.
 * @param {Function} args.logProxyBody - Body-capture logger.
 * @param {Function} args.logFinalRequest - Final-request logger.
 * @param {string} args.errorType - Error type recorded in the final request log.
 * @returns {*} The parsed upstream error, or the `buildClaudeError` fallback.
 */
function buildAnthropicTerminalErrorResponse(args) {
    const { responseStatus, account, errBody, errRespHeaders, requestStartTime, attemptNumber, logProxyBody, logFinalRequest, errorType, } = args;
    // Only the parse is guarded: a failure inside a logger must not be mistaken
    // for "body is not JSON" (which would log the final request a second time).
    let parsedError;
    let isJsonBody = true;
    try {
        parsedError = JSON.parse(errBody);
    }
    catch {
        isJsonBody = false;
    }
    logFinalRequest(responseStatus, account.label, account.type, errorType, summarizeErrorMessage(errBody));
    const clientError = isJsonBody
        ? parsedError
        : buildClaudeError(responseStatus, errBody);
    // A JSON upstream body is forwarded as received; the fallback is re-serialized.
    const clientBody = isJsonBody ? errBody : JSON.stringify(clientError);
    const contentType = isJsonBody
        ? (errRespHeaders["content-type"] ?? "application/json")
        : "application/json";
    logProxyBody({
        phase: "client_response",
        headers: { "content-type": contentType },
        body: clientBody,
        bodySize: Buffer.byteLength(clientBody, "utf8"),
        contentType,
        account: account.label,
        accountType: account.type,
        attempt: attemptNumber,
        responseStatus,
        durationMs: Date.now() - requestStartTime,
    });
    return clientError;
}
|
|
2244
|
+
/**
 * Classifies a non-OK upstream response and decides whether the account loop
 * should move on to another account or finish with an error for the client.
 *
 * The error body and headers are always captured first (tracer + body log).
 * Classification order, first match wins:
 *   1. invalid request      -> stop; failure details returned in `invalidRequestFailure`
 *   2. 401/402/403, OAuth account without refresh token -> cooldown, continue
 *   3. 401/402/403, API-key account                     -> cooldown, continue
 *   4. 404                  -> terminal "not_found_error" response
 *   5. transient failure    -> continue with `sawTransientFailure` set
 *   6. anything else        -> terminal "api_error" response
 *
 * @param {object} args - Per-attempt state; see the destructuring below for the fields read.
 * @returns {Promise<object>} `continueLoop`, `lastError`, `authFailureMessage`,
 *   `sawTransientFailure`, `invalidRequestFailure`, `upstreamSpan` (always
 *   undefined) and, for terminal outcomes, `response`.
 */
async function handleAnthropicNonOkResponse(args) {
    const { response, account, accountState, tracer, requestStartTime, fetchStartMs, attemptNumber, logAttempt, logProxyBody, logFinalRequest, maxConsecutiveRefreshFailures, } = args;
    // Mutable outcome fields, seeded from the caller's running state.
    const state = {
        lastError: args.lastError,
        authFailureMessage: args.authFailureMessage,
        sawTransientFailure: args.sawTransientFailure,
        invalidRequestFailure: args.invalidRequestFailure,
    };
    const proceed = (continueLoop) => ({
        continueLoop,
        ...state,
        upstreamSpan: undefined,
    });
    const terminate = (errorType, responseStatus) => ({
        response: buildAnthropicTerminalErrorResponse({
            responseStatus,
            account,
            errBody,
            errRespHeaders,
            requestStartTime,
            attemptNumber,
            logProxyBody,
            logFinalRequest,
            errorType,
        }),
        continueLoop: false,
        ...state,
        upstreamSpan: undefined,
    });
    const errBody = await response.text();
    const errRespHeaders = {};
    for (const [name, headerValue] of response.headers) {
        errRespHeaders[name] = headerValue;
    }
    tracer?.logUpstreamResponseHeaders(errRespHeaders);
    tracer?.logUpstreamResponseBody(errBody);
    logProxyBody({
        phase: "upstream_response",
        headers: errRespHeaders,
        body: errBody,
        bodySize: Buffer.byteLength(errBody, "utf8"),
        contentType: errRespHeaders["content-type"] ?? "application/json",
        account: account.label,
        accountType: account.type,
        attempt: attemptNumber,
        responseStatus: response.status,
        durationMs: Date.now() - fetchStartMs,
    });
    const status = response.status;
    // 1. Invalid request: stop and hand the failure details back to the caller.
    if (isInvalidRequestError(status, errBody)) {
        logger.always(`[proxy] ← ${status} upstream invalid_request_error`);
        const summary = summarizeErrorMessage(errBody);
        logAttempt(status, "invalid_request_error", summary);
        tracer?.setError("invalid_request_error", summary);
        state.invalidRequestFailure = {
            status,
            body: errBody,
            contentType: errRespHeaders["content-type"],
        };
        state.lastError = summary;
        return proceed(false);
    }
    const isAuthStatus = [401, 402, 403].includes(status);
    // 2. OAuth account that cannot refresh: cool it down and try the next account.
    if (isAuthStatus && account.type === "oauth" && !account.refreshToken) {
        recordAttemptError(account.label, account.type, status);
        accountState.consecutiveRefreshFailures += 1;
        accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
        recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
        if (accountState.consecutiveRefreshFailures >= maxConsecutiveRefreshFailures) {
            await disableAccountUntilReauth(account, accountState);
        }
        state.authFailureMessage = formatReauthMessage(account.label);
        logger.always(`[proxy] ← ${status} account=${account.label} cooldown=5min`);
        state.lastError = errBody;
        const summary = summarizeErrorMessage(errBody);
        logAttempt(status, "authentication_error", summary);
        tracer?.setError("authentication_error", summary);
        tracer?.recordRetry(account.label, "auth_no_refresh");
        return proceed(true);
    }
    // 3. API-key account rejected: cool it down and try the next account.
    if (isAuthStatus && account.type === "api_key") {
        recordAttemptError(account.label, account.type, status);
        state.authFailureMessage =
            "Authentication failed for Anthropic API key credentials. Update ANTHROPIC_API_KEY or re-login with OAuth.";
        accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
        recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
        logger.always(`[proxy] ← ${status} account=${account.label} cooldown=5min`);
        state.lastError = errBody;
        const summary = summarizeErrorMessage(errBody);
        logAttempt(status, "authentication_error", summary);
        tracer?.setError("authentication_error", summary);
        tracer?.recordRetry(account.label, "auth_api_key");
        return proceed(true);
    }
    // 4. Not found: terminal, returned to the client.
    if (status === 404) {
        recordFinalError(status, account.label, account.type);
        logger.always(`[proxy] ← 404 account=${account.label}`);
        const summary = summarizeErrorMessage(errBody);
        logAttempt(404, "not_found_error", summary);
        tracer?.setError("not_found_error", summary);
        tracer?.end(404, Date.now() - requestStartTime);
        return terminate("not_found_error", 404);
    }
    // 5. Transient upstream failure: rotate to the next account.
    if (isTransientHttpFailure(status, errBody)) {
        recordAttemptError(account.label, account.type, status);
        state.sawTransientFailure = true;
        logger.always(`[proxy] ← ${status} account=${account.label} (transient, rotating)`);
        state.lastError = errBody;
        const summary = summarizeErrorMessage(errBody);
        logAttempt(status, "api_error", summary);
        tracer?.setError("transient_error", summary);
        tracer?.recordRetry(account.label, "transient");
        return proceed(true);
    }
    // 6. Any other status: terminal API error, returned to the client.
    recordFinalError(status, account.label, account.type);
    logger.always(`[proxy] ← ${status} account=${account.label}`);
    logger.debug(`[claude-proxy] error body: ${errBody.substring(0, 200)}`);
    const summary = summarizeErrorMessage(errBody);
    logAttempt(status, "api_error", summary);
    tracer?.setError("api_error", summary);
    tracer?.end(status, Date.now() - requestStartTime);
    return terminate("api_error", status);
}
|
|
2407
|
+
/**
 * Sets up the per-request runtime for a Claude proxy request: an optional
 * tracer plus logging closures bound to this request.
 *
 * Tracer start-up is best-effort; if any step of it throws, the request
 * proceeds without a tracer. The incoming client request body is captured
 * before returning.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context (`requestId`, `method`, `path`, `headers`).
 * @param {object} args.body - Parsed client body (`model`, `stream`, `tools` are read).
 * @param {string} args.clientRequestBody - Raw client request body text.
 * @returns {{tracer: (object|undefined), requestStartTime: number,
 *   logProxyBody: Function, logFinalRequest: Function,
 *   buildLoggedClaudeError: Function}}
 */
function createClaudeRequestRuntimeContext(args) {
    const { ctx, body, clientRequestBody } = args;
    const TOKEN_FIELDS = [
        "inputTokens",
        "outputTokens",
        "cacheCreationTokens",
        "cacheReadTokens",
    ];
    const countTools = () => (Array.isArray(body.tools) ? body.tools.length : 0);
    // Best-effort tracer start-up: any failure yields `undefined`.
    const tracer = (() => {
        try {
            const started = ProxyTracer.startRequest({
                requestId: ctx.requestId,
                method: ctx.method,
                path: ctx.path,
                model: body.model,
                stream: body.stream ?? false,
                toolCount: countTools(),
                sessionId: ctx.headers["x-neurolink-session-id"] ??
                    ctx.headers["x-claude-code-session-id"] ??
                    undefined,
                userAgent: ctx.headers["user-agent"] ?? undefined,
            }, ctx.headers);
            const receiveSpan = started.startReceive();
            started.logRequestHeaders(ctx.headers);
            started.logRequestBody(clientRequestBody);
            receiveSpan.end();
            return started;
        }
        catch {
            return undefined;
        }
    })();
    const requestStartTime = Date.now();
    // Trace correlation ids, or nothing when no trace context is available.
    const readTraceIds = () => {
        const traceCtx = tracer?.getTraceContext();
        if (!traceCtx) {
            return {};
        }
        return { traceId: traceCtx.traceId, spanId: traceCtx.spanId };
    };
    // Captures one body (request or response) for this request.
    const logProxyBody = (capture) => {
        const traceIds = readTraceIds();
        void logBodyCapture({
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            model: body.model,
            stream: body.stream ?? false,
            ...capture,
            ...traceIds,
        });
    };
    // Writes the single final-outcome log entry for this request.
    const logFinalRequest = (status, accountLabel, accountType, errorType, errorMessage, extra) => {
        const traceIds = readTraceIds();
        const entry = {
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            method: ctx.method,
            path: ctx.path,
            model: body.model,
            stream: !!body.stream,
            toolCount: countTools(),
            account: accountLabel,
            accountType,
            responseStatus: status,
            responseTimeMs: Date.now() - requestStartTime,
        };
        if (errorType) {
            entry.errorType = errorType;
        }
        if (errorMessage) {
            entry.errorMessage = errorMessage;
        }
        // Token counts are included only when explicitly provided.
        for (const field of TOKEN_FIELDS) {
            const count = extra?.[field];
            if (count !== undefined) {
                entry[field] = count;
            }
        }
        logRequest(Object.assign(entry, traceIds));
    };
    // Builds a Claude-style error payload and records it as the final outcome.
    const buildLoggedClaudeError = (status, message, errorType, extra) => {
        const errorBody = buildClaudeError(status, message, errorType);
        const errorBodyText = JSON.stringify(errorBody);
        recordFinalError(status, extra?.account, extra?.accountType);
        logFinalRequest(status, extra?.account ?? "", extra?.accountType ?? "final", errorType, message);
        logProxyBody({
            phase: "client_response",
            headers: { "content-type": "application/json" },
            body: errorBodyText,
            bodySize: Buffer.byteLength(errorBodyText, "utf8"),
            contentType: "application/json",
            responseStatus: status,
            durationMs: Date.now() - requestStartTime,
            ...extra,
        });
        return errorBody;
    };
    logProxyBody({
        phase: "client_request",
        headers: ctx.headers,
        body: clientRequestBody,
        bodySize: Buffer.byteLength(clientRequestBody, "utf8"),
        contentType: ctx.headers["content-type"] ?? "application/json",
    });
    return {
        tracer,
        requestStartTime,
        logProxyBody,
        logFinalRequest,
        buildLoggedClaudeError,
    };
}
|
|
2510
|
+
/**
 * Creates the per-attempt logger for one upstream attempt against one account.
 *
 * The returned function writes a single entry through `logRequestAttempt`.
 * Error fields are included only when truthy, token counts only when defined,
 * and trace ids only when the tracer yields a trace context.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context (`requestId`, `method`, `path`).
 * @param {object} args.body - Parsed client body (`model`, `stream` are read).
 * @param {number} args.toolCount - Tool count recorded on each entry.
 * @param {number} args.requestStart - Epoch ms used as the response-time origin.
 * @param {object} [args.tracer] - Optional tracer supplying the trace context.
 * @param {object} args.account - Account being attempted (`label`, `type`).
 * @param {number} args.attemptNumber - Attempt index recorded on each entry.
 * @returns {Function} `(status, errorType, errorMessage, extra) => void`
 */
function createAnthropicAttemptLogger(args) {
    const { ctx, body, toolCount, requestStart, tracer, account, attemptNumber } = args;
    const tokenFields = [
        "inputTokens",
        "outputTokens",
        "cacheCreationTokens",
        "cacheReadTokens",
    ];
    return (status, errorType, errorMessage, extra) => {
        const traceCtx = tracer?.getTraceContext();
        const entry = {
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            attempt: attemptNumber,
            method: ctx.method,
            path: ctx.path,
            model: body.model,
            stream: !!body.stream,
            toolCount,
            account: account.label,
            accountType: account.type,
            responseStatus: status,
            responseTimeMs: Date.now() - requestStart,
        };
        if (errorType) {
            entry.errorType = errorType;
        }
        if (errorMessage) {
            entry.errorMessage = errorMessage;
        }
        for (const field of tokenFields) {
            const count = extra?.[field];
            if (count !== undefined) {
                entry[field] = count;
            }
        }
        if (traceCtx) {
            entry.traceId = traceCtx.traceId;
            entry.spanId = traceCtx.spanId;
        }
        logRequestAttempt(entry);
    };
}
|
|
2547
|
+
/**
 * Prepares a single upstream attempt for one account: optional preflight
 * token refresh, upstream header construction, request-body construction,
 * and tracing/logging of the outgoing request.
 *
 * @param {object} args
 * @param {object} args.account - Account to use (`label`, `type`, `token`, `key`, `persistTarget` are read).
 * @param {object} args.accountState - Mutable runtime state; `consecutiveRefreshFailures` is updated.
 * @param {string} args.bodyStr - Serialized client request body.
 * @param {object} args.clientHeaders - Incoming client headers; only string values are forwarded.
 * @param {boolean} args.isClaudeClientRequest - Selects which OAuth beta list and beta seed are used.
 * @param {string} args.url - Upstream URL (only passed to the tracer here).
 * @param {object} [args.tracer] - Optional tracer.
 * @param {number} args.attemptNumber - Attempt number for tracing/logging.
 * @param {*} args.currentLastError - Last error carried in from the caller's loop.
 * @param {*} args.currentAuthFailureMessage - Auth failure message carried in from the caller's loop.
 * @param {Function} args.logAttempt - Per-attempt logger.
 * @param {Function} args.logProxyBody - Body logger, called with the final upstream request.
 * @returns {Promise<object>} `{ continueLoop: true, lastError, authFailureMessage }` when the
 *   account was disabled after repeated refresh failures; otherwise
 *   `{ continueLoop: false, lastError, authFailureMessage, headers, buildUpstreamBody,
 *   finalBodyStr, fetchStartMs, upstreamSpan }`.
 */
async function prepareAnthropicAccountAttempt(args) {
    const { account, accountState, bodyStr, clientHeaders, isClaudeClientRequest, url, tracer, attemptNumber, currentLastError, currentAuthFailureMessage, logAttempt, logProxyBody, } = args;
    let lastError = currentLastError;
    let authFailureMessage = currentAuthFailureMessage;

    // --- Preflight token refresh -------------------------------------------
    if (needsRefresh(account)) {
        const refreshOutcome = await refreshToken(account);
        if (!refreshOutcome.success) {
            accountState.consecutiveRefreshFailures += 1;
            lastError = `token refresh failed for account=${account.label}: ${refreshOutcome.error?.slice(0, 200) ?? "unknown"}`;
            logger.debug(`[proxy] preflight refresh failed account=${account.label} failures=${accountState.consecutiveRefreshFailures}`);
            const refreshBudgetExhausted = accountState.consecutiveRefreshFailures >=
                MAX_CONSECUTIVE_REFRESH_FAILURES;
            if (refreshBudgetExhausted) {
                await disableAccountUntilReauth(account, accountState);
                authFailureMessage = formatReauthMessage(account.label);
                logAttempt(401, "authentication_error", String(lastError));
                return {
                    continueLoop: true,
                    lastError,
                    authFailureMessage,
                };
            }
            // Below the threshold the attempt still proceeds with the current token.
        }
        else {
            if (account.persistTarget) {
                await persistTokens(account.persistTarget, account);
            }
            accountState.consecutiveRefreshFailures = 0;
        }
    }

    const isOAuth = account.type === "oauth";

    // String-valued client headers with their original key casing; passed to
    // the snapshot refresh for OAuth accounts.
    const stringClientHeaders = {};
    Object.entries(clientHeaders).forEach(([name, value]) => {
        if (typeof value === "string") {
            stringClientHeaders[name] = value;
        }
    });
    let snapshot = null;
    if (isOAuth) {
        snapshot = await maybeRefreshClaudeSnapshot(account.label, account.key, stringClientHeaders, bodyStr);
    }

    // --- Upstream headers ---------------------------------------------------
    // Forward string-valued client headers (lower-cased) unless blocked.
    const headers = {};
    for (const [rawName, rawValue] of Object.entries(clientHeaders)) {
        const normalizedName = rawName.toLowerCase();
        if (typeof rawValue !== "string") {
            continue;
        }
        if (BLOCKED_UPSTREAM_HEADERS.has(normalizedName)) {
            continue;
        }
        headers[normalizedName] = rawValue;
    }
    // Content type and credentials are always overridden by the proxy.
    headers["content-type"] = "application/json";
    if (!isOAuth) {
        headers["x-api-key"] = account.token;
        delete headers.authorization;
    }
    else {
        headers.authorization = `Bearer ${account.token}`;
        delete headers["x-api-key"];
        applySnapshotHeaders(headers, snapshot);
    }
    // Fill defaults only where nothing (truthy) is set yet.
    headers["user-agent"] ||= CLAUDE_CLI_USER_AGENT;
    headers["anthropic-version"] ||= "2023-06-01";
    headers["anthropic-dangerous-direct-browser-access"] ||= "true";
    headers["x-app"] ||= "cli";
    headers.accept ||= "application/json";

    // --- anthropic-beta handling --------------------------------------------
    if (isOAuth) {
        // Seed from the built headers for Claude clients, otherwise from the
        // raw client header, then add the beta list for that client kind.
        let betaSeed;
        if (isClaudeClientRequest) {
            betaSeed = headers["anthropic-beta"] ?? "";
        }
        else {
            betaSeed = clientHeaders["anthropic-beta"] ?? "";
        }
        const betaSet = new Set(betaSeed
            .split(",")
            .map((entry) => entry.trim())
            .filter((entry) => entry.length > 0));
        const requiredBetas = isClaudeClientRequest
            ? CLAUDE_CODE_OAUTH_BETAS
            : NON_CLAUDE_OAUTH_BETAS;
        for (const requiredBeta of requiredBetas) {
            betaSet.add(requiredBeta);
        }
        headers["anthropic-beta"] = Array.from(betaSet).join(",");
    }
    else {
        // API-key accounts: drop every entry listed in CLAUDE_CODE_OAUTH_BETAS.
        const retainedBetas = [];
        for (const rawEntry of (headers["anthropic-beta"] ?? "").split(",")) {
            const entry = rawEntry.trim();
            if (entry && !CLAUDE_CODE_OAUTH_BETAS.includes(entry)) {
                retainedBetas.push(entry);
            }
        }
        if (retainedBetas.length > 0) {
            headers["anthropic-beta"] = retainedBetas.join(",");
        }
        else {
            delete headers["anthropic-beta"];
        }
    }

    // --- Upstream body ------------------------------------------------------
    // Returned to the caller too; reads the session-id header at call time.
    const buildUpstreamBody = (token) => {
        if (!isOAuth) {
            return { bodyStr };
        }
        return polyfillOAuthBody(bodyStr, token, snapshot, headers["x-claude-code-session-id"]);
    };
    const upstreamBody = buildUpstreamBody(account.token);
    const shouldAdoptSessionId = isOAuth &&
        upstreamBody.sessionId &&
        !headers["x-claude-code-session-id"];
    if (shouldAdoptSessionId) {
        headers["x-claude-code-session-id"] = upstreamBody.sessionId;
    }
    const finalBodyStr = upstreamBody.bodyStr;

    // --- Bookkeeping, tracing and logging -----------------------------------
    logger.always(`[proxy] → account=${account.label} (${account.type})`);
    recordAttempt(account.label, account.type);
    const fetchStartMs = Date.now();
    let upstreamSpan;
    if (tracer) {
        upstreamSpan = tracer.startUpstreamAttempt({
            attempt: attemptNumber,
            account: account.label,
            polyfillHeaders: isOAuth,
            polyfillBody: isOAuth,
            upstreamUrl: url,
        });
        tracer.logUpstreamRequestHeaders(headers);
        tracer.logUpstreamRequestBody(finalBodyStr);
        // Trace headers are merged after the tracer has logged the request headers.
        Object.assign(headers, tracer.getTraceHeaders());
    }
    logProxyBody({
        phase: "upstream_request",
        headers,
        body: finalBodyStr,
        bodySize: Buffer.byteLength(finalBodyStr, "utf8"),
        contentType: headers["content-type"] ?? "application/json",
        account: account.label,
        accountType: account.type,
        attempt: attemptNumber,
    });
    return {
        continueLoop: false,
        lastError,
        authFailureMessage,
        headers,
        buildUpstreamBody,
        finalBodyStr,
        fetchStartMs,
        upstreamSpan,
    };
}
|
|
2694
|
+
/**
 * Sends the prepared request upstream for one account and classifies the
 * immediate outcome.
 *
 * - Retryable network errors (per `isRetryableNetworkError`) are recorded as
 *   502 and reported with `continueLoop: true`; other fetch errors are rethrown.
 * - HTTP 429 puts the account into a backoff cooldown and also reports
 *   `continueLoop: true`.
 * - Any other response is returned untouched for the caller to handle.
 *
 * @param {object} args
 * @param {string} args.url - Upstream URL.
 * @param {object} args.headers - Upstream request headers.
 * @param {string} args.finalBodyStr - Serialized upstream body.
 * @param {object} args.account - Account in use (`key`, `label`, `type` are read).
 * @param {object} args.accountState - Mutable state; `coolingUntil` and `backoffLevel` are updated on 429.
 * @param {Array} args.enabledAccounts - Enabled accounts (only the length is used).
 * @param {Array} args.orderedAccounts - Ordered accounts (only the first key is used).
 * @param {object} [args.tracer] - Optional tracer.
 * @param {Function} args.logAttempt - Per-attempt logger.
 * @param {*} args.currentLastError - Last error carried in from the caller.
 * @param {boolean} args.currentSawRateLimit - Rate-limit flag carried in from the caller.
 * @param {boolean} args.currentSawNetworkError - Network-error flag carried in from the caller.
 * @param {object} [args.upstreamSpan] - Span for this attempt; ended here on the retry paths.
 * @returns {Promise<object>} `{ continueLoop, lastError, sawRateLimit, sawNetworkError, upstreamSpan }`
 *   plus `response` when the loop should not continue.
 */
async function fetchAnthropicAccountResponse(args) {
    const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
    let lastError = currentLastError;
    let sawRateLimit = currentSawRateLimit;
    let sawNetworkError = currentSawNetworkError;

    // Converts a Retry-After header value (integer seconds or a date string)
    // into milliseconds; returns 0 when absent or unparseable.
    const retryAfterToMs = (headerValue) => {
        if (!headerValue) {
            return 0;
        }
        const asSeconds = parseInt(headerValue, 10);
        if (!Number.isNaN(asSeconds)) {
            return asSeconds * 1000;
        }
        const asDate = new Date(headerValue);
        if (Number.isNaN(asDate.getTime())) {
            return 0;
        }
        // Date form: never wait less than one second.
        return Math.max(asDate.getTime() - Date.now(), 1000);
    };

    let response;
    try {
        response = await fetch(url, {
            method: "POST",
            headers,
            body: finalBodyStr,
            signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
        });
    }
    catch (cause) {
        if (!isRetryableNetworkError(cause)) {
            throw cause;
        }
        sawNetworkError = true;
        recordAttemptError(account.label, account.type, 502);
        const code = getErrorCode(cause) ?? "unknown";
        const message = cause instanceof Error ? cause.message : String(cause);
        lastError = message;
        logger.always(`[proxy] fetch error account=${account.label} code=${code} (rotating): ${message}`);
        logAttempt(502, "network_error", message);
        tracer?.setError("network_error", message);
        tracer?.recordRetry(account.label, "network_error");
        upstreamSpan?.end();
        return {
            continueLoop: true,
            lastError,
            sawRateLimit,
            sawNetworkError,
            upstreamSpan: undefined,
        };
    }

    if (response.status !== 429) {
        // Everything except 429 is interpreted by the caller.
        return {
            continueLoop: false,
            response,
            lastError,
            sawRateLimit,
            sawNetworkError,
            upstreamSpan,
        };
    }

    // --- 429: cool the account down and let the caller rotate ---------------
    sawRateLimit = true;
    const hintedCooldownMs = retryAfterToMs(response.headers.get("retry-after"));
    const priorLevel = accountState.backoffLevel;
    const baseCooldownMs = hintedCooldownMs > 0 ? hintedCooldownMs : RATE_LIMIT_BACKOFF_BASE_MS;
    // Exponential in the account's current backoff level, capped.
    const cooldownMs = Math.min(baseCooldownMs * 2 ** priorLevel, RATE_LIMIT_BACKOFF_CAP_MS);
    accountState.coolingUntil = Date.now() + cooldownMs;
    accountState.backoffLevel += 1;
    advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
    recordAttemptError(account.label, account.type, 429);
    recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
    lastError = await response.text();
    logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(cooldownMs / 1000)}s`);
    logAttempt(429, "rate_limit_error", String(lastError));
    tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
    tracer?.recordRetry(account.label, "rate_limit");
    upstreamSpan?.end();
    return {
        continueLoop: true,
        lastError,
        sawRateLimit,
        sawNetworkError,
        upstreamSpan: undefined,
    };
}
|
|
2778
|
+
/**
 * Handles a Claude request routed to Anthropic by trying the loaded accounts
 * in order, then falling back when no account produced a response.
 *
 * Per account (skipping ones still cooling down): prepare the attempt, send
 * it, run the OAuth 401 retry handler when applicable, then dispatch to the
 * non-OK or success handler. If the loop ends without returning, the
 * configured fallback chain is tried; the automatic fallback is tried only
 * when no chain is configured and no rate limit was seen; otherwise a failure
 * response is built from the accumulated loop state.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context.
 * @param {object} args.body - Parsed request body.
 * @param {object} [args.modelRouter] - Optional router; `getFallbackChain()` is consulted.
 * @param {object} [args.tracer] - Optional tracer.
 * @param {number} args.requestStartTime - Request start time passed through to helpers.
 * @param {string} args.accountStrategy - Account ordering strategy (reported to the tracer).
 * @param {Function} args.buildLoggedClaudeError - Error-response builder passed through to helpers.
 * @param {Function} args.logProxyBody - Body logger passed through to helpers.
 * @param {Function} args.logFinalRequest - Final-request logger passed through to helpers.
 * @returns {Promise<*>} The response produced by whichever stage handled the request.
 */
async function handleAnthropicRoutedClaudeRequest(args) {
    const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
    const loaded = await loadClaudeProxyAccounts({
        ctx,
        body,
        tracer,
        requestStartTime,
        accountStrategy,
        buildLoggedClaudeError,
    });
    // Account loading may already have produced the final response.
    if ("response" in loaded) {
        return loaded.response;
    }
    const { accounts, enabledAccounts, orderedAccounts, bodyStr, requestStart, toolCount, url, clientHeaders, isClaudeClientRequest, } = loaded;
    // Outcome flags accumulated across all attempts.
    const loopState = {
        lastError: undefined,
        sawRateLimit: false,
        sawNetworkError: false,
        sawTransientFailure: false,
        invalidRequestFailure: null,
        authFailureMessage: null,
        attemptNumber: 0,
    };
    const acctSelectionSpan = tracer?.startAccountSelection();
    for (const account of orderedAccounts) {
        const accountState = getOrCreateRuntimeState(account.key);
        const stillCooling = accountState.coolingUntil && accountState.coolingUntil > Date.now();
        if (stillCooling) {
            continue;
        }
        loopState.attemptNumber += 1;
        const attemptNumber = loopState.attemptNumber;
        // The selection span describes the first account actually attempted.
        if (tracer && attemptNumber === 1 && acctSelectionSpan) {
            tracer.setAccountSelection({
                strategy: accountStrategy,
                accountsTotal: accounts.length,
                accountsHealthy: enabledAccounts.length,
                selectedAccount: account.label,
                accountType: account.type,
            });
            acctSelectionSpan.end();
        }
        const logAttempt = createAnthropicAttemptLogger({
            ctx,
            body,
            toolCount,
            requestStart,
            tracer,
            account,
            attemptNumber,
        });

        // --- Stage 1: prepare headers/body (may skip this account) ----------
        const prepared = await prepareAnthropicAccountAttempt({
            account,
            accountState,
            bodyStr,
            clientHeaders,
            isClaudeClientRequest,
            url,
            tracer,
            attemptNumber,
            currentLastError: loopState.lastError,
            currentAuthFailureMessage: loopState.authFailureMessage,
            logAttempt,
            logProxyBody,
        });
        Object.assign(loopState, {
            lastError: prepared.lastError,
            authFailureMessage: prepared.authFailureMessage,
        });
        const { headers, buildUpstreamBody, finalBodyStr, fetchStartMs } = prepared;
        const readyToSend = !prepared.continueLoop &&
            headers &&
            buildUpstreamBody &&
            finalBodyStr &&
            fetchStartMs !== undefined;
        if (!readyToSend) {
            continue;
        }

        // --- Stage 2: send upstream ----------------------------------------
        const fetched = await fetchAnthropicAccountResponse({
            url,
            headers,
            finalBodyStr,
            account,
            accountState,
            enabledAccounts,
            orderedAccounts,
            tracer,
            logAttempt,
            currentLastError: loopState.lastError,
            currentSawRateLimit: loopState.sawRateLimit,
            currentSawNetworkError: loopState.sawNetworkError,
            upstreamSpan: prepared.upstreamSpan,
        });
        Object.assign(loopState, {
            lastError: fetched.lastError,
            sawRateLimit: fetched.sawRateLimit,
            sawNetworkError: fetched.sawNetworkError,
        });
        if (fetched.continueLoop || !fetched.response) {
            continue;
        }
        let upstreamSpan = fetched.upstreamSpan;
        const response = fetched.response;

        // --- Stage 3: OAuth 401 retry ---------------------------------------
        const canRetryAuth = response.status === 401 &&
            account.type === "oauth" &&
            account.refreshToken;
        if (canRetryAuth) {
            const authRetry = await handleAnthropicAuthRetry({
                ctx,
                body,
                account,
                accountState,
                headers,
                buildUpstreamBody,
                enabledAccounts,
                orderedAccounts,
                response,
                tracer,
                requestStartTime,
                fetchStartMs,
                attemptNumber,
                finalBodyStr,
                upstreamSpan,
                logAttempt,
                logProxyBody,
                logFinalRequest,
                lastError: loopState.lastError,
                authFailureMessage: loopState.authFailureMessage,
                sawRateLimit: loopState.sawRateLimit,
                sawTransientFailure: loopState.sawTransientFailure,
                sawNetworkError: loopState.sawNetworkError,
            });
            Object.assign(loopState, {
                lastError: authRetry.lastError,
                authFailureMessage: authRetry.authFailureMessage,
                sawRateLimit: authRetry.sawRateLimit,
                sawTransientFailure: authRetry.sawTransientFailure,
                sawNetworkError: authRetry.sawNetworkError,
            });
            upstreamSpan = authRetry.upstreamSpan;
            if (authRetry.response !== undefined) {
                return authRetry.response;
            }
            if (authRetry.continueLoop) {
                continue;
            }
            // Otherwise fall through and treat the original response below.
        }

        // --- Stage 4: non-OK responses --------------------------------------
        if (!response.ok) {
            const nonOk = await handleAnthropicNonOkResponse({
                response,
                account,
                accountState,
                tracer,
                requestStartTime,
                fetchStartMs,
                attemptNumber,
                logAttempt,
                logProxyBody,
                logFinalRequest,
                lastError: loopState.lastError,
                authFailureMessage: loopState.authFailureMessage,
                sawTransientFailure: loopState.sawTransientFailure,
                invalidRequestFailure: loopState.invalidRequestFailure,
                maxConsecutiveRefreshFailures: MAX_CONSECUTIVE_REFRESH_FAILURES,
            });
            Object.assign(loopState, {
                lastError: nonOk.lastError,
                authFailureMessage: nonOk.authFailureMessage,
                sawTransientFailure: nonOk.sawTransientFailure,
                invalidRequestFailure: nonOk.invalidRequestFailure,
            });
            if (nonOk.response !== undefined) {
                return nonOk.response;
            }
            if (nonOk.continueLoop) {
                continue;
            }
            // Neither a response nor a retry request: stop trying accounts.
            break;
        }

        // --- Stage 5: successful response -----------------------------------
        const success = await handleAnthropicSuccessfulResponse({
            ctx,
            body,
            account,
            accountState,
            response,
            tracer,
            requestStartTime,
            fetchStartMs,
            attemptNumber,
            finalBodyStr,
            upstreamSpan,
            logProxyBody,
            logFinalRequest,
        });
        if ("retryNextAccount" in success) {
            continue;
        }
        return success.response;
    }

    // No account was attempted, so the selection span was never closed above.
    if (loopState.attemptNumber === 0) {
        acctSelectionSpan?.end();
    }

    // --- Fallbacks ----------------------------------------------------------
    const configuredFallbackResponse = await tryConfiguredClaudeFallbackChain({
        ctx,
        body,
        modelRouter,
        tracer,
        requestStartTime,
        logProxyBody,
        logFinalRequest,
    });
    if (configuredFallbackResponse) {
        return configuredFallbackResponse;
    }
    const configuredChain = modelRouter?.getFallbackChain() ?? [];
    const mayAutoFallback = configuredChain.length === 0 && !loopState.sawRateLimit;
    if (mayAutoFallback) {
        const autoFallbackResponse = await tryAutoClaudeFallback({
            ctx,
            body,
            tracer,
            requestStartTime,
            logProxyBody,
            logFinalRequest,
        });
        if (autoFallbackResponse) {
            return autoFallbackResponse;
        }
    }
    return buildClaudeAnthropicFailureResponse({
        tracer,
        requestStartTime,
        authFailureMessage: loopState.authFailureMessage,
        invalidRequestFailure: loopState.invalidRequestFailure,
        sawNetworkError: loopState.sawNetworkError,
        sawTransientFailure: loopState.sawTransientFailure,
        sawRateLimit: loopState.sawRateLimit,
        lastError: loopState.lastError,
        orderedAccounts,
        buildLoggedClaudeError,
        logProxyBody,
        logFinalRequest,
    });
}
|
|
259
3008
|
// ---------------------------------------------------------------------------
|
|
260
3009
|
// Route factory
|
|
261
3010
|
// ---------------------------------------------------------------------------
|
|
@@ -269,7 +3018,7 @@ async function tryLoadLegacyAccount(creds, legacyCredPath) {
|
|
|
269
3018
|
* @param basePath - Base path prefix (default: "" since Claude API uses /v1/...).
|
|
270
3019
|
* @returns RouteGroup with Claude-compatible endpoints.
|
|
271
3020
|
*/
|
|
272
|
-
export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrategy = "fill-first") {
|
|
3021
|
+
export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrategy = "fill-first", passthroughMode = false) {
|
|
273
3022
|
return {
|
|
274
3023
|
prefix: `${basePath}/v1`,
|
|
275
3024
|
routes: [
|
|
@@ -298,1269 +3047,65 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
298
3047
|
provider: "anthropic",
|
|
299
3048
|
model: body.model,
|
|
300
3049
|
};
|
|
3050
|
+
const clientRequestBody = JSON.stringify(body);
|
|
3051
|
+
// 3. Create request runtime context (tracer, loggers, error builder)
|
|
3052
|
+
const { tracer, requestStartTime, logProxyBody, logFinalRequest, buildLoggedClaudeError, } = createClaudeRequestRuntimeContext({
|
|
3053
|
+
ctx,
|
|
3054
|
+
body,
|
|
3055
|
+
clientRequestBody,
|
|
3056
|
+
});
|
|
301
3057
|
try {
|
|
302
|
-
//
|
|
3058
|
+
// 4. Route based on target provider
|
|
303
3059
|
if (route.provider === null) {
|
|
304
|
-
|
|
305
|
-
|
|
3060
|
+
tracer?.setError("not_found_error", `Model '${body.model}' is not a Claude model.`);
|
|
3061
|
+
tracer?.end(404, Date.now() - requestStartTime);
|
|
3062
|
+
return buildLoggedClaudeError(404, `Model '${body.model}' is not a Claude model. Use a model router to route it to another provider.`);
|
|
306
3063
|
}
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
// before adding — if it fails, skip the account entirely.
|
|
318
|
-
const { tokenStore } = await import("../../auth/tokenStore.js");
|
|
319
|
-
// Decision 10D: Auto-prune dead entries once on first request (startup)
|
|
320
|
-
if (!startupPruneDone) {
|
|
321
|
-
await tokenStore.pruneExpired();
|
|
322
|
-
startupPruneDone = true;
|
|
323
|
-
}
|
|
324
|
-
const compoundKeys = await tokenStore.listByPrefix("anthropic:");
|
|
325
|
-
for (const key of compoundKeys) {
|
|
326
|
-
// Decision 10D + Hot-reload: Skip disabled accounts UNLESS credentials changed
|
|
327
|
-
if (await tokenStore.isDisabled(key)) {
|
|
328
|
-
const existingState = getOrCreateRuntimeState(key);
|
|
329
|
-
// Check if credentials were refreshed/re-authed since disable.
|
|
330
|
-
// On cold start, lastToken is empty — don't treat that as a
|
|
331
|
-
// credential change; only compare on subsequent reloads.
|
|
332
|
-
const tokens = await tokenStore.loadTokens(key);
|
|
333
|
-
const hasTrackedTokens = existingState.lastToken !== undefined &&
|
|
334
|
-
existingState.lastToken !== "";
|
|
335
|
-
const tokenChanged = tokens &&
|
|
336
|
-
hasTrackedTokens &&
|
|
337
|
-
(existingState.lastToken !== tokens.accessToken ||
|
|
338
|
-
existingState.lastRefreshToken !== tokens.refreshToken);
|
|
339
|
-
if (tokenChanged) {
|
|
340
|
-
// Credentials changed — auto-enable and use this account
|
|
341
|
-
await tokenStore.markEnabled(key);
|
|
342
|
-
logger.always(`[proxy] account=${key.split(":")[1] ?? key} re-enabled (credentials changed)`);
|
|
343
|
-
existingState.permanentlyDisabled = false;
|
|
344
|
-
existingState.coolingUntil = undefined;
|
|
345
|
-
existingState.backoffLevel = 0;
|
|
346
|
-
existingState.consecutiveRefreshFailures = 0;
|
|
347
|
-
}
|
|
348
|
-
else {
|
|
349
|
-
logger.debug(`[proxy] skipping disabled account=${key.split(":")[1] ?? key}`);
|
|
350
|
-
existingState.permanentlyDisabled = true;
|
|
351
|
-
continue;
|
|
352
|
-
}
|
|
353
|
-
}
|
|
354
|
-
const tokens = await tokenStore.loadTokens(key);
|
|
355
|
-
if (!tokens) {
|
|
356
|
-
continue;
|
|
357
|
-
}
|
|
358
|
-
let accessToken = tokens.accessToken;
|
|
359
|
-
let refreshTok = tokens.refreshToken;
|
|
360
|
-
let expiresAt = tokens.expiresAt;
|
|
361
|
-
// Check if token is expired
|
|
362
|
-
const isExpired = expiresAt ? expiresAt < Date.now() : false;
|
|
363
|
-
if (isExpired) {
|
|
364
|
-
const label = key.split(":")[1] ?? key;
|
|
365
|
-
// Check if already marked dead from a previous request
|
|
366
|
-
const existingState = getOrCreateRuntimeState(key);
|
|
367
|
-
if (existingState.permanentlyDisabled) {
|
|
368
|
-
// Already known dead — skip silently (no log spam)
|
|
369
|
-
continue;
|
|
370
|
-
}
|
|
371
|
-
if (!refreshTok) {
|
|
372
|
-
logger.always(`[proxy] skipping account=${label} (expired, no refresh token)`);
|
|
373
|
-
await disableAccountUntilReauth({ key, label, token: accessToken, type: "oauth" }, existingState);
|
|
374
|
-
continue;
|
|
375
|
-
}
|
|
376
|
-
// Try ONE refresh before adding
|
|
377
|
-
const tempAccount = {
|
|
378
|
-
token: accessToken,
|
|
379
|
-
refreshToken: refreshTok,
|
|
380
|
-
expiresAt,
|
|
381
|
-
label,
|
|
382
|
-
};
|
|
383
|
-
const refreshed = await refreshToken(tempAccount);
|
|
384
|
-
if (!refreshed.success) {
|
|
385
|
-
logger.always(`[proxy] skipping account=${label} (expired, refresh failed: ${refreshed.error?.slice(0, 200) ?? "unknown"})`);
|
|
386
|
-
await disableAccountUntilReauth({ key, label, token: accessToken, type: "oauth" }, existingState);
|
|
387
|
-
continue;
|
|
388
|
-
}
|
|
389
|
-
// Refresh succeeded — use new token and persist
|
|
390
|
-
accessToken = tempAccount.token;
|
|
391
|
-
refreshTok = tempAccount.refreshToken;
|
|
392
|
-
expiresAt = tempAccount.expiresAt;
|
|
393
|
-
await tokenStore.saveTokens(key, {
|
|
394
|
-
accessToken,
|
|
395
|
-
refreshToken: refreshTok,
|
|
396
|
-
expiresAt: expiresAt ?? Date.now() + 3600_000,
|
|
397
|
-
tokenType: "Bearer",
|
|
398
|
-
});
|
|
399
|
-
logger.always(`[proxy] refreshed expired account=${key.split(":")[1] ?? key} at startup`);
|
|
400
|
-
}
|
|
401
|
-
// Detect whether this is an API key or an OAuth token.
|
|
402
|
-
// Use the stored tokenType (set at auth time) rather than a
|
|
403
|
-
// prefix heuristic — both API keys (sk-ant-api03-…) and OAuth
|
|
404
|
-
// access tokens (sk-ant-oat01-…) share the "sk-ant-" prefix.
|
|
405
|
-
const accountType = tokens.tokenType === "Bearer" ? "oauth" : "api_key";
|
|
406
|
-
accounts.push({
|
|
407
|
-
key,
|
|
408
|
-
label: key.split(":")[1] ?? key,
|
|
409
|
-
token: accessToken,
|
|
410
|
-
refreshToken: refreshTok,
|
|
411
|
-
expiresAt,
|
|
412
|
-
type: accountType,
|
|
413
|
-
persistTarget: { providerKey: key },
|
|
414
|
-
});
|
|
415
|
-
}
|
|
416
|
-
// 2. Legacy credentials file (only if no usable compound account was loaded)
|
|
417
|
-
if (accounts.length === 0) {
|
|
418
|
-
try {
|
|
419
|
-
const creds = JSON.parse(fs.readFileSync(legacyCredPath, "utf8"));
|
|
420
|
-
const legacyAccount = await tryLoadLegacyAccount(creds, legacyCredPath);
|
|
421
|
-
if (legacyAccount) {
|
|
422
|
-
accounts.push(legacyAccount);
|
|
423
|
-
}
|
|
424
|
-
}
|
|
425
|
-
catch {
|
|
426
|
-
// no-op: file absent or invalid
|
|
427
|
-
}
|
|
428
|
-
}
|
|
429
|
-
// 3. Env var — only use as fallback when no OAuth accounts are available.
|
|
430
|
-
if (process.env.ANTHROPIC_API_KEY && accounts.length === 0) {
|
|
431
|
-
accounts.push({
|
|
432
|
-
key: "anthropic:env",
|
|
433
|
-
label: "env",
|
|
434
|
-
token: process.env.ANTHROPIC_API_KEY,
|
|
435
|
-
type: "api_key",
|
|
3064
|
+
if (route.provider === "anthropic") {
|
|
3065
|
+
tracer?.setMode("passthrough");
|
|
3066
|
+
if (passthroughMode) {
|
|
3067
|
+
return handleClaudePassthroughRequest({
|
|
3068
|
+
ctx,
|
|
3069
|
+
body,
|
|
3070
|
+
clientRequestBody,
|
|
3071
|
+
tracer,
|
|
3072
|
+
requestStartTime,
|
|
3073
|
+
logProxyBody,
|
|
436
3074
|
});
|
|
437
3075
|
}
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
logger.always(`[proxy] account=${account.label} credentials changed, re-enabling`);
|
|
449
|
-
}
|
|
450
|
-
state.coolingUntil = undefined;
|
|
451
|
-
state.backoffLevel = 0;
|
|
452
|
-
state.consecutiveRefreshFailures = 0;
|
|
453
|
-
state.permanentlyDisabled = false;
|
|
454
|
-
}
|
|
455
|
-
state.lastToken = account.token;
|
|
456
|
-
state.lastRefreshToken = account.refreshToken;
|
|
457
|
-
}
|
|
458
|
-
const enabledAccounts = accounts.filter((account) => {
|
|
459
|
-
return !getOrCreateRuntimeState(account.key)
|
|
460
|
-
.permanentlyDisabled;
|
|
461
|
-
});
|
|
462
|
-
if (enabledAccounts.length === 0) {
|
|
463
|
-
return buildClaudeError(401, formatReauthMessage(accounts.map((account) => account.label)));
|
|
464
|
-
}
|
|
465
|
-
// Order accounts based on the configured strategy.
|
|
466
|
-
// - fill-first: always start with the primary account;
|
|
467
|
-
// only fall over when the primary is cooling down (429/401).
|
|
468
|
-
// - round-robin: rotate the starting index on every request
|
|
469
|
-
// so traffic is spread evenly across accounts.
|
|
470
|
-
const orderedAccounts = [...enabledAccounts];
|
|
471
|
-
// Reset round-robin index when account list size changes
|
|
472
|
-
// (e.g. a new account was authenticated while the proxy was running).
|
|
473
|
-
// Only applies to round-robin; fill-first uses primaryAccountIndex
|
|
474
|
-
// as a sticky primary and should not be disrupted.
|
|
475
|
-
if (accountStrategy === "round-robin" &&
|
|
476
|
-
orderedAccounts.length !== lastKnownAccountCount) {
|
|
477
|
-
primaryAccountIndex = 0;
|
|
478
|
-
lastKnownAccountCount = orderedAccounts.length;
|
|
479
|
-
}
|
|
480
|
-
if (orderedAccounts.length > 1) {
|
|
481
|
-
if (accountStrategy === "round-robin") {
|
|
482
|
-
// Advance the index on every request for even distribution
|
|
483
|
-
const idx = primaryAccountIndex % orderedAccounts.length;
|
|
484
|
-
primaryAccountIndex =
|
|
485
|
-
(primaryAccountIndex + 1) % orderedAccounts.length;
|
|
486
|
-
if (idx > 0) {
|
|
487
|
-
const head = orderedAccounts.splice(0, idx);
|
|
488
|
-
orderedAccounts.push(...head);
|
|
489
|
-
}
|
|
490
|
-
}
|
|
491
|
-
else {
|
|
492
|
-
// fill-first (default): clamp primaryAccountIndex
|
|
493
|
-
const idx = primaryAccountIndex % orderedAccounts.length;
|
|
494
|
-
if (idx > 0) {
|
|
495
|
-
const head = orderedAccounts.splice(0, idx);
|
|
496
|
-
orderedAccounts.push(...head);
|
|
497
|
-
}
|
|
498
|
-
}
|
|
499
|
-
}
|
|
500
|
-
let lastError;
|
|
501
|
-
let sawRateLimit = false;
|
|
502
|
-
let sawNetworkError = false;
|
|
503
|
-
let sawTransientFailure = false;
|
|
504
|
-
let authFailureMessage = null;
|
|
505
|
-
const bodyStr = JSON.stringify(body);
|
|
506
|
-
const requestStart = Date.now();
|
|
507
|
-
const toolCount = Array.isArray(body.tools)
|
|
508
|
-
? body.tools.length
|
|
509
|
-
: 0;
|
|
510
|
-
const url = "https://api.anthropic.com/v1/messages?beta=true";
|
|
511
|
-
const clientHeaders = ctx.headers ?? {};
|
|
512
|
-
for (const account of orderedAccounts) {
|
|
513
|
-
const accountState = getOrCreateRuntimeState(account.key);
|
|
514
|
-
if (accountState.coolingUntil &&
|
|
515
|
-
accountState.coolingUntil > Date.now()) {
|
|
516
|
-
continue;
|
|
517
|
-
}
|
|
518
|
-
const logAttempt = (status, errorType, errorMessage) => {
|
|
519
|
-
logRequest({
|
|
520
|
-
timestamp: new Date().toISOString(),
|
|
521
|
-
requestId: ctx.requestId,
|
|
522
|
-
method: ctx.method,
|
|
523
|
-
path: ctx.path,
|
|
524
|
-
model: body.model,
|
|
525
|
-
stream: !!body.stream,
|
|
526
|
-
toolCount,
|
|
527
|
-
account: account.label,
|
|
528
|
-
accountType: account.type,
|
|
529
|
-
responseStatus: status,
|
|
530
|
-
responseTimeMs: Date.now() - requestStart,
|
|
531
|
-
...(errorType ? { errorType } : {}),
|
|
532
|
-
...(errorMessage ? { errorMessage } : {}),
|
|
533
|
-
});
|
|
534
|
-
};
|
|
535
|
-
// Auto-refresh expiring access tokens once before making the request.
|
|
536
|
-
if (needsRefresh(account)) {
|
|
537
|
-
const refreshed = await refreshToken(account);
|
|
538
|
-
if (refreshed.success) {
|
|
539
|
-
if (account.persistTarget) {
|
|
540
|
-
await persistTokens(account.persistTarget, account);
|
|
541
|
-
}
|
|
542
|
-
accountState.consecutiveRefreshFailures = 0;
|
|
543
|
-
}
|
|
544
|
-
else {
|
|
545
|
-
accountState.consecutiveRefreshFailures += 1;
|
|
546
|
-
lastError = `token refresh failed for account=${account.label}: ${refreshed.error?.slice(0, 200) ?? "unknown"}`;
|
|
547
|
-
logger.debug(`[proxy] preflight refresh failed account=${account.label} failures=${accountState.consecutiveRefreshFailures}`);
|
|
548
|
-
if (accountState.consecutiveRefreshFailures >=
|
|
549
|
-
MAX_CONSECUTIVE_REFRESH_FAILURES) {
|
|
550
|
-
await disableAccountUntilReauth(account, accountState);
|
|
551
|
-
authFailureMessage = formatReauthMessage(account.label);
|
|
552
|
-
logAttempt(401, "authentication_error", String(lastError));
|
|
553
|
-
continue;
|
|
554
|
-
}
|
|
555
|
-
}
|
|
556
|
-
}
|
|
557
|
-
const isOAuth = account.type === "oauth";
|
|
558
|
-
// Decision 6: Passthrough client headers, fill gaps only.
|
|
559
|
-
// Start with a copy of incoming client headers, then set
|
|
560
|
-
// defaults for anything the client didn't send. Always
|
|
561
|
-
// override auth + content-type.
|
|
562
|
-
const headers = {};
|
|
563
|
-
for (const [hk, hv] of Object.entries(clientHeaders)) {
|
|
564
|
-
const lower = hk.toLowerCase();
|
|
565
|
-
if (typeof hv === "string" &&
|
|
566
|
-
!BLOCKED_UPSTREAM_HEADERS.has(lower)) {
|
|
567
|
-
headers[lower] = hv;
|
|
568
|
-
}
|
|
569
|
-
}
|
|
570
|
-
// Always set (override) — auth and content-type are proxy-controlled
|
|
571
|
-
headers["content-type"] = "application/json";
|
|
572
|
-
if (isOAuth) {
|
|
573
|
-
headers["authorization"] = `Bearer ${account.token}`;
|
|
574
|
-
delete headers["x-api-key"];
|
|
575
|
-
}
|
|
576
|
-
else {
|
|
577
|
-
headers["x-api-key"] = account.token;
|
|
578
|
-
delete headers["authorization"];
|
|
579
|
-
}
|
|
580
|
-
// Apply header snapshot defaults for OAuth accounts
|
|
581
|
-
if (isOAuth) {
|
|
582
|
-
await applyHeaderSnapshot(headers, account.label);
|
|
583
|
-
}
|
|
584
|
-
// Hard defaults for anything still missing
|
|
585
|
-
if (!headers["user-agent"]) {
|
|
586
|
-
headers["user-agent"] = "claude-cli/2.1.86 (external, cli)";
|
|
587
|
-
}
|
|
588
|
-
if (!headers["anthropic-version"]) {
|
|
589
|
-
headers["anthropic-version"] = "2023-06-01";
|
|
590
|
-
}
|
|
591
|
-
if (!headers["anthropic-dangerous-direct-browser-access"]) {
|
|
592
|
-
headers["anthropic-dangerous-direct-browser-access"] = "true";
|
|
593
|
-
}
|
|
594
|
-
// Manage anthropic-beta header based on auth type.
|
|
595
|
-
// OAuth requires specific betas; API-key must NOT carry them.
|
|
596
|
-
if (isOAuth) {
|
|
597
|
-
const existing = new Set((headers["anthropic-beta"] ?? "")
|
|
598
|
-
.split(",")
|
|
599
|
-
.map((s) => s.trim())
|
|
600
|
-
.filter(Boolean));
|
|
601
|
-
existing.add("oauth-2025-04-20");
|
|
602
|
-
existing.add("claude-code-20250219");
|
|
603
|
-
headers["anthropic-beta"] = [...existing].join(",");
|
|
604
|
-
}
|
|
605
|
-
else {
|
|
606
|
-
// Strip OAuth-specific betas that may have leaked from client
|
|
607
|
-
const cleaned = (headers["anthropic-beta"] ?? "")
|
|
608
|
-
.split(",")
|
|
609
|
-
.map((s) => s.trim())
|
|
610
|
-
.filter((s) => s && s !== "oauth-2025-04-20")
|
|
611
|
-
.join(",");
|
|
612
|
-
if (cleaned) {
|
|
613
|
-
headers["anthropic-beta"] = cleaned;
|
|
614
|
-
}
|
|
615
|
-
else {
|
|
616
|
-
delete headers["anthropic-beta"];
|
|
617
|
-
}
|
|
618
|
-
}
|
|
619
|
-
// Polyfill request body for OAuth accounts
|
|
620
|
-
// Builds the upstream request body on demand. OAuth accounts get the result of
// polyfillOAuthBody(bodyStr, account.token); every other account gets bodyStr as-is.
// account.token is read when the function is called, not when the closure is created.
const buildUpstreamBody = () => {
    if (isOAuth) {
        return polyfillOAuthBody(bodyStr, account.token);
    }
    return bodyStr;
};
|
|
621
|
-
const finalBodyStr = buildUpstreamBody();
|
|
622
|
-
logger.always(`[proxy] → account=${account.label} (${account.type})`);
|
|
623
|
-
recordRequest(account.label, account.type);
|
|
624
|
-
// Log full request for debugging (written to ~/.neurolink/logs/proxy-debug-*.jsonl)
|
|
625
|
-
const fetchStartMs = Date.now();
|
|
626
|
-
let response;
|
|
627
|
-
try {
|
|
628
|
-
response = await fetch(url, {
|
|
629
|
-
method: "POST",
|
|
630
|
-
headers,
|
|
631
|
-
body: finalBodyStr,
|
|
632
|
-
signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
|
|
633
|
-
});
|
|
634
|
-
}
|
|
635
|
-
catch (fetchErr) {
|
|
636
|
-
if (!isRetryableNetworkError(fetchErr)) {
|
|
637
|
-
throw fetchErr;
|
|
638
|
-
}
|
|
639
|
-
// Decision 8: Network errors — immediate rotation, no cooldown
|
|
640
|
-
sawNetworkError = true;
|
|
641
|
-
recordError(account.label, account.type, 502);
|
|
642
|
-
const errorCode = getErrorCode(fetchErr) ?? "unknown";
|
|
643
|
-
const errorMessage = fetchErr instanceof Error
|
|
644
|
-
? fetchErr.message
|
|
645
|
-
: String(fetchErr);
|
|
646
|
-
lastError = errorMessage;
|
|
647
|
-
logger.always(`[proxy] fetch error account=${account.label} code=${errorCode} (rotating): ${errorMessage}`);
|
|
648
|
-
logAttempt(502, "network_error", errorMessage);
|
|
649
|
-
continue;
|
|
650
|
-
}
|
|
651
|
-
// Check 429 (with Retry-After + exponential backoff) → continue.
|
|
652
|
-
if (response.status === 429) {
|
|
653
|
-
sawRateLimit = true;
|
|
654
|
-
const retryAfter = response.headers.get("retry-after");
|
|
655
|
-
let cooldownMs = 0;
|
|
656
|
-
if (retryAfter) {
|
|
657
|
-
const seconds = parseInt(retryAfter, 10);
|
|
658
|
-
if (!Number.isNaN(seconds)) {
|
|
659
|
-
cooldownMs = seconds * 1000;
|
|
660
|
-
}
|
|
661
|
-
else {
|
|
662
|
-
const date = new Date(retryAfter);
|
|
663
|
-
// eslint-disable-next-line max-depth
|
|
664
|
-
if (!Number.isNaN(date.getTime())) {
|
|
665
|
-
cooldownMs = Math.max(date.getTime() - Date.now(), 1000);
|
|
666
|
-
}
|
|
667
|
-
}
|
|
668
|
-
}
|
|
669
|
-
const level = accountState.backoffLevel;
|
|
670
|
-
const baseCooldown = cooldownMs > 0 ? cooldownMs : RATE_LIMIT_BACKOFF_BASE_MS;
|
|
671
|
-
const backoffMs = Math.min(baseCooldown * Math.pow(2, level), RATE_LIMIT_BACKOFF_CAP_MS);
|
|
672
|
-
accountState.coolingUntil = Date.now() + backoffMs;
|
|
673
|
-
accountState.backoffLevel += 1;
|
|
674
|
-
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
675
|
-
recordError(account.label, account.type, 429);
|
|
676
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
677
|
-
lastError = await response.text();
|
|
678
|
-
logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(backoffMs / 1000)}s`);
|
|
679
|
-
logAttempt(429, "rate_limit_error", String(lastError));
|
|
680
|
-
continue;
|
|
681
|
-
}
|
|
682
|
-
// On 401 for refreshable OAuth: refresh token and retry before failing over.
|
|
683
|
-
if (response.status === 401 &&
|
|
684
|
-
account.type === "oauth" &&
|
|
685
|
-
account.refreshToken) {
|
|
686
|
-
recordError(account.label, account.type, 401);
|
|
687
|
-
let authRetrySucceeded = false;
|
|
688
|
-
let authRetryError = "received 401 from Anthropic";
|
|
689
|
-
for (let authRetry = 0; authRetry < MAX_AUTH_RETRIES; authRetry++) {
|
|
690
|
-
logger.always(`[proxy] ← 401 account=${account.label} refreshing (attempt ${authRetry + 1}/${MAX_AUTH_RETRIES})`);
|
|
691
|
-
const refreshSucceeded = await refreshToken(account);
|
|
692
|
-
if (!refreshSucceeded.success) {
|
|
693
|
-
accountState.consecutiveRefreshFailures += 1;
|
|
694
|
-
authRetryError = `refresh failed for account=${account.label} attempt ${authRetry + 1}/${MAX_AUTH_RETRIES}: ${refreshSucceeded.error?.slice(0, 200) ?? "unknown"}`;
|
|
695
|
-
lastError = authRetryError;
|
|
696
|
-
logger.always(`[proxy] ⚠ account=${account.label} refresh failed on attempt ${authRetry + 1}`);
|
|
697
|
-
// eslint-disable-next-line max-depth
|
|
698
|
-
if (accountState.consecutiveRefreshFailures >=
|
|
699
|
-
MAX_CONSECUTIVE_REFRESH_FAILURES) {
|
|
700
|
-
await disableAccountUntilReauth(account, accountState);
|
|
701
|
-
authFailureMessage = formatReauthMessage(account.label);
|
|
702
|
-
break;
|
|
703
|
-
}
|
|
704
|
-
// eslint-disable-next-line max-depth
|
|
705
|
-
if (authRetry < MAX_AUTH_RETRIES - 1) {
|
|
706
|
-
await sleep(2000);
|
|
707
|
-
}
|
|
708
|
-
continue;
|
|
709
|
-
}
|
|
710
|
-
if (account.persistTarget) {
|
|
711
|
-
await persistTokens(account.persistTarget, account);
|
|
712
|
-
}
|
|
713
|
-
headers.authorization = `Bearer ${account.token}`;
|
|
714
|
-
try {
|
|
715
|
-
const retryResp = await fetch(url, {
|
|
716
|
-
method: "POST",
|
|
717
|
-
headers,
|
|
718
|
-
body: buildUpstreamBody(),
|
|
719
|
-
signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
|
|
720
|
-
});
|
|
721
|
-
// eslint-disable-next-line max-depth
|
|
722
|
-
if (retryResp.ok) {
|
|
723
|
-
authRetrySucceeded = true;
|
|
724
|
-
accountState.consecutiveRefreshFailures = 0;
|
|
725
|
-
accountState.backoffLevel = 0;
|
|
726
|
-
accountState.coolingUntil = undefined;
|
|
727
|
-
logger.always(`[proxy] ← 200 account=${account.label} (after ${authRetry + 1} refresh(es))`);
|
|
728
|
-
recordSuccess(account.label, account.type);
|
|
729
|
-
logAttempt(retryResp.status);
|
|
730
|
-
// Capture quota headers after successful auth-retry
|
|
731
|
-
{
|
|
732
|
-
const retryQuota = parseQuotaHeaders(retryResp.headers);
|
|
733
|
-
// eslint-disable-next-line max-depth
|
|
734
|
-
if (retryQuota) {
|
|
735
|
-
saveAccountQuota(account.label, retryQuota).catch(() => { });
|
|
736
|
-
}
|
|
737
|
-
}
|
|
738
|
-
// eslint-disable-next-line max-depth
|
|
739
|
-
if (body.stream && retryResp.body) {
|
|
740
|
-
const retryReader = retryResp.body.getReader();
|
|
741
|
-
let retryStreamClosed = false;
|
|
742
|
-
// Client-facing stream for the auth-retry response. Each pull() forwards one chunk
// read from retryReader. A read failure is logged and converted into a final SSE
// `error` event so the client sees a structured error before the stream closes.
// retryStreamClosed guards every controller call so nothing is enqueued or closed
// after the stream has already been closed or cancelled.
const retryStream = new ReadableStream({
    async pull(controller) {
        if (retryStreamClosed) {
            return;
        }
        try {
            const { done, value } = await retryReader.read();
            // The consumer may have cancelled while the read was pending.
            if (retryStreamClosed) {
                return;
            }
            if (done) {
                retryStreamClosed = true;
                controller.close();
                return;
            }
            controller.enqueue(value);
        }
        catch (streamErr) {
            const errMsg = streamErr instanceof Error
                ? streamErr.message
                : String(streamErr);
            logger.always(`[proxy] mid-stream error (auth-retry) account=${account.label}: ${errMsg}`);
            logStreamError({
                timestamp: new Date().toISOString(),
                requestId: ctx.requestId,
                account: account.label,
                model: body.model,
                errorMessage: errMsg,
                durationMs: Date.now() - fetchStartMs,
            });
            if (!retryStreamClosed) {
                retryStreamClosed = true;
                const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
                controller.enqueue(new TextEncoder().encode(errorEvent));
                controller.close();
            }
        }
    },
    /**
     * Propagate a consumer cancel to the upstream reader.
     *
     * The promise from retryReader.cancel() is returned instead of dropped.
     * Cancelling an already-errored upstream rejects, and a rejection with no
     * handler surfaces as a process-level unhandled rejection. Upstream
     * cancellation is best-effort, so a failure is logged at debug level and
     * the cancel still resolves.
     *
     * @param {*} reason - Cancel reason supplied by the consumer, forwarded upstream.
     * @returns {Promise<void>} Settles once the upstream cancel has been attempted.
     */
    cancel(reason) {
        retryStreamClosed = true;
        return retryReader.cancel(reason).catch((cancelErr) => {
            const cancelMsg = cancelErr instanceof Error
                ? cancelErr.message
                : String(cancelErr);
            logger.debug(`[proxy] upstream cancel failed (auth-retry) account=${account.label}: ${cancelMsg}`);
        });
    },
});
|
|
785
|
-
const responseHeaders = {
|
|
786
|
-
"content-type": "text/event-stream",
|
|
787
|
-
"cache-control": "no-cache",
|
|
788
|
-
connection: "keep-alive",
|
|
789
|
-
};
|
|
790
|
-
// eslint-disable-next-line max-depth
|
|
791
|
-
for (const h of [
|
|
792
|
-
"retry-after",
|
|
793
|
-
"anthropic-ratelimit-requests-remaining",
|
|
794
|
-
"anthropic-ratelimit-requests-limit",
|
|
795
|
-
"anthropic-ratelimit-tokens-remaining",
|
|
796
|
-
"anthropic-ratelimit-tokens-limit",
|
|
797
|
-
]) {
|
|
798
|
-
const val = retryResp.headers.get(h);
|
|
799
|
-
// eslint-disable-next-line max-depth
|
|
800
|
-
if (val) {
|
|
801
|
-
responseHeaders[h] = val;
|
|
802
|
-
}
|
|
803
|
-
}
|
|
804
|
-
return new Response(retryStream, {
|
|
805
|
-
status: retryResp.status,
|
|
806
|
-
headers: responseHeaders,
|
|
807
|
-
});
|
|
808
|
-
}
|
|
809
|
-
return retryResp.json();
|
|
810
|
-
}
|
|
811
|
-
const retryStatus = retryResp.status;
|
|
812
|
-
const retryBody = await retryResp.text();
|
|
813
|
-
authRetryError = `retry ${authRetry + 1}/${MAX_AUTH_RETRIES} failed with status ${retryStatus}`;
|
|
814
|
-
lastError = retryBody;
|
|
815
|
-
logger.debug(`[proxy] retry ${authRetry + 1} failed: ${retryStatus} ${retryBody.substring(0, 120)}`);
|
|
816
|
-
recordError(account.label, account.type, retryStatus);
|
|
817
|
-
// eslint-disable-next-line max-depth
|
|
818
|
-
if (retryStatus === 429) {
|
|
819
|
-
sawRateLimit = true;
|
|
820
|
-
const retryAfter = retryResp.headers.get("retry-after");
|
|
821
|
-
const parsedRetryAfter = parseInt(retryAfter ?? "", 10);
|
|
822
|
-
const cooldownMs = Number.isNaN(parsedRetryAfter)
|
|
823
|
-
? 60_000
|
|
824
|
-
: Math.max(1, parsedRetryAfter) * 1000;
|
|
825
|
-
accountState.coolingUntil = Date.now() + cooldownMs;
|
|
826
|
-
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
827
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
828
|
-
break;
|
|
829
|
-
}
|
|
830
|
-
// eslint-disable-next-line max-depth
|
|
831
|
-
if (retryStatus === 401 ||
|
|
832
|
-
retryStatus === 402 ||
|
|
833
|
-
retryStatus === 403) {
|
|
834
|
-
// eslint-disable-next-line max-depth
|
|
835
|
-
if (authRetry < MAX_AUTH_RETRIES - 1) {
|
|
836
|
-
await sleep(1000);
|
|
837
|
-
}
|
|
838
|
-
continue;
|
|
839
|
-
}
|
|
840
|
-
// eslint-disable-next-line max-depth
|
|
841
|
-
if (isTransientHttpFailure(retryStatus, retryBody)) {
|
|
842
|
-
// Decision 8: No cooldown for transient errors — rotate immediately
|
|
843
|
-
sawTransientFailure = true;
|
|
844
|
-
break;
|
|
845
|
-
}
|
|
846
|
-
logAttempt(retryStatus, "api_error", summarizeErrorMessage(retryBody));
|
|
847
|
-
// eslint-disable-next-line max-depth
|
|
848
|
-
try {
|
|
849
|
-
return JSON.parse(retryBody);
|
|
850
|
-
}
|
|
851
|
-
catch {
|
|
852
|
-
return buildClaudeError(retryStatus, retryBody);
|
|
853
|
-
}
|
|
854
|
-
}
|
|
855
|
-
catch (retryFetchErr) {
|
|
856
|
-
// Decision 8: No cooldown for network errors — rotate immediately
|
|
857
|
-
sawNetworkError = true;
|
|
858
|
-
recordError(account.label, account.type, 502);
|
|
859
|
-
const message = retryFetchErr instanceof Error
|
|
860
|
-
? retryFetchErr.message
|
|
861
|
-
: String(retryFetchErr);
|
|
862
|
-
authRetryError = `network error on retry ${authRetry + 1}: ${message}`;
|
|
863
|
-
lastError = authRetryError;
|
|
864
|
-
logger.debug(`[proxy] ${authRetryError}`);
|
|
865
|
-
break;
|
|
866
|
-
}
|
|
867
|
-
}
|
|
868
|
-
if (!authRetrySucceeded) {
|
|
869
|
-
// eslint-disable-next-line max-depth
|
|
870
|
-
if (!accountState.permanentlyDisabled) {
|
|
871
|
-
// eslint-disable-next-line max-depth
|
|
872
|
-
if (!accountState.coolingUntil ||
|
|
873
|
-
accountState.coolingUntil <= Date.now()) {
|
|
874
|
-
accountState.coolingUntil =
|
|
875
|
-
Date.now() + AUTH_COOLDOWN_MS;
|
|
876
|
-
}
|
|
877
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
878
|
-
}
|
|
879
|
-
lastError = authRetryError;
|
|
880
|
-
logger.always(`[proxy] ⚠ account=${account.label} auth retries exhausted, cooldown=5min`);
|
|
881
|
-
logAttempt(401, "authentication_error", authRetryError);
|
|
882
|
-
continue;
|
|
883
|
-
}
|
|
884
|
-
}
|
|
885
|
-
if (!response.ok) {
|
|
886
|
-
const errBody = await response.text();
|
|
887
|
-
// Log full error for debugging
|
|
888
|
-
const errRespHeaders = {};
|
|
889
|
-
response.headers.forEach((v, k) => {
|
|
890
|
-
errRespHeaders[k] = v;
|
|
891
|
-
});
|
|
892
|
-
logFullRequestResponse({
|
|
893
|
-
timestamp: new Date().toISOString(),
|
|
894
|
-
requestId: ctx.requestId,
|
|
895
|
-
account: account.label,
|
|
896
|
-
model: body.model,
|
|
897
|
-
stream: !!body.stream,
|
|
898
|
-
requestHeaders: redactSensitiveHeaders(headers),
|
|
899
|
-
requestBody: {
|
|
900
|
-
model: body.model,
|
|
901
|
-
max_tokens: body.max_tokens,
|
|
902
|
-
stream: body.stream,
|
|
903
|
-
system: Array.isArray(body.system)
|
|
904
|
-
? `[${body.system.length} blocks]`
|
|
905
|
-
: typeof body.system,
|
|
906
|
-
messages: Array.isArray(body.messages)
|
|
907
|
-
? `[${body.messages.length} messages]`
|
|
908
|
-
: "?",
|
|
909
|
-
tools: Array.isArray(body.tools)
|
|
910
|
-
? `[${body.tools.length} tools]`
|
|
911
|
-
: "none",
|
|
912
|
-
tool_choice: body.tool_choice,
|
|
913
|
-
thinking: body.thinking,
|
|
914
|
-
},
|
|
915
|
-
requestBodySize: bodyStr.length,
|
|
916
|
-
responseStatus: response.status,
|
|
917
|
-
responseHeaders: errRespHeaders,
|
|
918
|
-
responseBody: errBody.substring(0, 2000),
|
|
919
|
-
responseBodySize: errBody.length,
|
|
920
|
-
durationMs: Date.now() - fetchStartMs,
|
|
921
|
-
});
|
|
922
|
-
// Request-shape errors (do not retry).
|
|
923
|
-
if (isInvalidRequestError(response.status, errBody)) {
|
|
924
|
-
logger.always(`[proxy] ← ${response.status} request-shape error (no retry)`);
|
|
925
|
-
logAttempt(response.status, "invalid_request_error", summarizeErrorMessage(errBody));
|
|
926
|
-
try {
|
|
927
|
-
return JSON.parse(errBody);
|
|
928
|
-
}
|
|
929
|
-
catch {
|
|
930
|
-
return buildClaudeError(response.status, errBody);
|
|
931
|
-
}
|
|
932
|
-
}
|
|
933
|
-
// Auth failures for OAuth accounts without refresh token.
|
|
934
|
-
if ((response.status === 401 ||
|
|
935
|
-
response.status === 402 ||
|
|
936
|
-
response.status === 403) &&
|
|
937
|
-
account.type === "oauth" &&
|
|
938
|
-
!account.refreshToken) {
|
|
939
|
-
recordError(account.label, account.type, response.status);
|
|
940
|
-
accountState.consecutiveRefreshFailures += 1;
|
|
941
|
-
accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
|
|
942
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
943
|
-
if (accountState.consecutiveRefreshFailures >=
|
|
944
|
-
MAX_CONSECUTIVE_REFRESH_FAILURES) {
|
|
945
|
-
await disableAccountUntilReauth(account, accountState);
|
|
946
|
-
}
|
|
947
|
-
authFailureMessage = formatReauthMessage(account.label);
|
|
948
|
-
logger.always(`[proxy] ← ${response.status} account=${account.label} cooldown=5min`);
|
|
949
|
-
lastError = errBody;
|
|
950
|
-
logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
|
|
951
|
-
continue;
|
|
952
|
-
}
|
|
953
|
-
// Auth failures for API-key accounts.
|
|
954
|
-
if ((response.status === 401 ||
|
|
955
|
-
response.status === 402 ||
|
|
956
|
-
response.status === 403) &&
|
|
957
|
-
account.type === "api_key") {
|
|
958
|
-
recordError(account.label, account.type, response.status);
|
|
959
|
-
authFailureMessage =
|
|
960
|
-
"Authentication failed for Anthropic API key credentials. Update ANTHROPIC_API_KEY or re-login with OAuth.";
|
|
961
|
-
accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
|
|
962
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
963
|
-
logger.always(`[proxy] ← ${response.status} account=${account.label} cooldown=5min`);
|
|
964
|
-
lastError = errBody;
|
|
965
|
-
logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
|
|
966
|
-
continue;
|
|
967
|
-
}
|
|
968
|
-
// 404 is generally model/account specific; return immediately (no cooldown per Decision 8).
|
|
969
|
-
if (response.status === 404) {
|
|
970
|
-
recordError(account.label, account.type, response.status);
|
|
971
|
-
logger.always(`[proxy] ← 404 account=${account.label}`);
|
|
972
|
-
logAttempt(404, "not_found_error", summarizeErrorMessage(errBody));
|
|
973
|
-
try {
|
|
974
|
-
return JSON.parse(errBody);
|
|
975
|
-
}
|
|
976
|
-
catch {
|
|
977
|
-
return buildClaudeError(404, errBody);
|
|
978
|
-
}
|
|
979
|
-
}
|
|
980
|
-
// Decision 8: Transient upstream failures — immediate rotation, NO cooldown.
|
|
981
|
-
if (isTransientHttpFailure(response.status, errBody)) {
|
|
982
|
-
recordError(account.label, account.type, response.status);
|
|
983
|
-
sawTransientFailure = true;
|
|
984
|
-
// No cooldown for transient errors (502, 503, etc.) — rotate immediately
|
|
985
|
-
logger.always(`[proxy] ← ${response.status} account=${account.label} (transient, rotating)`);
|
|
986
|
-
lastError = errBody;
|
|
987
|
-
logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
|
|
988
|
-
continue;
|
|
989
|
-
}
|
|
990
|
-
// Other non-ok errors → return as-is.
|
|
991
|
-
recordError(account.label, account.type, response.status);
|
|
992
|
-
logger.always(`[proxy] ← ${response.status} account=${account.label}`);
|
|
993
|
-
logger.debug(`[claude-proxy] error body: ${errBody.substring(0, 200)}`);
|
|
994
|
-
logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
|
|
995
|
-
try {
|
|
996
|
-
return JSON.parse(errBody);
|
|
997
|
-
}
|
|
998
|
-
catch {
|
|
999
|
-
return buildClaudeError(response.status, errBody);
|
|
1000
|
-
}
|
|
1001
|
-
}
|
|
1002
|
-
// Success path.
|
|
1003
|
-
accountState.backoffLevel = 0;
|
|
1004
|
-
accountState.coolingUntil = undefined;
|
|
1005
|
-
accountState.consecutiveRefreshFailures = 0;
|
|
1006
|
-
recordSuccess(account.label, account.type);
|
|
1007
|
-
logger.always(`[proxy] ← ${response.status} account=${account.label}`);
|
|
1008
|
-
logAttempt(response.status);
|
|
1009
|
-
// Capture quota/utilisation headers (fire-and-forget).
|
|
1010
|
-
const quota = parseQuotaHeaders(response.headers);
|
|
1011
|
-
if (quota) {
|
|
1012
|
-
saveAccountQuota(account.label, quota).catch(() => {
|
|
1013
|
-
// Non-fatal: quota persistence is best-effort
|
|
1014
|
-
});
|
|
1015
|
-
}
|
|
1016
|
-
// Log full request + response headers for debugging
|
|
1017
|
-
const respHeaders = {};
|
|
1018
|
-
response.headers.forEach((v, k) => {
|
|
1019
|
-
respHeaders[k] = v;
|
|
1020
|
-
});
|
|
1021
|
-
logFullRequestResponse({
|
|
1022
|
-
timestamp: new Date().toISOString(),
|
|
1023
|
-
requestId: ctx.requestId,
|
|
1024
|
-
account: account.label,
|
|
1025
|
-
model: body.model,
|
|
1026
|
-
stream: !!body.stream,
|
|
1027
|
-
requestHeaders: redactSensitiveHeaders(headers),
|
|
1028
|
-
requestBody: {
|
|
1029
|
-
model: body.model,
|
|
1030
|
-
max_tokens: body.max_tokens,
|
|
1031
|
-
stream: body.stream,
|
|
1032
|
-
system: Array.isArray(body.system)
|
|
1033
|
-
? `[${body.system.length} blocks]`
|
|
1034
|
-
: typeof body.system,
|
|
1035
|
-
messages: Array.isArray(body.messages)
|
|
1036
|
-
? `[${body.messages.length} messages]`
|
|
1037
|
-
: "?",
|
|
1038
|
-
tools: Array.isArray(body.tools)
|
|
1039
|
-
? `[${body.tools.length} tools]`
|
|
1040
|
-
: "none",
|
|
1041
|
-
tool_choice: body.tool_choice,
|
|
1042
|
-
thinking: body.thinking,
|
|
1043
|
-
metadata: body.metadata ? "present" : "absent",
|
|
1044
|
-
},
|
|
1045
|
-
requestBodySize: bodyStr.length,
|
|
1046
|
-
responseStatus: response.status,
|
|
1047
|
-
responseHeaders: respHeaders,
|
|
1048
|
-
durationMs: Date.now() - fetchStartMs,
|
|
1049
|
-
});
|
|
1050
|
-
if (body.stream) {
|
|
1051
|
-
// Bootstrap retry: read first chunk to verify stream is valid.
|
|
1052
|
-
if (response.body) {
|
|
1053
|
-
const reader = response.body.getReader();
|
|
1054
|
-
const firstChunk = await reader.read();
|
|
1055
|
-
if (firstChunk.done ||
|
|
1056
|
-
!firstChunk.value ||
|
|
1057
|
-
firstChunk.value.length === 0) {
|
|
1058
|
-
// Empty stream — retry with next account.
|
|
1059
|
-
reader.cancel();
|
|
1060
|
-
accountState.coolingUntil = Date.now() + 10_000;
|
|
1061
|
-
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
1062
|
-
logger.always(`[proxy] ← empty stream from account=${account.label}, trying next`);
|
|
1063
|
-
continue;
|
|
1064
|
-
}
|
|
1065
|
-
// Stream is valid — create a new ReadableStream with first chunk prepended.
|
|
1066
|
-
let mainStreamClosed = false;
|
|
1067
|
-
// Client-facing stream for the primary upstream response. firstChunk was already
// read to check the stream is not empty, so start() replays it before pull()
// forwards the remaining chunks from reader. A read failure is logged and converted
// into a final SSE `error` event. mainStreamClosed guards every controller call so
// nothing is enqueued or closed after the stream has been closed or cancelled.
const remainingStream = new ReadableStream({
    start(controller) {
        controller.enqueue(firstChunk.value);
    },
    async pull(controller) {
        if (mainStreamClosed) {
            return;
        }
        try {
            const { done, value } = await reader.read();
            // The consumer may have cancelled while the read was pending.
            if (mainStreamClosed) {
                return;
            }
            if (done) {
                mainStreamClosed = true;
                controller.close();
                return;
            }
            controller.enqueue(value);
        }
        catch (streamErr) {
            const errMsg = streamErr instanceof Error
                ? streamErr.message
                : String(streamErr);
            logger.always(`[proxy] mid-stream error account=${account.label}: ${errMsg}`);
            logStreamError({
                timestamp: new Date().toISOString(),
                requestId: ctx.requestId,
                account: account.label,
                model: body.model,
                errorMessage: errMsg,
                durationMs: Date.now() - fetchStartMs,
            });
            // Send SSE error event so the client gets a meaningful error
            // instead of a raw connection drop
            if (!mainStreamClosed) {
                mainStreamClosed = true;
                const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
                controller.enqueue(new TextEncoder().encode(errorEvent));
                controller.close();
            }
        }
    },
    /**
     * Propagate a consumer cancel to the upstream reader.
     *
     * The promise from reader.cancel() is returned instead of dropped.
     * Cancelling an already-errored upstream rejects, and a rejection with no
     * handler surfaces as a process-level unhandled rejection. Upstream
     * cancellation is best-effort, so a failure is logged at debug level and
     * the cancel still resolves.
     *
     * @param {*} reason - Cancel reason supplied by the consumer, forwarded upstream.
     * @returns {Promise<void>} Settles once the upstream cancel has been attempted.
     */
    cancel(reason) {
        mainStreamClosed = true;
        return reader.cancel(reason).catch((cancelErr) => {
            const cancelMsg = cancelErr instanceof Error
                ? cancelErr.message
                : String(cancelErr);
            logger.debug(`[proxy] upstream cancel failed account=${account.label}: ${cancelMsg}`);
        });
    },
});
|
|
1115
|
-
// Forward rate limit headers from Anthropic.
|
|
1116
|
-
const responseHeaders = {
|
|
1117
|
-
"content-type": "text/event-stream",
|
|
1118
|
-
"cache-control": "no-cache",
|
|
1119
|
-
connection: "keep-alive",
|
|
1120
|
-
};
|
|
1121
|
-
for (const h of [
|
|
1122
|
-
"retry-after",
|
|
1123
|
-
"anthropic-ratelimit-requests-remaining",
|
|
1124
|
-
"anthropic-ratelimit-requests-limit",
|
|
1125
|
-
"anthropic-ratelimit-tokens-remaining",
|
|
1126
|
-
"anthropic-ratelimit-tokens-limit",
|
|
1127
|
-
]) {
|
|
1128
|
-
const val = response.headers.get(h);
|
|
1129
|
-
// eslint-disable-next-line max-depth
|
|
1130
|
-
if (val) {
|
|
1131
|
-
responseHeaders[h] = val;
|
|
1132
|
-
}
|
|
1133
|
-
}
|
|
1134
|
-
return new Response(remainingStream, {
|
|
1135
|
-
status: response.status,
|
|
1136
|
-
headers: responseHeaders,
|
|
1137
|
-
});
|
|
1138
|
-
}
|
|
1139
|
-
return buildClaudeError(502, "No response body from upstream");
|
|
1140
|
-
}
|
|
1141
|
-
// Non-streaming: return JSON directly.
|
|
1142
|
-
return response.json();
|
|
1143
|
-
}
|
|
1144
|
-
// All accounts exhausted — compute earliest recovery time.
|
|
1145
|
-
const earliestRecovery = orderedAccounts.reduce((min, account) => {
|
|
1146
|
-
const coolingUntil = getOrCreateRuntimeState(account.key).coolingUntil;
|
|
1147
|
-
return coolingUntil ? Math.min(min, coolingUntil) : min;
|
|
1148
|
-
}, Infinity);
|
|
1149
|
-
const retryAfterSec = Number.isFinite(earliestRecovery)
|
|
1150
|
-
? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
|
|
1151
|
-
: 60;
|
|
1152
|
-
// Try fallback chain (alternative providers)
|
|
1153
|
-
const chain = modelRouter?.getFallbackChain() ?? [];
|
|
1154
|
-
for (const fallback of chain) {
|
|
1155
|
-
try {
|
|
1156
|
-
logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
|
|
1157
|
-
const parsed = parseClaudeRequest(body);
|
|
1158
|
-
const opts = {
|
|
1159
|
-
input: {
|
|
1160
|
-
text: parsed.prompt,
|
|
1161
|
-
...(parsed.images.length > 0
|
|
1162
|
-
? { images: parsed.images }
|
|
1163
|
-
: {}),
|
|
1164
|
-
},
|
|
1165
|
-
provider: fallback.provider,
|
|
1166
|
-
model: fallback.model,
|
|
1167
|
-
systemPrompt: parsed.systemPrompt,
|
|
1168
|
-
maxTokens: parsed.maxTokens,
|
|
1169
|
-
...(parsed.temperature !== undefined
|
|
1170
|
-
? { temperature: parsed.temperature }
|
|
1171
|
-
: {}),
|
|
1172
|
-
...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
|
|
1173
|
-
...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
|
|
1174
|
-
...(parsed.stopSequences?.length
|
|
1175
|
-
? { stopSequences: parsed.stopSequences }
|
|
1176
|
-
: {}),
|
|
1177
|
-
tools: parsed.tools,
|
|
1178
|
-
...(parsed.toolChoice
|
|
1179
|
-
? { toolChoice: parsed.toolChoice }
|
|
1180
|
-
: {}),
|
|
1181
|
-
...(parsed.thinkingConfig
|
|
1182
|
-
? { thinkingConfig: parsed.thinkingConfig }
|
|
1183
|
-
: {}),
|
|
1184
|
-
...(parsed.conversationMessages?.length
|
|
1185
|
-
? {
|
|
1186
|
-
conversationMessages: parsed.conversationMessages.slice(0, -1),
|
|
1187
|
-
}
|
|
1188
|
-
: {}),
|
|
1189
|
-
maxSteps: 1,
|
|
1190
|
-
};
|
|
1191
|
-
if (body.stream) {
|
|
1192
|
-
const streamResult = await ctx.neurolink.stream(opts);
|
|
1193
|
-
const serializer = new ClaudeStreamSerializer(body.model, 0);
|
|
1194
|
-
/**
 * Async generator that turns the fallback provider's stream into serializer frames.
 *
 * Frames are yielded in this order: serializer.start(), one serializer.pushDelta()
 * group per chunk whose extractText() result is non-empty, one
 * serializer.pushToolUse() group per entry in streamResult.toolCalls, and finally
 * serializer.finish().
 */
async function* sseGenerator() {
    const openingFrames = serializer.start();
    for (const frame of openingFrames) {
        yield frame;
    }
    for await (const chunk of streamResult.stream) {
        const delta = extractText(chunk);
        if (!delta) {
            continue;
        }
        const deltaFrames = serializer.pushDelta(delta);
        for (const frame of deltaFrames) {
            yield frame;
        }
    }
    // Emit tool_use blocks if the model wants to call tools.
    const requestedCalls = streamResult.toolCalls?.length
        ? streamResult.toolCalls
        : [];
    for (const call of requestedCalls) {
        // Tool calls may carry either toolName/args or name/parameters.
        const callName = call.toolName ?? call.name ?? "unknown";
        const callInput = call.args ?? call.parameters ?? {};
        const callFrames = serializer.pushToolUse(generateToolUseId(), callName, callInput);
        for (const frame of callFrames) {
            yield frame;
        }
    }
    const stopReason = streamResult.finishReason ?? "end_turn";
    const closingFrames = serializer.finish(0, stopReason);
    for (const frame of closingFrames) {
        yield frame;
    }
}
|
|
1225
|
-
return sseGenerator();
|
|
1226
|
-
}
|
|
1227
|
-
const streamResult = await ctx.neurolink.stream(opts);
|
|
1228
|
-
let collectedText = "";
|
|
1229
|
-
for await (const chunk of streamResult.stream) {
|
|
1230
|
-
const text = extractText(chunk);
|
|
1231
|
-
if (text) {
|
|
1232
|
-
collectedText += text;
|
|
1233
|
-
}
|
|
1234
|
-
}
|
|
1235
|
-
const internal = {
|
|
1236
|
-
content: collectedText,
|
|
1237
|
-
model: streamResult.model,
|
|
1238
|
-
finishReason: streamResult.finishReason ?? "end_turn",
|
|
1239
|
-
reasoning: undefined,
|
|
1240
|
-
usage: streamResult.usage
|
|
1241
|
-
? {
|
|
1242
|
-
input: streamResult.usage.input ??
|
|
1243
|
-
0,
|
|
1244
|
-
output: streamResult.usage
|
|
1245
|
-
.output ?? 0,
|
|
1246
|
-
total: streamResult.usage.total ??
|
|
1247
|
-
0,
|
|
1248
|
-
}
|
|
1249
|
-
: undefined,
|
|
1250
|
-
toolCalls: streamResult.toolCalls,
|
|
1251
|
-
};
|
|
1252
|
-
return serializeClaudeResponse(internal, body.model);
|
|
1253
|
-
}
|
|
1254
|
-
catch (fallbackErr) {
|
|
1255
|
-
logger.debug(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
|
|
1256
|
-
continue;
|
|
1257
|
-
}
|
|
1258
|
-
}
|
|
1259
|
-
// If no explicit fallback chain is configured, try SDK auto-provider fallback.
|
|
1260
|
-
if (chain.length === 0) {
|
|
1261
|
-
try {
|
|
1262
|
-
logger.always("[proxy] fallback → auto-provider");
|
|
1263
|
-
const parsed = parseClaudeRequest(body);
|
|
1264
|
-
const opts = {
|
|
1265
|
-
input: {
|
|
1266
|
-
text: parsed.prompt,
|
|
1267
|
-
...(parsed.images.length > 0
|
|
1268
|
-
? { images: parsed.images }
|
|
1269
|
-
: {}),
|
|
1270
|
-
},
|
|
1271
|
-
systemPrompt: parsed.systemPrompt,
|
|
1272
|
-
maxTokens: parsed.maxTokens,
|
|
1273
|
-
...(parsed.temperature !== undefined
|
|
1274
|
-
? { temperature: parsed.temperature }
|
|
1275
|
-
: {}),
|
|
1276
|
-
...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
|
|
1277
|
-
...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
|
|
1278
|
-
...(parsed.stopSequences?.length
|
|
1279
|
-
? { stopSequences: parsed.stopSequences }
|
|
1280
|
-
: {}),
|
|
1281
|
-
tools: parsed.tools,
|
|
1282
|
-
...(parsed.toolChoice
|
|
1283
|
-
? { toolChoice: parsed.toolChoice }
|
|
1284
|
-
: {}),
|
|
1285
|
-
...(parsed.thinkingConfig
|
|
1286
|
-
? { thinkingConfig: parsed.thinkingConfig }
|
|
1287
|
-
: {}),
|
|
1288
|
-
...(parsed.conversationMessages?.length
|
|
1289
|
-
? {
|
|
1290
|
-
conversationMessages: parsed.conversationMessages.slice(0, -1),
|
|
1291
|
-
}
|
|
1292
|
-
: {}),
|
|
1293
|
-
maxSteps: 1,
|
|
1294
|
-
};
|
|
1295
|
-
if (body.stream) {
|
|
1296
|
-
const streamResult = await ctx.neurolink.stream(opts);
|
|
1297
|
-
const serializer = new ClaudeStreamSerializer(body.model, 0);
|
|
1298
|
-
async function* sseGenerator() {
|
|
1299
|
-
for (const frame of serializer.start()) {
|
|
1300
|
-
yield frame;
|
|
1301
|
-
}
|
|
1302
|
-
for await (const chunk of streamResult.stream) {
|
|
1303
|
-
const text = extractText(chunk);
|
|
1304
|
-
if (text) {
|
|
1305
|
-
for (const frame of serializer.pushDelta(text)) {
|
|
1306
|
-
yield frame;
|
|
1307
|
-
}
|
|
1308
|
-
}
|
|
1309
|
-
}
|
|
1310
|
-
// Emit tool_use blocks if model wants to call tools
|
|
1311
|
-
if (streamResult.toolCalls?.length) {
|
|
1312
|
-
for (const tc of streamResult.toolCalls) {
|
|
1313
|
-
const toolName = tc.toolName ??
|
|
1314
|
-
tc.name ??
|
|
1315
|
-
"unknown";
|
|
1316
|
-
const toolArgs = tc.args ??
|
|
1317
|
-
tc.parameters ??
|
|
1318
|
-
{};
|
|
1319
|
-
for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
|
|
1320
|
-
yield frame;
|
|
1321
|
-
}
|
|
1322
|
-
}
|
|
1323
|
-
}
|
|
1324
|
-
const reason = streamResult.finishReason ?? "end_turn";
|
|
1325
|
-
for (const frame of serializer.finish(0, reason)) {
|
|
1326
|
-
yield frame;
|
|
1327
|
-
}
|
|
1328
|
-
}
|
|
1329
|
-
return sseGenerator();
|
|
1330
|
-
}
|
|
1331
|
-
const streamResult = await ctx.neurolink.stream(opts);
|
|
1332
|
-
let collectedText = "";
|
|
1333
|
-
for await (const chunk of streamResult.stream) {
|
|
1334
|
-
const text = extractText(chunk);
|
|
1335
|
-
if (text) {
|
|
1336
|
-
collectedText += text;
|
|
1337
|
-
}
|
|
1338
|
-
}
|
|
1339
|
-
const internal = {
|
|
1340
|
-
content: collectedText,
|
|
1341
|
-
model: streamResult.model,
|
|
1342
|
-
finishReason: streamResult.finishReason ?? "end_turn",
|
|
1343
|
-
reasoning: undefined,
|
|
1344
|
-
usage: streamResult.usage
|
|
1345
|
-
? {
|
|
1346
|
-
input: streamResult.usage.input ??
|
|
1347
|
-
0,
|
|
1348
|
-
output: streamResult.usage
|
|
1349
|
-
.output ?? 0,
|
|
1350
|
-
total: streamResult.usage.total ??
|
|
1351
|
-
0,
|
|
1352
|
-
}
|
|
1353
|
-
: undefined,
|
|
1354
|
-
toolCalls: streamResult.toolCalls,
|
|
1355
|
-
};
|
|
1356
|
-
return serializeClaudeResponse(internal, body.model);
|
|
1357
|
-
}
|
|
1358
|
-
catch (fallbackErr) {
|
|
1359
|
-
logger.debug(`[proxy] fallback auto-provider failed: ${fallbackErr instanceof Error
|
|
1360
|
-
? fallbackErr.message
|
|
1361
|
-
: String(fallbackErr)}`);
|
|
1362
|
-
}
|
|
1363
|
-
}
|
|
1364
|
-
if (authFailureMessage && !sawRateLimit) {
|
|
1365
|
-
return buildClaudeError(401, authFailureMessage);
|
|
1366
|
-
}
|
|
1367
|
-
if ((sawNetworkError || sawTransientFailure) && !sawRateLimit) {
|
|
1368
|
-
return buildClaudeError(502, `All Anthropic accounts failed due to transient upstream/network errors. Last error: ${lastError instanceof Error
|
|
1369
|
-
? lastError.message
|
|
1370
|
-
: String(lastError ?? "unknown")}`);
|
|
1371
|
-
}
|
|
1372
|
-
if (!sawRateLimit) {
|
|
1373
|
-
return buildClaudeError(502, `All Anthropic accounts failed. Last error: ${lastError instanceof Error
|
|
1374
|
-
? lastError.message
|
|
1375
|
-
: String(lastError ?? "unknown")}`);
|
|
1376
|
-
}
|
|
1377
|
-
// All accounts AND all fallbacks exhausted — return 429 with Retry-After
|
|
1378
|
-
logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
|
|
1379
|
-
const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`, "overloaded_error");
|
|
1380
|
-
return new Response(JSON.stringify(errorBody), {
|
|
1381
|
-
status: 429,
|
|
1382
|
-
headers: {
|
|
1383
|
-
"content-type": "application/json",
|
|
1384
|
-
"retry-after": String(retryAfterSec),
|
|
1385
|
-
},
|
|
3076
|
+
return handleAnthropicRoutedClaudeRequest({
|
|
3077
|
+
ctx,
|
|
3078
|
+
body,
|
|
3079
|
+
modelRouter,
|
|
3080
|
+
tracer,
|
|
3081
|
+
requestStartTime,
|
|
3082
|
+
accountStrategy,
|
|
3083
|
+
buildLoggedClaudeError,
|
|
3084
|
+
logProxyBody,
|
|
3085
|
+
logFinalRequest,
|
|
1386
3086
|
});
|
|
1387
3087
|
}
|
|
1388
3088
|
else {
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
text: parsed.prompt,
|
|
1396
|
-
...(parsed.images.length > 0
|
|
1397
|
-
? { images: parsed.images }
|
|
1398
|
-
: {}),
|
|
3089
|
+
return handleTranslatedClaudeRequest({
|
|
3090
|
+
ctx,
|
|
3091
|
+
body,
|
|
3092
|
+
route: {
|
|
3093
|
+
provider: route.provider,
|
|
3094
|
+
model: route.model,
|
|
1399
3095
|
},
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
? { temperature: parsed.temperature }
|
|
1406
|
-
: {}),
|
|
1407
|
-
...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
|
|
1408
|
-
...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
|
|
1409
|
-
...(parsed.stopSequences?.length
|
|
1410
|
-
? { stopSequences: parsed.stopSequences }
|
|
1411
|
-
: {}),
|
|
1412
|
-
...(parsed.thinkingConfig
|
|
1413
|
-
? { thinkingConfig: parsed.thinkingConfig }
|
|
1414
|
-
: {}),
|
|
1415
|
-
tools: parsed.tools,
|
|
1416
|
-
...(parsed.toolChoice ? { toolChoice: parsed.toolChoice } : {}),
|
|
1417
|
-
maxSteps: 1,
|
|
1418
|
-
...(historyMessages.length > 0
|
|
1419
|
-
? { conversationMessages: historyMessages }
|
|
1420
|
-
: {}),
|
|
1421
|
-
};
|
|
1422
|
-
if (body.stream) {
|
|
1423
|
-
const streamResult = await ctx.neurolink.stream(options);
|
|
1424
|
-
const serializer = new ClaudeStreamSerializer(body.model, 0);
|
|
1425
|
-
const KEEPALIVE_INTERVAL_MS = 15_000; // 15 seconds
|
|
1426
|
-
// Return a ReadableStream that emits SSE keep-alive comments
|
|
1427
|
-
// every ~15s independently of upstream chunk arrival, so
|
|
1428
|
-
// intermediaries don't drop the connection during stalls.
|
|
1429
|
-
const encoder = new TextEncoder();
|
|
1430
|
-
let translationKeepAliveTimer;
|
|
1431
|
-
let translationCancelled = false;
|
|
1432
|
-
// Hold a reference to the upstream async iterator so
|
|
1433
|
-
// we can abort it when the client disconnects.
|
|
1434
|
-
let upstreamIterator;
|
|
1435
|
-
const translationStream = new ReadableStream({
|
|
1436
|
-
async start(controller) {
|
|
1437
|
-
// Emit start frames
|
|
1438
|
-
for (const frame of serializer.start()) {
|
|
1439
|
-
controller.enqueue(encoder.encode(frame));
|
|
1440
|
-
}
|
|
1441
|
-
// Keep-alive interval — fires even when upstream is stalled
|
|
1442
|
-
translationKeepAliveTimer = setInterval(() => {
|
|
1443
|
-
try {
|
|
1444
|
-
controller.enqueue(encoder.encode(": keep-alive\n\n"));
|
|
1445
|
-
}
|
|
1446
|
-
catch {
|
|
1447
|
-
// Controller already closed — ignore
|
|
1448
|
-
}
|
|
1449
|
-
}, KEEPALIVE_INTERVAL_MS);
|
|
1450
|
-
try {
|
|
1451
|
-
const iterable = streamResult.stream;
|
|
1452
|
-
upstreamIterator = iterable[Symbol.asyncIterator]();
|
|
1453
|
-
// Manually drive the async iterator so we can cancel it
|
|
1454
|
-
while (true) {
|
|
1455
|
-
if (translationCancelled) {
|
|
1456
|
-
break;
|
|
1457
|
-
}
|
|
1458
|
-
const { value: chunk, done } = await upstreamIterator.next();
|
|
1459
|
-
if (done) {
|
|
1460
|
-
break;
|
|
1461
|
-
}
|
|
1462
|
-
if (translationCancelled) {
|
|
1463
|
-
break;
|
|
1464
|
-
}
|
|
1465
|
-
const text = extractText(chunk);
|
|
1466
|
-
if (text) {
|
|
1467
|
-
for (const frame of serializer.pushDelta(text)) {
|
|
1468
|
-
controller.enqueue(encoder.encode(frame));
|
|
1469
|
-
}
|
|
1470
|
-
}
|
|
1471
|
-
}
|
|
1472
|
-
// Emit tool_use blocks if model wants to call tools
|
|
1473
|
-
if (!translationCancelled &&
|
|
1474
|
-
streamResult.toolCalls?.length) {
|
|
1475
|
-
for (const tc of streamResult.toolCalls) {
|
|
1476
|
-
const toolName = tc.toolName ??
|
|
1477
|
-
tc.name ??
|
|
1478
|
-
"unknown";
|
|
1479
|
-
const toolArgs = tc.args ??
|
|
1480
|
-
tc.parameters ??
|
|
1481
|
-
{};
|
|
1482
|
-
for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
|
|
1483
|
-
controller.enqueue(encoder.encode(frame));
|
|
1484
|
-
}
|
|
1485
|
-
}
|
|
1486
|
-
}
|
|
1487
|
-
if (!translationCancelled) {
|
|
1488
|
-
const reason = streamResult.finishReason ?? "end_turn";
|
|
1489
|
-
for (const frame of serializer.finish(0, reason)) {
|
|
1490
|
-
controller.enqueue(encoder.encode(frame));
|
|
1491
|
-
}
|
|
1492
|
-
}
|
|
1493
|
-
}
|
|
1494
|
-
catch (streamErr) {
|
|
1495
|
-
if (translationCancelled) {
|
|
1496
|
-
return;
|
|
1497
|
-
}
|
|
1498
|
-
const errMsg = streamErr instanceof Error
|
|
1499
|
-
? streamErr.message
|
|
1500
|
-
: String(streamErr);
|
|
1501
|
-
logger.always(`[proxy] mid-stream error (translation mode): ${errMsg}`);
|
|
1502
|
-
const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
|
|
1503
|
-
controller.enqueue(encoder.encode(errorEvent));
|
|
1504
|
-
}
|
|
1505
|
-
finally {
|
|
1506
|
-
if (translationKeepAliveTimer) {
|
|
1507
|
-
clearInterval(translationKeepAliveTimer);
|
|
1508
|
-
}
|
|
1509
|
-
if (!translationCancelled) {
|
|
1510
|
-
controller.close();
|
|
1511
|
-
}
|
|
1512
|
-
}
|
|
1513
|
-
},
|
|
1514
|
-
cancel() {
|
|
1515
|
-
translationCancelled = true;
|
|
1516
|
-
if (translationKeepAliveTimer) {
|
|
1517
|
-
clearInterval(translationKeepAliveTimer);
|
|
1518
|
-
translationKeepAliveTimer = undefined;
|
|
1519
|
-
}
|
|
1520
|
-
// Propagate cancellation to the upstream provider stream
|
|
1521
|
-
if (upstreamIterator?.return) {
|
|
1522
|
-
upstreamIterator.return(undefined).catch((cancelErr) => {
|
|
1523
|
-
logger.debug(`[proxy] upstream cancel error: ${cancelErr instanceof Error ? cancelErr.message : String(cancelErr)}`);
|
|
1524
|
-
});
|
|
1525
|
-
}
|
|
1526
|
-
},
|
|
1527
|
-
});
|
|
1528
|
-
return new Response(translationStream, {
|
|
1529
|
-
headers: {
|
|
1530
|
-
"content-type": "text/event-stream",
|
|
1531
|
-
"cache-control": "no-cache",
|
|
1532
|
-
connection: "keep-alive",
|
|
1533
|
-
},
|
|
1534
|
-
});
|
|
1535
|
-
}
|
|
1536
|
-
const streamResult = await ctx.neurolink.stream(options);
|
|
1537
|
-
let collectedText = "";
|
|
1538
|
-
for await (const chunk of streamResult.stream) {
|
|
1539
|
-
const text = extractText(chunk);
|
|
1540
|
-
if (text) {
|
|
1541
|
-
collectedText += text;
|
|
1542
|
-
}
|
|
1543
|
-
}
|
|
1544
|
-
const internal = {
|
|
1545
|
-
content: collectedText,
|
|
1546
|
-
model: streamResult.model,
|
|
1547
|
-
finishReason: streamResult.finishReason ?? "end_turn",
|
|
1548
|
-
reasoning: undefined,
|
|
1549
|
-
usage: streamResult.usage
|
|
1550
|
-
? {
|
|
1551
|
-
input: streamResult.usage.input ?? 0,
|
|
1552
|
-
output: streamResult.usage.output ?? 0,
|
|
1553
|
-
total: streamResult.usage.total ?? 0,
|
|
1554
|
-
}
|
|
1555
|
-
: undefined,
|
|
1556
|
-
toolCalls: streamResult.toolCalls,
|
|
1557
|
-
};
|
|
1558
|
-
return serializeClaudeResponse(internal, body.model);
|
|
3096
|
+
modelRouter,
|
|
3097
|
+
tracer,
|
|
3098
|
+
requestStartTime,
|
|
3099
|
+
logProxyBody,
|
|
3100
|
+
});
|
|
1559
3101
|
}
|
|
1560
3102
|
}
|
|
1561
3103
|
catch (error) {
|
|
1562
|
-
|
|
1563
|
-
|
|
3104
|
+
const errMsg = error instanceof Error ? error.message : String(error);
|
|
3105
|
+
logger.error(`[claude-proxy] Generation error for ${body.model}: ${errMsg}`);
|
|
3106
|
+
tracer?.setError("generation_error", errMsg.slice(0, 500));
|
|
3107
|
+
tracer?.end(502, Date.now() - requestStartTime);
|
|
3108
|
+
return buildLoggedClaudeError(502, `Generation failed: ${error instanceof Error ? error.message : "unknown error"}`);
|
|
1564
3109
|
}
|
|
1565
3110
|
},
|
|
1566
3111
|
description: "Claude-compatible messages endpoint routed through NeuroLink",
|
|
@@ -1621,6 +3166,26 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1621
3166
|
// ---------------------------------------------------------------------------
|
|
1622
3167
|
// Helpers
|
|
1623
3168
|
// ---------------------------------------------------------------------------
|
|
3169
|
+
/**
 * Normalize the token counters carried by a StreamResult.usage object.
 *
 * Each counter is looked up under several aliases, in priority order, to cope
 * with the naming used by different AI SDK versions and providers:
 *   - AI SDK v6: inputTokens / outputTokens
 *   - AI SDK v4: promptTokens / completionTokens
 *   - NeuroLink internal: input / output
 *
 * The first alias holding a truthy number wins. `total` is always derived as
 * input + output; any total present on the usage object is not read.
 *
 * @param {unknown} usage - Raw usage value; may be missing or a non-object.
 * @returns {{ input: number, output: number, total: number }} Normalized
 *   counters, all zero when `usage` is falsy or not an object.
 */
function extractUsageFromStreamResult(usage) {
    const isUsableRecord = Boolean(usage) && typeof usage === "object";
    if (!isUsableRecord) {
        return { input: 0, output: 0, total: 0 };
    }
    // Walk the aliases in priority order. A zero or non-numeric entry defers
    // to the next alias; when no alias is truthy the last alias's (numeric or
    // zero) value is what gets returned.
    const firstTruthyCount = (aliases) => {
        let count = 0;
        for (const alias of aliases) {
            const raw = usage[alias];
            count = typeof raw === "number" ? raw : 0;
            if (count) {
                return count;
            }
        }
        return count;
    };
    const input = firstTruthyCount(["inputTokens", "promptTokens", "input"]);
    const output = firstTruthyCount([
        "outputTokens",
        "completionTokens",
        "output",
    ]);
    return { input, output, total: input + output };
}
|
|
1624
3189
|
/**
|
|
1625
3190
|
* Extract text content from a stream chunk (handles various chunk formats).
|
|
1626
3191
|
*/
|
|
@@ -1781,6 +3346,124 @@ export function isInvalidRequestError(status, errBody) {
|
|
|
1781
3346
|
return (parsed.errorType === "invalid_request_error" ||
|
|
1782
3347
|
errBody.includes("invalid_request_error"));
|
|
1783
3348
|
}
|
|
3349
|
+
/**
 * Return a copy of a Claude-format request body in which every message whose
 * `content` is a plain string is rewritten as a single text content block
 * (`[{ type: "text", text }]`). Messages whose content is not a string are
 * passed through by reference, and neither the body nor its messages are
 * mutated.
 *
 * A body that carries no `messages` array is returned as a shallow copy with
 * everything left as-is, and null/undefined message entries are passed
 * through, so a malformed client request does not raise a TypeError here.
 *
 * @param {object} body - Claude messages request body.
 * @returns {object} Shallow copy of `body` with normalized `messages`.
 */
function normalizeClaudeRequestForAnthropic(body) {
    if (!Array.isArray(body?.messages)) {
        // Nothing to normalize; leave validation of the shape to the callee.
        return { ...body };
    }
    return {
        ...body,
        messages: body.messages.map((msg) => {
            if (msg == null || typeof msg.content !== "string") {
                return msg;
            }
            return {
                ...msg,
                content: [{ type: "text", text: msg.content }],
            };
        }),
    };
}
|
|
3363
|
+
/**
 * Build the options object for a translated (non-passthrough) generation call
 * from a parsed Claude request.
 *
 * Behavior visible in this function:
 *   - `conversationMessages` is forwarded without its final entry
 *     (presumably the final entry is the current prompt already carried in
 *     `input.text` — confirm against parseClaudeRequest).
 *   - Images and `thinkingConfig` are dropped for targets flagged by
 *     shouldOmitImagesForTarget / shouldOmitThinkingConfigForTarget.
 *   - A named tool choice (`parsed.toolChoiceName`) takes precedence over
 *     `parsed.toolChoice`.
 *   - With no declared tools, `disableTools: true` is set; otherwise the
 *     declared tools are forwarded together with a `toolFilter` listing
 *     exactly their names.
 *   - `disableInternalFallback`, `skipToolPromptInjection` and `maxSteps: 1`
 *     are always set.
 *
 * Missing `conversationMessages`, `tools` or `images` on `parsed` are treated
 * as empty rather than raising a TypeError.
 *
 * @param {object} parsed - Parsed Claude request.
 * @param {{ provider?: string, model?: string }} [overrides] - Optional
 *   target provider/model; included in the result only when truthy.
 * @returns {object} Options for the generation/stream call.
 */
export function buildProxyFallbackOptions(parsed, overrides = {}) {
    const conversationMessages = parsed.conversationMessages ?? [];
    const historyMessages = conversationMessages.slice(0, -1);
    const toolNames = Object.keys(parsed.tools ?? {});
    const images = shouldOmitImagesForTarget(overrides.provider, overrides.model)
        ? []
        : (parsed.images ?? []);
    const thinkingConfig = shouldOmitThinkingConfigForTarget(overrides.provider, overrides.model)
        ? undefined
        : parsed.thinkingConfig;
    const toolChoice = parsed.toolChoiceName
        ? { type: "tool", toolName: parsed.toolChoiceName }
        : parsed.toolChoice;
    return {
        input: {
            text: parsed.prompt,
            ...(images.length > 0 ? { images } : {}),
        },
        ...(overrides.provider ? { provider: overrides.provider } : {}),
        ...(overrides.model ? { model: overrides.model } : {}),
        systemPrompt: parsed.systemPrompt,
        maxTokens: parsed.maxTokens,
        ...(parsed.temperature !== undefined
            ? { temperature: parsed.temperature }
            : {}),
        ...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
        ...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
        ...(parsed.stopSequences?.length
            ? { stopSequences: parsed.stopSequences }
            : {}),
        ...(thinkingConfig ? { thinkingConfig } : {}),
        ...(toolNames.length === 0 ? { disableTools: true } : {}),
        // Claude-compatible requests already declare the exact tool contract.
        // Filter out NeuroLink's built-in agent tools so translated fallbacks only
        // expose the tools the client actually knows how to handle.
        ...(toolNames.length > 0
            ? {
                tools: parsed.tools,
                toolFilter: toolNames,
            }
            : {}),
        ...(toolChoice ? { toolChoice } : {}),
        ...(historyMessages.length > 0
            ? { conversationMessages: historyMessages }
            : {}),
        disableInternalFallback: true,
        skipToolPromptInjection: true,
        maxSteps: 1,
    };
}
|
|
3412
|
+
/**
 * List the provider/model targets to try, in order, for a translated request.
 *
 * The primary target always comes first. Entries from the router's fallback
 * chain follow, except those equal to the primary (same provider and model)
 * and those rejected by shouldSkipTranslationTarget. When the fallback chain
 * is empty (or there is no router) a trailing `{ label: "auto-provider" }`
 * attempt, carrying no provider or model, is appended.
 *
 * @param {{ provider: string, model?: string }} primary - Primary target.
 * @param {{ getFallbackChain: () => Array<{ provider: string, model: string }> }} [modelRouter]
 *   Router supplying the fallback chain; may be absent.
 * @param {object} [parsed] - Parsed request, forwarded to the skip check.
 * @returns {Array<{ provider?: string, model?: string, label: string }>}
 */
export function buildProxyTranslationAttempts(primary, modelRouter, parsed) {
    const primaryAttempt = {
        provider: primary.provider,
        model: primary.model,
        label: `${primary.provider}/${primary.model ?? "unknown"}`,
    };
    const fallbackChain = modelRouter?.getFallbackChain() ?? [];
    const duplicatesPrimary = (candidate) => candidate.provider === primary.provider &&
        candidate.model === primary.model;
    const fallbackAttempts = fallbackChain
        .filter((candidate) => !duplicatesPrimary(candidate) &&
        !shouldSkipTranslationTarget(candidate.provider, candidate.model, parsed))
        .map((candidate) => ({
        provider: candidate.provider,
        model: candidate.model,
        label: `${candidate.provider}/${candidate.model}`,
    }));
    const attempts = [primaryAttempt, ...fallbackAttempts];
    // Only an unconfigured chain falls through to provider auto-selection.
    if (fallbackChain.length === 0) {
        attempts.push({ label: "auto-provider" });
    }
    return attempts;
}
|
|
3440
|
+
/**
 * Report whether a translated response produced anything usable: either
 * non-whitespace text or at least one tool call.
 *
 * @param {string} collectedText - Text accumulated from the stream.
 * @param {Array<unknown>} [toolCalls] - Tool calls reported by the stream.
 * @returns {boolean}
 */
function hasTranslatedOutput(collectedText, toolCalls) {
    if (collectedText.trim().length > 0) {
        return true;
    }
    const toolCallCount = toolCalls?.length ?? 0;
    return toolCallCount > 0;
}
|
|
3443
|
+
/**
 * Report whether binary image payloads should be left out for a target.
 *
 * Only the LiteLLM `open-large` model is flagged: in our LiteLLM setup it
 * handles text and tools but returns an empty completion when binary images
 * are forwarded. Claude Code already puts textual image markers in the
 * prompt, so dropping just the binary payload keeps the request usable
 * instead of breaking the fallback.
 *
 * @param {string} [provider] - Target provider name.
 * @param {string} [model] - Target model name.
 * @returns {boolean}
 */
function shouldOmitImagesForTarget(provider, model) {
    if (provider !== "litellm") {
        return false;
    }
    return model === "open-large";
}
|
|
3450
|
+
/**
 * Report whether `thinkingConfig` should be left out for a target. Only the
 * Vertex `gemini-2.5-flash` model is flagged.
 *
 * @param {string} [provider] - Target provider name.
 * @param {string} [model] - Target model name.
 * @returns {boolean}
 */
function shouldOmitThinkingConfigForTarget(provider, model) {
    if (provider !== "vertex") {
        return false;
    }
    return model === "gemini-2.5-flash";
}
|
|
3453
|
+
/**
 * Report whether a fallback target must be skipped for this request.
 *
 * Only one combination is skipped: the Ollama `qwen2.5:0.5b` model when the
 * parsed request carries at least one image. A missing `parsed`, or a
 * `parsed` without an `images` list, counts as "no images".
 *
 * @param {string} provider - Target provider name.
 * @param {string} model - Target model name.
 * @param {{ images?: Array<unknown> }} [parsed] - Parsed request, if any.
 * @returns {boolean} True when the target should not be attempted.
 */
function shouldSkipTranslationTarget(provider, model, parsed) {
    // Optional-chain both hops: `parsed?.images.length` would still throw
    // when `parsed` exists but has no `images` property.
    const imageCount = parsed?.images?.length ?? 0;
    return provider === "ollama" && model === "qwen2.5:0.5b" && imageCount > 0;
}
|
|
3461
|
+
/**
 * Pull the argument payload out of a tool-call record, whichever property
 * name it was stored under. `args`, `parameters` and `input` are checked in
 * that order and the first one that is neither null nor undefined is
 * returned; a fresh empty object is returned when none is set.
 *
 * @param {object} toolCall - Tool-call record.
 * @returns {unknown} The argument payload, or `{}`.
 */
function extractToolArgs(toolCall) {
    for (const key of ["args", "parameters", "input"]) {
        const payload = toolCall[key];
        if (payload !== null && payload !== undefined) {
            return payload;
        }
    }
    return {};
}
|
|
1784
3467
|
/**
|
|
1785
3468
|
* Detect transient upstream failures that should trigger account/provider failover.
|
|
1786
3469
|
*
|