@juspay/neurolink 9.41.0 → 9.42.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +7 -1
- package/dist/auth/anthropicOAuth.d.ts +18 -3
- package/dist/auth/anthropicOAuth.js +137 -4
- package/dist/auth/providers/firebase.js +5 -1
- package/dist/auth/providers/jwt.js +5 -1
- package/dist/auth/providers/workos.js +5 -1
- package/dist/auth/sessionManager.d.ts +1 -1
- package/dist/auth/sessionManager.js +58 -27
- package/dist/browser/neurolink.min.js +337 -318
- package/dist/cli/commands/mcp.js +3 -0
- package/dist/cli/commands/proxy.d.ts +2 -1
- package/dist/cli/commands/proxy.js +279 -16
- package/dist/cli/commands/task.js +3 -0
- package/dist/cli/factories/commandFactory.d.ts +2 -0
- package/dist/cli/factories/commandFactory.js +38 -0
- package/dist/cli/parser.js +4 -3
- package/dist/client/aiSdkAdapter.js +3 -0
- package/dist/client/streamingClient.js +30 -10
- package/dist/core/modules/GenerationHandler.js +3 -2
- package/dist/core/redisConversationMemoryManager.js +7 -3
- package/dist/evaluation/BatchEvaluator.js +4 -1
- package/dist/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/evaluation/pipeline/evaluationPipeline.js +20 -8
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
- package/dist/lib/auth/anthropicOAuth.js +137 -4
- package/dist/lib/auth/providers/firebase.js +5 -1
- package/dist/lib/auth/providers/jwt.js +5 -1
- package/dist/lib/auth/providers/workos.js +5 -1
- package/dist/lib/auth/sessionManager.d.ts +1 -1
- package/dist/lib/auth/sessionManager.js +58 -27
- package/dist/lib/client/aiSdkAdapter.js +3 -0
- package/dist/lib/client/streamingClient.js +30 -10
- package/dist/lib/core/modules/GenerationHandler.js +3 -2
- package/dist/lib/core/redisConversationMemoryManager.js +7 -3
- package/dist/lib/evaluation/BatchEvaluator.js +4 -1
- package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +20 -8
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
- package/dist/lib/neurolink.d.ts +3 -2
- package/dist/lib/neurolink.js +260 -494
- package/dist/lib/observability/otelBridge.d.ts +2 -2
- package/dist/lib/observability/otelBridge.js +12 -3
- package/dist/lib/providers/amazonBedrock.js +2 -4
- package/dist/lib/providers/anthropic.d.ts +9 -5
- package/dist/lib/providers/anthropic.js +19 -14
- package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/lib/providers/anthropicBaseProvider.js +5 -4
- package/dist/lib/providers/azureOpenai.d.ts +1 -1
- package/dist/lib/providers/azureOpenai.js +5 -4
- package/dist/lib/providers/googleAiStudio.js +30 -1
- package/dist/lib/providers/googleVertex.js +28 -6
- package/dist/lib/providers/huggingFace.d.ts +3 -3
- package/dist/lib/providers/huggingFace.js +6 -8
- package/dist/lib/providers/litellm.js +41 -29
- package/dist/lib/providers/mistral.js +2 -1
- package/dist/lib/providers/ollama.js +80 -23
- package/dist/lib/providers/openAI.js +3 -2
- package/dist/lib/providers/openRouter.js +2 -1
- package/dist/lib/providers/openaiCompatible.d.ts +4 -4
- package/dist/lib/providers/openaiCompatible.js +4 -4
- package/dist/lib/proxy/claudeFormat.d.ts +3 -2
- package/dist/lib/proxy/claudeFormat.js +25 -20
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/lib/proxy/modelRouter.js +3 -0
- package/dist/lib/proxy/oauthFetch.d.ts +1 -1
- package/dist/lib/proxy/oauthFetch.js +65 -72
- package/dist/lib/proxy/proxyConfig.js +44 -24
- package/dist/lib/proxy/proxyEnv.d.ts +19 -0
- package/dist/lib/proxy/proxyEnv.js +73 -0
- package/dist/lib/proxy/proxyFetch.js +50 -4
- package/dist/lib/proxy/proxyTracer.d.ts +133 -0
- package/dist/lib/proxy/proxyTracer.js +645 -0
- package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/lib/proxy/rawStreamCapture.js +83 -0
- package/dist/lib/proxy/requestLogger.d.ts +32 -5
- package/dist/lib/proxy/requestLogger.js +406 -37
- package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
- package/dist/lib/proxy/sseInterceptor.js +402 -0
- package/dist/lib/proxy/usageStats.d.ts +4 -3
- package/dist/lib/proxy/usageStats.js +25 -12
- package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/lib/rag/chunking/markdownChunker.js +15 -6
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +7 -2
- package/dist/lib/server/routes/claudeProxyRoutes.js +1737 -508
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/lib/services/server/ai/observability/instrumentation.js +240 -40
- package/dist/lib/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/lib/tasks/backends/bullmqBackend.js +14 -7
- package/dist/lib/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/lib/tasks/store/redisTaskStore.js +34 -26
- package/dist/lib/tasks/taskManager.d.ts +3 -0
- package/dist/lib/tasks/taskManager.js +63 -30
- package/dist/lib/telemetry/index.d.ts +2 -1
- package/dist/lib/telemetry/index.js +2 -1
- package/dist/lib/telemetry/telemetryService.d.ts +3 -0
- package/dist/lib/telemetry/telemetryService.js +65 -5
- package/dist/lib/types/cli.d.ts +10 -0
- package/dist/lib/types/proxyTypes.d.ts +37 -5
- package/dist/lib/types/streamTypes.d.ts +25 -3
- package/dist/lib/utils/messageBuilder.js +3 -2
- package/dist/lib/utils/providerHealth.d.ts +18 -0
- package/dist/lib/utils/providerHealth.js +240 -9
- package/dist/lib/utils/providerUtils.js +14 -8
- package/dist/lib/utils/toolChoice.d.ts +4 -0
- package/dist/lib/utils/toolChoice.js +7 -0
- package/dist/neurolink.d.ts +3 -2
- package/dist/neurolink.js +260 -494
- package/dist/observability/otelBridge.d.ts +2 -2
- package/dist/observability/otelBridge.js +12 -3
- package/dist/providers/amazonBedrock.js +2 -4
- package/dist/providers/anthropic.d.ts +9 -5
- package/dist/providers/anthropic.js +19 -14
- package/dist/providers/anthropicBaseProvider.d.ts +3 -3
- package/dist/providers/anthropicBaseProvider.js +5 -4
- package/dist/providers/azureOpenai.d.ts +1 -1
- package/dist/providers/azureOpenai.js +5 -4
- package/dist/providers/googleAiStudio.js +30 -1
- package/dist/providers/googleVertex.js +28 -6
- package/dist/providers/huggingFace.d.ts +3 -3
- package/dist/providers/huggingFace.js +6 -7
- package/dist/providers/litellm.js +41 -29
- package/dist/providers/mistral.js +2 -1
- package/dist/providers/ollama.js +80 -23
- package/dist/providers/openAI.js +3 -2
- package/dist/providers/openRouter.js +2 -1
- package/dist/providers/openaiCompatible.d.ts +4 -4
- package/dist/providers/openaiCompatible.js +4 -3
- package/dist/proxy/claudeFormat.d.ts +3 -2
- package/dist/proxy/claudeFormat.js +25 -20
- package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
- package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
- package/dist/proxy/modelRouter.js +3 -0
- package/dist/proxy/oauthFetch.d.ts +1 -1
- package/dist/proxy/oauthFetch.js +65 -72
- package/dist/proxy/proxyConfig.js +44 -24
- package/dist/proxy/proxyEnv.d.ts +19 -0
- package/dist/proxy/proxyEnv.js +72 -0
- package/dist/proxy/proxyFetch.js +50 -4
- package/dist/proxy/proxyTracer.d.ts +133 -0
- package/dist/proxy/proxyTracer.js +644 -0
- package/dist/proxy/rawStreamCapture.d.ts +10 -0
- package/dist/proxy/rawStreamCapture.js +82 -0
- package/dist/proxy/requestLogger.d.ts +32 -5
- package/dist/proxy/requestLogger.js +406 -37
- package/dist/proxy/sseInterceptor.d.ts +97 -0
- package/dist/proxy/sseInterceptor.js +401 -0
- package/dist/proxy/usageStats.d.ts +4 -3
- package/dist/proxy/usageStats.js +25 -12
- package/dist/rag/chunkers/MarkdownChunker.js +13 -5
- package/dist/rag/chunking/markdownChunker.js +15 -6
- package/dist/server/routes/claudeProxyRoutes.d.ts +7 -2
- package/dist/server/routes/claudeProxyRoutes.js +1737 -508
- package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
- package/dist/services/server/ai/observability/instrumentation.js +240 -40
- package/dist/tasks/backends/bullmqBackend.d.ts +1 -0
- package/dist/tasks/backends/bullmqBackend.js +14 -7
- package/dist/tasks/store/redisTaskStore.d.ts +1 -0
- package/dist/tasks/store/redisTaskStore.js +34 -26
- package/dist/tasks/taskManager.d.ts +3 -0
- package/dist/tasks/taskManager.js +63 -30
- package/dist/telemetry/index.d.ts +2 -1
- package/dist/telemetry/index.js +2 -1
- package/dist/telemetry/telemetryService.d.ts +3 -0
- package/dist/telemetry/telemetryService.js +65 -5
- package/dist/types/cli.d.ts +10 -0
- package/dist/types/proxyTypes.d.ts +37 -5
- package/dist/types/streamTypes.d.ts +25 -3
- package/dist/utils/messageBuilder.js +3 -2
- package/dist/utils/providerHealth.d.ts +18 -0
- package/dist/utils/providerHealth.js +240 -9
- package/dist/utils/providerUtils.js +14 -8
- package/dist/utils/toolChoice.d.ts +4 -0
- package/dist/utils/toolChoice.js +6 -0
- package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
- package/docs/changelog.md +252 -0
- package/package.json +17 -1
- package/scripts/observability/check-proxy-telemetry.mjs +235 -0
- package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
- package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
- package/scripts/observability/manage-local-openobserve.sh +184 -0
- package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
- package/scripts/observability/proxy-observability.env.example +23 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-depth */
|
|
1
2
|
/**
|
|
2
3
|
* Claude-Compatible Proxy Routes
|
|
3
4
|
*
|
|
@@ -9,20 +10,23 @@
|
|
|
9
10
|
* provider/model pairs (e.g. "claude-sonnet-4-20250514" -> vertex/gemini-2.5-pro).
|
|
10
11
|
* Without a router, models are passed through to the Anthropic provider.
|
|
11
12
|
*/
|
|
12
|
-
import { readFile,
|
|
13
|
-
import { join } from "node:path";
|
|
13
|
+
import { access, mkdir, readFile, rename, writeFile } from "node:fs/promises";
|
|
14
14
|
import { homedir } from "node:os";
|
|
15
|
-
import {
|
|
15
|
+
import { join } from "node:path";
|
|
16
|
+
import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
|
|
17
|
+
import { parseQuotaHeaders, saveAccountQuota } from "../../proxy/accountQuota.js";
|
|
18
|
+
import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js";
|
|
19
|
+
import { ProxyTracer } from "../../proxy/proxyTracer.js";
|
|
20
|
+
import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
|
|
21
|
+
import { logBodyCapture, logRequest, logRequestAttempt, logStreamError } from "../../proxy/requestLogger.js";
|
|
22
|
+
import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
|
|
23
|
+
import { needsRefresh, persistTokens, refreshToken } from "../../proxy/tokenRefresh.js";
|
|
24
|
+
import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
|
|
16
25
|
import { logger } from "../../utils/logger.js";
|
|
17
|
-
import {
|
|
18
|
-
import { logRequest, logFullRequestResponse, logStreamError, } from "../../proxy/requestLogger.js";
|
|
19
|
-
import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js";
|
|
20
|
-
import { needsRefresh, refreshToken, persistTokens, } from "../../proxy/tokenRefresh.js";
|
|
26
|
+
import { ProviderHealthChecker } from "../../utils/providerHealth.js";
|
|
21
27
|
// ---------------------------------------------------------------------------
|
|
22
28
|
// Helpers
|
|
23
29
|
// ---------------------------------------------------------------------------
|
|
24
|
-
/** Header names whose values must be masked in debug logs. */
|
|
25
|
-
const SENSITIVE_HEADERS = new Set(["authorization", "x-api-key"]);
|
|
26
30
|
/** Headers that must never be forwarded upstream to Anthropic. */
|
|
27
31
|
const BLOCKED_UPSTREAM_HEADERS = new Set([
|
|
28
32
|
"cookie",
|
|
@@ -32,22 +36,6 @@ const BLOCKED_UPSTREAM_HEADERS = new Set([
|
|
|
32
36
|
"content-length",
|
|
33
37
|
"transfer-encoding",
|
|
34
38
|
]);
|
|
35
|
-
/** Return a shallow copy of `headers` with sensitive values redacted. */
|
|
36
|
-
function redactSensitiveHeaders(headers) {
|
|
37
|
-
const redacted = {};
|
|
38
|
-
for (const [key, value] of Object.entries(headers)) {
|
|
39
|
-
if (SENSITIVE_HEADERS.has(key.toLowerCase()) && value.length > 8) {
|
|
40
|
-
redacted[key] = value.substring(0, 8) + "...";
|
|
41
|
-
}
|
|
42
|
-
else if (SENSITIVE_HEADERS.has(key.toLowerCase())) {
|
|
43
|
-
redacted[key] = "***";
|
|
44
|
-
}
|
|
45
|
-
else {
|
|
46
|
-
redacted[key] = value;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
return redacted;
|
|
50
|
-
}
|
|
51
39
|
// ---------------------------------------------------------------------------
|
|
52
40
|
// Module-level state
|
|
53
41
|
// ---------------------------------------------------------------------------
|
|
@@ -83,65 +71,192 @@ function advancePrimaryIfCurrent(accountKey, enabledCount, primaryAccountKey) {
|
|
|
83
71
|
}
|
|
84
72
|
primaryAccountIndex = (primaryAccountIndex + 1) % enabledCount;
|
|
85
73
|
}
|
|
86
|
-
// ---------------------------------------------------------------------------
|
|
87
|
-
// OAuth polyfill helpers (extracted to reduce block nesting)
|
|
88
|
-
// ---------------------------------------------------------------------------
|
|
89
74
|
const snapshotCache = new Map();
|
|
90
75
|
const SNAPSHOT_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
76
|
+
const SNAPSHOT_STABLE_HEADERS = new Set([
|
|
77
|
+
"accept",
|
|
78
|
+
"accept-encoding",
|
|
79
|
+
"accept-language",
|
|
80
|
+
"anthropic-beta",
|
|
81
|
+
"anthropic-dangerous-direct-browser-access",
|
|
82
|
+
"anthropic-version",
|
|
83
|
+
"sec-fetch-mode",
|
|
84
|
+
"user-agent",
|
|
85
|
+
"x-app",
|
|
86
|
+
"x-stainless-arch",
|
|
87
|
+
"x-stainless-lang",
|
|
88
|
+
"x-stainless-os",
|
|
89
|
+
"x-stainless-package-version",
|
|
90
|
+
"x-stainless-retry-count",
|
|
91
|
+
"x-stainless-runtime",
|
|
92
|
+
"x-stainless-runtime-version",
|
|
93
|
+
"x-stainless-timeout",
|
|
94
|
+
"x-subscription-tier",
|
|
95
|
+
]);
|
|
96
|
+
const NON_CLAUDE_OAUTH_BETAS = [
|
|
97
|
+
"oauth-2025-04-20",
|
|
98
|
+
"claude-code-20250219",
|
|
99
|
+
"fine-grained-tool-streaming-2025-05-14",
|
|
100
|
+
];
|
|
101
|
+
function getSnapshotSafeLabel(accountLabel) {
|
|
102
|
+
return accountLabel.replace(/[^a-zA-Z0-9._@-]/g, "_");
|
|
103
|
+
}
|
|
104
|
+
function getSnapshotPath(accountLabel) {
|
|
105
|
+
return join(homedir(), ".neurolink", "header-snapshots", `anthropic_${getSnapshotSafeLabel(accountLabel)}.json`);
|
|
106
|
+
}
|
|
107
|
+
function applySnapshotHeaders(headers, snapshot) {
|
|
108
|
+
if (!snapshot?.headers) {
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
for (const [sk, sv] of Object.entries(snapshot.headers)) {
|
|
112
|
+
const lower = sk.toLowerCase();
|
|
113
|
+
if (typeof sv === "string" &&
|
|
114
|
+
!headers[lower] &&
|
|
115
|
+
!BLOCKED_UPSTREAM_HEADERS.has(lower) &&
|
|
116
|
+
lower !== "authorization" &&
|
|
117
|
+
lower !== "x-api-key" &&
|
|
118
|
+
lower !== "x-claude-code-session-id") {
|
|
119
|
+
headers[lower] = sv;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
async function loadClaudeSnapshot(accountLabel) {
|
|
97
124
|
try {
|
|
98
|
-
|
|
99
|
-
const safeLabel = accountLabel.replace(/[^a-zA-Z0-9._@-]/g, "_");
|
|
100
|
-
// Check cache first
|
|
125
|
+
const safeLabel = getSnapshotSafeLabel(accountLabel);
|
|
101
126
|
const cached = snapshotCache.get(safeLabel);
|
|
102
127
|
if (cached && Date.now() - cached.loadedAt < SNAPSHOT_CACHE_TTL_MS) {
|
|
103
|
-
|
|
104
|
-
const lower = sk.toLowerCase();
|
|
105
|
-
if (typeof sv === "string" &&
|
|
106
|
-
!headers[lower] &&
|
|
107
|
-
!BLOCKED_UPSTREAM_HEADERS.has(lower) &&
|
|
108
|
-
lower !== "authorization" &&
|
|
109
|
-
lower !== "x-api-key") {
|
|
110
|
-
headers[lower] = sv;
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
return;
|
|
128
|
+
return cached.snapshot;
|
|
114
129
|
}
|
|
115
|
-
const snapshotPath =
|
|
130
|
+
const snapshotPath = getSnapshotPath(accountLabel);
|
|
116
131
|
try {
|
|
117
132
|
await access(snapshotPath);
|
|
118
133
|
}
|
|
119
134
|
catch {
|
|
120
|
-
return;
|
|
135
|
+
return null;
|
|
121
136
|
}
|
|
122
137
|
const snapshot = JSON.parse(await readFile(snapshotPath, "utf8"));
|
|
123
|
-
if (!snapshot
|
|
124
|
-
return;
|
|
138
|
+
if (!snapshot || typeof snapshot !== "object") {
|
|
139
|
+
return null;
|
|
140
|
+
}
|
|
141
|
+
const normalized = {
|
|
142
|
+
accountKey: "accountKey" in snapshot && typeof snapshot.accountKey === "string"
|
|
143
|
+
? snapshot.accountKey
|
|
144
|
+
: `anthropic:${accountLabel}`,
|
|
145
|
+
capturedAt: "capturedAt" in snapshot && typeof snapshot.capturedAt === "string"
|
|
146
|
+
? snapshot.capturedAt
|
|
147
|
+
: new Date(0).toISOString(),
|
|
148
|
+
source: "claude-code",
|
|
149
|
+
headers: "headers" in snapshot && snapshot.headers ? snapshot.headers : {},
|
|
150
|
+
...(snapshot.body ? { body: snapshot.body } : {}),
|
|
151
|
+
};
|
|
152
|
+
if (Object.keys(normalized.headers).length === 0 && Object.keys(normalized.body ?? {}).length === 0) {
|
|
153
|
+
return null;
|
|
125
154
|
}
|
|
126
|
-
// Store in cache
|
|
127
155
|
snapshotCache.set(safeLabel, {
|
|
128
|
-
|
|
156
|
+
snapshot: normalized,
|
|
129
157
|
loadedAt: Date.now(),
|
|
130
158
|
});
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
159
|
+
return normalized;
|
|
160
|
+
}
|
|
161
|
+
catch {
|
|
162
|
+
return null;
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
function buildSnapshotHeaders(headers, existingHeaders) {
|
|
166
|
+
const merged = { ...(existingHeaders ?? {}) };
|
|
167
|
+
for (const [key, value] of Object.entries(headers)) {
|
|
168
|
+
const lower = key.toLowerCase();
|
|
169
|
+
if (typeof value === "string" &&
|
|
170
|
+
SNAPSHOT_STABLE_HEADERS.has(lower) &&
|
|
171
|
+
!BLOCKED_UPSTREAM_HEADERS.has(lower) &&
|
|
172
|
+
lower !== "authorization" &&
|
|
173
|
+
lower !== "x-api-key" &&
|
|
174
|
+
lower !== "x-claude-code-session-id") {
|
|
175
|
+
merged[lower] = value;
|
|
140
176
|
}
|
|
141
177
|
}
|
|
178
|
+
return merged;
|
|
179
|
+
}
|
|
180
|
+
function extractSnapshotBody(body) {
|
|
181
|
+
if (!body || typeof body !== "object") {
|
|
182
|
+
return undefined;
|
|
183
|
+
}
|
|
184
|
+
const parsed = body;
|
|
185
|
+
const identity = parseClaudeCodeUserId(parsed.metadata?.user_id);
|
|
186
|
+
const systemBlocks = Array.isArray(parsed.system)
|
|
187
|
+
? parsed.system
|
|
188
|
+
: typeof parsed.system === "string"
|
|
189
|
+
? [{ type: "text", text: parsed.system }]
|
|
190
|
+
: [];
|
|
191
|
+
const billingHeader = systemBlocks.find((block) => typeof block?.text === "string" && block.text.includes("x-anthropic-billing-header"))?.text;
|
|
192
|
+
const agentBlock = systemBlocks.find((block) => typeof block?.text === "string" && block.text.includes("Claude Agent SDK"))?.text;
|
|
193
|
+
if (!identity && !billingHeader && !agentBlock) {
|
|
194
|
+
return undefined;
|
|
195
|
+
}
|
|
196
|
+
return {
|
|
197
|
+
...(identity ? { metadataUserId: identity.metadataUserId } : {}),
|
|
198
|
+
...(identity ? { sessionId: identity.sessionId } : {}),
|
|
199
|
+
...(billingHeader ? { billingHeader } : {}),
|
|
200
|
+
...(agentBlock ? { agentBlock } : {}),
|
|
201
|
+
};
|
|
202
|
+
}
|
|
203
|
+
function isLikelyClaudeClient(headers, snapshotBody) {
|
|
204
|
+
return (typeof headers["x-claude-code-session-id"] === "string" ||
|
|
205
|
+
headers["user-agent"]?.startsWith("claude-cli/") ||
|
|
206
|
+
!!snapshotBody?.metadataUserId ||
|
|
207
|
+
!!snapshotBody?.billingHeader ||
|
|
208
|
+
!!snapshotBody?.agentBlock);
|
|
209
|
+
}
|
|
210
|
+
function snapshotsMatch(existing, next) {
|
|
211
|
+
if (!existing) {
|
|
212
|
+
return false;
|
|
213
|
+
}
|
|
214
|
+
return (JSON.stringify(existing.headers ?? {}) === JSON.stringify(next.headers ?? {}) &&
|
|
215
|
+
JSON.stringify(existing.body ?? {}) === JSON.stringify(next.body ?? {}));
|
|
216
|
+
}
|
|
217
|
+
async function persistClaudeSnapshot(accountLabel, snapshot) {
|
|
218
|
+
const snapshotPath = getSnapshotPath(accountLabel);
|
|
219
|
+
const dirPath = join(homedir(), ".neurolink", "header-snapshots");
|
|
220
|
+
await mkdir(dirPath, { recursive: true });
|
|
221
|
+
const tmpPath = `${snapshotPath}.tmp`;
|
|
222
|
+
await writeFile(tmpPath, JSON.stringify(snapshot, null, 2), { mode: 0o600 });
|
|
223
|
+
await rename(tmpPath, snapshotPath);
|
|
224
|
+
snapshotCache.set(getSnapshotSafeLabel(accountLabel), {
|
|
225
|
+
snapshot,
|
|
226
|
+
loadedAt: Date.now(),
|
|
227
|
+
});
|
|
228
|
+
}
|
|
229
|
+
async function maybeRefreshClaudeSnapshot(accountLabel, accountKey, headers, bodyStr) {
|
|
230
|
+
const existing = await loadClaudeSnapshot(accountLabel);
|
|
231
|
+
let parsedBody;
|
|
232
|
+
try {
|
|
233
|
+
parsedBody = JSON.parse(bodyStr);
|
|
234
|
+
}
|
|
142
235
|
catch {
|
|
143
|
-
|
|
236
|
+
return existing;
|
|
237
|
+
}
|
|
238
|
+
const body = extractSnapshotBody(parsedBody);
|
|
239
|
+
if (!isLikelyClaudeClient(headers, body)) {
|
|
240
|
+
return existing;
|
|
241
|
+
}
|
|
242
|
+
const next = {
|
|
243
|
+
accountKey,
|
|
244
|
+
capturedAt: new Date().toISOString(),
|
|
245
|
+
source: "claude-code",
|
|
246
|
+
headers: buildSnapshotHeaders(headers, existing?.headers),
|
|
247
|
+
body: {
|
|
248
|
+
...(existing?.body ?? {}),
|
|
249
|
+
...(body ?? {}),
|
|
250
|
+
...(typeof headers["x-claude-code-session-id"] === "string"
|
|
251
|
+
? { sessionId: headers["x-claude-code-session-id"] }
|
|
252
|
+
: {}),
|
|
253
|
+
},
|
|
254
|
+
};
|
|
255
|
+
if (snapshotsMatch(existing, next)) {
|
|
256
|
+
return existing;
|
|
144
257
|
}
|
|
258
|
+
await persistClaudeSnapshot(accountLabel, next);
|
|
259
|
+
return next;
|
|
145
260
|
}
|
|
146
261
|
/**
|
|
147
262
|
* Polyfill the request body for OAuth accounts.
|
|
@@ -149,59 +264,74 @@ async function applyHeaderSnapshot(headers, accountLabel) {
|
|
|
149
264
|
* into the body. Non-CC clients (Curator, custom apps) don't send these —
|
|
150
265
|
* Anthropic rejects without them.
|
|
151
266
|
*/
|
|
152
|
-
function polyfillOAuthBody(bodyStr, accountToken) {
|
|
267
|
+
function polyfillOAuthBody(bodyStr, accountToken, snapshot, preferredSessionId) {
|
|
153
268
|
try {
|
|
154
269
|
const parsed = JSON.parse(bodyStr);
|
|
155
270
|
// Billing header block (required by Anthropic for OAuth)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
text: `x-anthropic-billing-header: cc_version=2.1.86.${randomHex}; cc_entrypoint=cli; cch=proxy;`,
|
|
160
|
-
};
|
|
271
|
+
// NOTE: This block MUST be deterministic (no random values) to preserve
|
|
272
|
+
// Anthropic's prompt caching prefix chain. We keep the real Claude Code
|
|
273
|
+
// version/entrypoint shape when present, but stabilize the volatile cch.
|
|
161
274
|
const agentBlock = {
|
|
162
275
|
type: "text",
|
|
163
|
-
text: "You are a Claude agent, built on Anthropic's Claude Agent SDK.",
|
|
276
|
+
text: snapshot?.body?.agentBlock || "You are a Claude agent, built on Anthropic's Claude Agent SDK.",
|
|
164
277
|
};
|
|
165
|
-
// Normalise system to array and
|
|
278
|
+
// Normalise system to array and APPEND billing + agent blocks.
|
|
279
|
+
// IMPORTANT: We append (not prepend) to preserve the client's cache
|
|
280
|
+
// prefix chain. Anthropic's prompt caching uses prefix matching — if we
|
|
281
|
+
// insert anything before the client's system blocks, we invalidate all
|
|
282
|
+
// cached content (tools, system prompt, message history).
|
|
283
|
+
//
|
|
284
|
+
// Claude Code sends a billing block with a `cch=<hash>` value that changes
|
|
285
|
+
// on every request. We fix this by:
|
|
286
|
+
// 1. Removing the client's billing block from its current position
|
|
287
|
+
// 2. Stabilizing it while keeping the official Claude Code shape
|
|
288
|
+
// 3. Appending it at the END so the cacheable system blocks stay
|
|
289
|
+
// at the front of the prefix chain
|
|
166
290
|
if (parsed.system) {
|
|
167
291
|
if (typeof parsed.system === "string") {
|
|
168
292
|
parsed.system = [{ type: "text", text: parsed.system }];
|
|
169
293
|
}
|
|
170
294
|
if (Array.isArray(parsed.system)) {
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
const
|
|
174
|
-
const
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
parsed.system
|
|
295
|
+
// Find and remove existing billing/agent blocks from wherever
|
|
296
|
+
// the client placed them (typically at system[0])
|
|
297
|
+
const billingIdx = parsed.system.findIndex((b) => typeof b.text === "string" && b.text.includes("x-anthropic-billing-header"));
|
|
298
|
+
const agentIdx = parsed.system.findIndex((b) => typeof b.text === "string" && b.text.includes("Claude Agent SDK"));
|
|
299
|
+
const billingBlock = {
|
|
300
|
+
type: "text",
|
|
301
|
+
text: buildStableClaudeCodeBillingHeader(parsed.system[billingIdx]?.text ?? snapshot?.body?.billingHeader),
|
|
302
|
+
};
|
|
303
|
+
// Remove in reverse index order so indices stay valid
|
|
304
|
+
const indicesToRemove = [billingIdx, agentIdx].filter((i) => i >= 0).sort((a, b) => b - a);
|
|
305
|
+
for (const idx of indicesToRemove) {
|
|
306
|
+
parsed.system.splice(idx, 1);
|
|
183
307
|
}
|
|
308
|
+
// Always append a deterministic billing block at the end.
|
|
309
|
+
// If the client sent one, we stripped its dynamic cch= and use
|
|
310
|
+
// our stable version instead. If not, we add ours.
|
|
311
|
+
parsed.system = [...parsed.system, billingBlock, agentBlock];
|
|
184
312
|
}
|
|
185
313
|
}
|
|
186
314
|
else {
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
if (!parsed.metadata?.user_id) {
|
|
191
|
-
const tokenPrefix = accountToken.substring(0, Math.min(20, accountToken.length));
|
|
192
|
-
const hash = Array.from(new TextEncoder().encode(tokenPrefix))
|
|
193
|
-
.reduce((a, b) => ((a << 5) - a + b) | 0, 0)
|
|
194
|
-
.toString(16)
|
|
195
|
-
.replace("-", "");
|
|
196
|
-
parsed.metadata = {
|
|
197
|
-
...parsed.metadata,
|
|
198
|
-
user_id: `proxy-${hash}`,
|
|
315
|
+
const billingBlock = {
|
|
316
|
+
type: "text",
|
|
317
|
+
text: buildStableClaudeCodeBillingHeader(snapshot?.body?.billingHeader),
|
|
199
318
|
};
|
|
319
|
+
parsed.system = [billingBlock, agentBlock];
|
|
200
320
|
}
|
|
201
|
-
|
|
321
|
+
// Inject Claude-Code-shaped metadata.user_id (required for OAuth).
|
|
322
|
+
const tokenPrefix = accountToken.substring(0, Math.min(20, accountToken.length));
|
|
323
|
+
const identity = getOrCreateClaudeCodeIdentity(tokenPrefix, {
|
|
324
|
+
existingUserId: parsed.metadata?.user_id ?? snapshot?.body?.metadataUserId,
|
|
325
|
+
preferredSessionId: preferredSessionId ?? snapshot?.body?.sessionId,
|
|
326
|
+
});
|
|
327
|
+
parsed.metadata = {
|
|
328
|
+
...parsed.metadata,
|
|
329
|
+
user_id: identity.metadataUserId,
|
|
330
|
+
};
|
|
331
|
+
return { bodyStr: JSON.stringify(parsed), sessionId: identity.sessionId };
|
|
202
332
|
}
|
|
203
333
|
catch {
|
|
204
|
-
return bodyStr; // JSON parse failed — use original body
|
|
334
|
+
return { bodyStr }; // JSON parse failed — use original body
|
|
205
335
|
}
|
|
206
336
|
}
|
|
207
337
|
// ---------------------------------------------------------------------------
|
|
@@ -269,7 +399,8 @@ async function tryLoadLegacyAccount(creds, legacyCredPath) {
|
|
|
269
399
|
* @param basePath - Base path prefix (default: "" since Claude API uses /v1/...).
|
|
270
400
|
* @returns RouteGroup with Claude-compatible endpoints.
|
|
271
401
|
*/
|
|
272
|
-
|
|
402
|
+
// eslint-disable-next-line max-lines-per-function
|
|
403
|
+
export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrategy = "fill-first", passthroughMode = false) {
|
|
273
404
|
return {
|
|
274
405
|
prefix: `${basePath}/v1`,
|
|
275
406
|
routes: [
|
|
@@ -282,8 +413,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
282
413
|
handler: async (ctx) => {
|
|
283
414
|
const body = ctx.body;
|
|
284
415
|
// 1. Validate
|
|
285
|
-
if (typeof body?.model !== "string" ||
|
|
286
|
-
!Array.isArray(body?.messages)) {
|
|
416
|
+
if (typeof body?.model !== "string" || !Array.isArray(body?.messages)) {
|
|
287
417
|
return buildClaudeError(400, "Missing required fields: model, messages");
|
|
288
418
|
}
|
|
289
419
|
// 2. Resolve model via router (or pass through to anthropic)
|
|
@@ -298,15 +428,476 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
298
428
|
provider: "anthropic",
|
|
299
429
|
model: body.model,
|
|
300
430
|
};
|
|
431
|
+
const clientRequestBody = JSON.stringify(body);
|
|
432
|
+
// ── OTel tracing ──────────────────────────────────────
|
|
433
|
+
let tracer;
|
|
434
|
+
try {
|
|
435
|
+
tracer = ProxyTracer.startRequest({
|
|
436
|
+
requestId: ctx.requestId,
|
|
437
|
+
method: ctx.method,
|
|
438
|
+
path: ctx.path,
|
|
439
|
+
model: body.model,
|
|
440
|
+
stream: body.stream ?? false,
|
|
441
|
+
toolCount: Array.isArray(body.tools) ? body.tools.length : 0,
|
|
442
|
+
sessionId: ctx.headers["x-neurolink-session-id"] ?? ctx.headers["x-claude-code-session-id"] ?? undefined,
|
|
443
|
+
userAgent: ctx.headers["user-agent"] ?? undefined,
|
|
444
|
+
}, ctx.headers);
|
|
445
|
+
const receiveSpan = tracer.startReceive();
|
|
446
|
+
tracer.logRequestHeaders(ctx.headers);
|
|
447
|
+
tracer.logRequestBody(clientRequestBody);
|
|
448
|
+
receiveSpan.end();
|
|
449
|
+
}
|
|
450
|
+
catch {
|
|
451
|
+
// Graceful degradation — continue without tracing
|
|
452
|
+
tracer = undefined;
|
|
453
|
+
}
|
|
454
|
+
const requestStartTime = Date.now();
|
|
455
|
+
const logProxyBody = (capture) => {
|
|
456
|
+
const traceCtx = tracer?.getTraceContext();
|
|
457
|
+
void logBodyCapture({
|
|
458
|
+
timestamp: new Date().toISOString(),
|
|
459
|
+
requestId: ctx.requestId,
|
|
460
|
+
model: body.model,
|
|
461
|
+
stream: body.stream ?? false,
|
|
462
|
+
...capture,
|
|
463
|
+
...(traceCtx ? { traceId: traceCtx.traceId, spanId: traceCtx.spanId } : {}),
|
|
464
|
+
});
|
|
465
|
+
};
|
|
466
|
+
const logFinalRequest = (status, accountLabel, accountType, errorType, errorMessage, extra) => {
|
|
467
|
+
const traceCtx = tracer?.getTraceContext();
|
|
468
|
+
logRequest({
|
|
469
|
+
timestamp: new Date().toISOString(),
|
|
470
|
+
requestId: ctx.requestId,
|
|
471
|
+
method: ctx.method,
|
|
472
|
+
path: ctx.path,
|
|
473
|
+
model: body.model,
|
|
474
|
+
stream: !!body.stream,
|
|
475
|
+
toolCount: Array.isArray(body.tools) ? body.tools.length : 0,
|
|
476
|
+
account: accountLabel,
|
|
477
|
+
accountType,
|
|
478
|
+
responseStatus: status,
|
|
479
|
+
responseTimeMs: Date.now() - requestStartTime,
|
|
480
|
+
...(errorType ? { errorType } : {}),
|
|
481
|
+
...(errorMessage ? { errorMessage } : {}),
|
|
482
|
+
...(extra?.inputTokens !== undefined ? { inputTokens: extra.inputTokens } : {}),
|
|
483
|
+
...(extra?.outputTokens !== undefined ? { outputTokens: extra.outputTokens } : {}),
|
|
484
|
+
...(extra?.cacheCreationTokens !== undefined ? { cacheCreationTokens: extra.cacheCreationTokens } : {}),
|
|
485
|
+
...(extra?.cacheReadTokens !== undefined ? { cacheReadTokens: extra.cacheReadTokens } : {}),
|
|
486
|
+
...(traceCtx ? { traceId: traceCtx.traceId, spanId: traceCtx.spanId } : {}),
|
|
487
|
+
});
|
|
488
|
+
};
|
|
489
|
+
logProxyBody({
|
|
490
|
+
phase: "client_request",
|
|
491
|
+
headers: ctx.headers,
|
|
492
|
+
body: clientRequestBody,
|
|
493
|
+
bodySize: Buffer.byteLength(clientRequestBody, "utf8"),
|
|
494
|
+
contentType: ctx.headers["content-type"] ?? "application/json",
|
|
495
|
+
});
|
|
496
|
+
/**
 * Builds an error body with `buildClaudeError` and logs it before returning.
 *
 * In order: calls `recordFinalError`, writes the final request log via
 * `logFinalRequest`, then writes a `client_response` body capture via
 * `logProxyBody`.
 *
 * @param {number} status - Status passed to `buildClaudeError` and logged.
 * @param {string} message - Error message.
 * @param {string} [errorType] - Error category forwarded to `buildClaudeError`.
 * @param {object} [extra] - Optional fields. `account` / `accountType` feed
 *   the final-error record and request log (defaulting to "" and "final" in
 *   the log). The whole object is spread last into the body capture, so its
 *   keys override the capture defaults.
 * @returns {object} The object returned by `buildClaudeError`.
 */
const buildLoggedClaudeError = (status, message, errorType, extra) => {
    const payload = buildClaudeError(status, message, errorType);
    const serialized = JSON.stringify(payload);
    recordFinalError(status, extra?.account, extra?.accountType);
    const loggedAccount = extra?.account ?? "";
    const loggedAccountType = extra?.accountType ?? "final";
    logFinalRequest(status, loggedAccount, loggedAccountType, errorType, message);
    const capture = {
        phase: "client_response",
        headers: { "content-type": "application/json" },
        body: serialized,
        // Byte length, not string length: the body may contain multi-byte characters.
        bodySize: Buffer.byteLength(serialized, "utf8"),
        contentType: "application/json",
        responseStatus: status,
        durationMs: Date.now() - requestStartTime,
        ...extra,
    };
    logProxyBody(capture);
    return payload;
};
|
|
301
513
|
try {
|
|
302
514
|
// 3. Route based on target provider
|
|
303
515
|
if (route.provider === null) {
|
|
304
|
-
|
|
305
|
-
|
|
516
|
+
tracer?.setError("not_found_error", `Model '${body.model}' is not a Claude model.`);
|
|
517
|
+
tracer?.end(404, Date.now() - requestStartTime);
|
|
518
|
+
return buildLoggedClaudeError(404, `Model '${body.model}' is not a Claude model. ` + `Use a model router to route it to another provider.`);
|
|
306
519
|
}
|
|
307
520
|
const isClaudeTarget = route.provider === "anthropic";
|
|
308
521
|
if (isClaudeTarget) {
|
|
309
|
-
//
|
|
522
|
+
// --- PASSTHROUGH MODE (Claude -> Claude) -------------------
|
|
523
|
+
tracer?.setMode("passthrough");
|
|
524
|
+
// ── CLI --passthrough: raw transparent forwarding ──────
|
|
525
|
+
if (passthroughMode) {
|
|
526
|
+
tracer?.setMode("passthrough-cli");
|
|
527
|
+
const bodyStr = clientRequestBody;
|
|
528
|
+
const toolCount = Array.isArray(body.tools) ? body.tools.length : 0;
|
|
529
|
+
// Forward client headers as-is, filtering blocked ones
|
|
530
|
+
const upstreamHeaders = {};
|
|
531
|
+
for (const [key, value] of Object.entries(ctx.headers)) {
|
|
532
|
+
if (!BLOCKED_UPSTREAM_HEADERS.has(key.toLowerCase()) && value) {
|
|
533
|
+
upstreamHeaders[key] = value;
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
// Ensure content-type is set
|
|
537
|
+
if (!upstreamHeaders["content-type"]) {
|
|
538
|
+
upstreamHeaders["content-type"] = "application/json";
|
|
539
|
+
}
|
|
540
|
+
const upstreamSpan = tracer?.startUpstreamAttempt({
|
|
541
|
+
account: "passthrough",
|
|
542
|
+
attempt: 1,
|
|
543
|
+
polyfillHeaders: false,
|
|
544
|
+
polyfillBody: false,
|
|
545
|
+
upstreamUrl: "https://api.anthropic.com/v1/messages?beta=true",
|
|
546
|
+
});
|
|
547
|
+
tracer?.logUpstreamRequestHeaders(upstreamHeaders);
|
|
548
|
+
tracer?.logUpstreamRequestBody(bodyStr);
|
|
549
|
+
logProxyBody({
|
|
550
|
+
phase: "upstream_request",
|
|
551
|
+
headers: upstreamHeaders,
|
|
552
|
+
body: bodyStr,
|
|
553
|
+
bodySize: Buffer.byteLength(bodyStr, "utf8"),
|
|
554
|
+
contentType: upstreamHeaders["content-type"] ?? "application/json",
|
|
555
|
+
account: "passthrough",
|
|
556
|
+
accountType: "passthrough",
|
|
557
|
+
attempt: 1,
|
|
558
|
+
});
|
|
559
|
+
let response;
|
|
560
|
+
try {
|
|
561
|
+
response = await fetch("https://api.anthropic.com/v1/messages?beta=true", {
|
|
562
|
+
method: "POST",
|
|
563
|
+
headers: upstreamHeaders,
|
|
564
|
+
body: bodyStr,
|
|
565
|
+
signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
|
|
566
|
+
});
|
|
567
|
+
}
|
|
568
|
+
catch (fetchErr) {
|
|
569
|
+
const errMsg = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
|
|
570
|
+
tracer?.setError("network_error", errMsg);
|
|
571
|
+
upstreamSpan?.end();
|
|
572
|
+
tracer?.end(502, Date.now() - requestStartTime);
|
|
573
|
+
logRequest({
|
|
574
|
+
timestamp: new Date().toISOString(),
|
|
575
|
+
requestId: ctx.requestId,
|
|
576
|
+
method: ctx.method,
|
|
577
|
+
path: ctx.path,
|
|
578
|
+
model: body.model,
|
|
579
|
+
stream: body.stream ?? false,
|
|
580
|
+
toolCount,
|
|
581
|
+
account: "passthrough",
|
|
582
|
+
accountType: "passthrough",
|
|
583
|
+
responseStatus: 502,
|
|
584
|
+
responseTimeMs: Date.now() - requestStartTime,
|
|
585
|
+
errorType: "network_error",
|
|
586
|
+
errorMessage: errMsg,
|
|
587
|
+
});
|
|
588
|
+
const errorBody = buildClaudeError(502, `Passthrough fetch failed: ${errMsg}`);
|
|
589
|
+
logProxyBody({
|
|
590
|
+
phase: "client_response",
|
|
591
|
+
headers: { "content-type": "application/json" },
|
|
592
|
+
body: JSON.stringify(errorBody),
|
|
593
|
+
bodySize: Buffer.byteLength(JSON.stringify(errorBody), "utf8"),
|
|
594
|
+
contentType: "application/json",
|
|
595
|
+
account: "passthrough",
|
|
596
|
+
accountType: "passthrough",
|
|
597
|
+
attempt: 1,
|
|
598
|
+
responseStatus: 502,
|
|
599
|
+
durationMs: Date.now() - requestStartTime,
|
|
600
|
+
});
|
|
601
|
+
return errorBody;
|
|
602
|
+
}
|
|
603
|
+
const upstreamResponseHeaders = {};
|
|
604
|
+
response.headers.forEach((v, k) => {
|
|
605
|
+
upstreamResponseHeaders[k] = v;
|
|
606
|
+
});
|
|
607
|
+
tracer?.logUpstreamResponseHeaders(upstreamResponseHeaders);
|
|
608
|
+
if (!response.ok) {
|
|
609
|
+
const errorText = await response.text();
|
|
610
|
+
tracer?.logUpstreamResponseBody(errorText);
|
|
611
|
+
logProxyBody({
|
|
612
|
+
phase: "upstream_response",
|
|
613
|
+
headers: upstreamResponseHeaders,
|
|
614
|
+
body: errorText,
|
|
615
|
+
bodySize: Buffer.byteLength(errorText, "utf8"),
|
|
616
|
+
contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
|
|
617
|
+
account: "passthrough",
|
|
618
|
+
accountType: "passthrough",
|
|
619
|
+
attempt: 1,
|
|
620
|
+
responseStatus: response.status,
|
|
621
|
+
durationMs: Date.now() - requestStartTime,
|
|
622
|
+
});
|
|
623
|
+
logProxyBody({
|
|
624
|
+
phase: "client_response",
|
|
625
|
+
headers: upstreamResponseHeaders,
|
|
626
|
+
body: errorText,
|
|
627
|
+
bodySize: Buffer.byteLength(errorText, "utf8"),
|
|
628
|
+
contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
|
|
629
|
+
account: "passthrough",
|
|
630
|
+
accountType: "passthrough",
|
|
631
|
+
attempt: 1,
|
|
632
|
+
responseStatus: response.status,
|
|
633
|
+
durationMs: Date.now() - requestStartTime,
|
|
634
|
+
});
|
|
635
|
+
upstreamSpan?.end();
|
|
636
|
+
tracer?.setError("api_error", errorText.slice(0, 500));
|
|
637
|
+
tracer?.end(response.status, Date.now() - requestStartTime);
|
|
638
|
+
try {
|
|
639
|
+
return JSON.parse(errorText);
|
|
640
|
+
}
|
|
641
|
+
catch {
|
|
642
|
+
return buildClaudeError(response.status, errorText);
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
// Streaming response
|
|
646
|
+
if (body.stream && response.body) {
|
|
647
|
+
const responseHeaders = { ...upstreamResponseHeaders };
|
|
648
|
+
const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture();
|
|
649
|
+
let streamSource = response.body;
|
|
650
|
+
if (tracer) {
|
|
651
|
+
try {
|
|
652
|
+
const { stream: interceptor, telemetry } = createSSEInterceptor({ captureRawText: true });
|
|
653
|
+
streamSource = streamSource.pipeThrough(interceptor);
|
|
654
|
+
const capturedTracer = tracer;
|
|
655
|
+
const capturedUpstreamSpan = upstreamSpan;
|
|
656
|
+
const capturedResponse = response;
|
|
657
|
+
const capturedRequestBytes = bodyStr.length;
|
|
658
|
+
Promise.all([telemetry, clientCapture])
|
|
659
|
+
.then(([data, clientBody]) => {
|
|
660
|
+
capturedTracer.setUsage({
|
|
661
|
+
inputTokens: data.usage.inputTokens,
|
|
662
|
+
outputTokens: data.usage.outputTokens,
|
|
663
|
+
cacheCreationTokens: data.usage.cacheCreationInputTokens,
|
|
664
|
+
cacheReadTokens: data.usage.cacheReadInputTokens,
|
|
665
|
+
});
|
|
666
|
+
capturedTracer.logStreamEvents(data.events);
|
|
667
|
+
const rateLimit5h = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
|
|
668
|
+
const rateLimit7d = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
|
|
669
|
+
const usageUpdate = {
|
|
670
|
+
inputTokens: data.usage.inputTokens,
|
|
671
|
+
outputTokens: data.usage.outputTokens,
|
|
672
|
+
cacheCreationTokens: data.usage.cacheCreationInputTokens,
|
|
673
|
+
cacheReadTokens: data.usage.cacheReadInputTokens,
|
|
674
|
+
};
|
|
675
|
+
if (!isNaN(rateLimit5h)) {
|
|
676
|
+
usageUpdate.rateLimitAfter5h = rateLimit5h;
|
|
677
|
+
}
|
|
678
|
+
if (!isNaN(rateLimit7d)) {
|
|
679
|
+
usageUpdate.rateLimitAfter7d = rateLimit7d;
|
|
680
|
+
}
|
|
681
|
+
if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) {
|
|
682
|
+
capturedTracer.setUsage(usageUpdate);
|
|
683
|
+
}
|
|
684
|
+
capturedTracer.logUpstreamResponseBody(data.rawText ?? "");
|
|
685
|
+
capturedTracer.recordMetrics();
|
|
686
|
+
capturedTracer.recordBodySizes(capturedRequestBytes, data.totalBytesReceived);
|
|
687
|
+
capturedUpstreamSpan?.end();
|
|
688
|
+
capturedTracer.end(200, Date.now() - requestStartTime);
|
|
689
|
+
const traceCtx = capturedTracer.getTraceContext();
|
|
690
|
+
logRequest({
|
|
691
|
+
timestamp: new Date().toISOString(),
|
|
692
|
+
requestId: ctx.requestId,
|
|
693
|
+
method: ctx.method,
|
|
694
|
+
path: ctx.path,
|
|
695
|
+
model: body.model,
|
|
696
|
+
stream: true,
|
|
697
|
+
toolCount,
|
|
698
|
+
account: "passthrough",
|
|
699
|
+
accountType: "passthrough",
|
|
700
|
+
responseStatus: 200,
|
|
701
|
+
responseTimeMs: Date.now() - requestStartTime,
|
|
702
|
+
inputTokens: data.usage.inputTokens,
|
|
703
|
+
outputTokens: data.usage.outputTokens,
|
|
704
|
+
cacheCreationTokens: data.usage.cacheCreationInputTokens,
|
|
705
|
+
cacheReadTokens: data.usage.cacheReadInputTokens,
|
|
706
|
+
traceId: traceCtx.traceId,
|
|
707
|
+
spanId: traceCtx.spanId,
|
|
708
|
+
});
|
|
709
|
+
logProxyBody({
|
|
710
|
+
phase: "upstream_response",
|
|
711
|
+
headers: responseHeaders,
|
|
712
|
+
body: data.rawText ?? "",
|
|
713
|
+
bodySize: data.totalBytesReceived,
|
|
714
|
+
contentType: responseHeaders["content-type"] ?? "text/event-stream",
|
|
715
|
+
account: "passthrough",
|
|
716
|
+
accountType: "passthrough",
|
|
717
|
+
attempt: 1,
|
|
718
|
+
responseStatus: 200,
|
|
719
|
+
durationMs: Date.now() - requestStartTime,
|
|
720
|
+
});
|
|
721
|
+
logProxyBody({
|
|
722
|
+
phase: "client_response",
|
|
723
|
+
headers: responseHeaders,
|
|
724
|
+
body: clientBody.text,
|
|
725
|
+
bodySize: clientBody.totalBytes,
|
|
726
|
+
contentType: responseHeaders["content-type"] ?? "text/event-stream",
|
|
727
|
+
account: "passthrough",
|
|
728
|
+
accountType: "passthrough",
|
|
729
|
+
attempt: 1,
|
|
730
|
+
responseStatus: 200,
|
|
731
|
+
durationMs: Date.now() - requestStartTime,
|
|
732
|
+
});
|
|
733
|
+
})
|
|
734
|
+
.catch((err) => {
|
|
735
|
+
capturedTracer.setError("stream_error", err instanceof Error ? err.message : String(err));
|
|
736
|
+
capturedUpstreamSpan?.end();
|
|
737
|
+
capturedTracer.end(500, Date.now() - requestStartTime);
|
|
738
|
+
const traceCtx = capturedTracer.getTraceContext();
|
|
739
|
+
logRequest({
|
|
740
|
+
timestamp: new Date().toISOString(),
|
|
741
|
+
requestId: ctx.requestId,
|
|
742
|
+
method: ctx.method,
|
|
743
|
+
path: ctx.path,
|
|
744
|
+
model: body.model,
|
|
745
|
+
stream: true,
|
|
746
|
+
toolCount,
|
|
747
|
+
account: "passthrough",
|
|
748
|
+
accountType: "passthrough",
|
|
749
|
+
responseStatus: 500,
|
|
750
|
+
responseTimeMs: Date.now() - requestStartTime,
|
|
751
|
+
errorType: "stream_error",
|
|
752
|
+
errorMessage: err instanceof Error ? err.message : String(err),
|
|
753
|
+
traceId: traceCtx.traceId,
|
|
754
|
+
spanId: traceCtx.spanId,
|
|
755
|
+
});
|
|
756
|
+
});
|
|
757
|
+
}
|
|
758
|
+
catch {
|
|
759
|
+
// Streaming capture is best-effort; request completion is handled elsewhere.
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
else {
|
|
763
|
+
clientCapture
|
|
764
|
+
.then((clientBody) => {
|
|
765
|
+
logProxyBody({
|
|
766
|
+
phase: "upstream_response",
|
|
767
|
+
headers: responseHeaders,
|
|
768
|
+
body: clientBody.text,
|
|
769
|
+
bodySize: clientBody.totalBytes,
|
|
770
|
+
contentType: responseHeaders["content-type"] ?? "text/event-stream",
|
|
771
|
+
account: "passthrough",
|
|
772
|
+
accountType: "passthrough",
|
|
773
|
+
attempt: 1,
|
|
774
|
+
responseStatus: 200,
|
|
775
|
+
durationMs: Date.now() - requestStartTime,
|
|
776
|
+
});
|
|
777
|
+
logProxyBody({
|
|
778
|
+
phase: "client_response",
|
|
779
|
+
headers: responseHeaders,
|
|
780
|
+
body: clientBody.text,
|
|
781
|
+
bodySize: clientBody.totalBytes,
|
|
782
|
+
contentType: responseHeaders["content-type"] ?? "text/event-stream",
|
|
783
|
+
account: "passthrough",
|
|
784
|
+
accountType: "passthrough",
|
|
785
|
+
attempt: 1,
|
|
786
|
+
responseStatus: 200,
|
|
787
|
+
durationMs: Date.now() - requestStartTime,
|
|
788
|
+
});
|
|
789
|
+
})
|
|
790
|
+
.catch(() => {
|
|
791
|
+
// Non-fatal
|
|
792
|
+
});
|
|
793
|
+
}
|
|
794
|
+
const clientStream = streamSource.pipeThrough(clientCaptureStream);
|
|
795
|
+
return new Response(clientStream, {
|
|
796
|
+
status: response.status,
|
|
797
|
+
headers: responseHeaders,
|
|
798
|
+
});
|
|
799
|
+
}
|
|
800
|
+
// Non-streaming response
|
|
801
|
+
const responseText = await response.text();
|
|
802
|
+
tracer?.logUpstreamResponseBody(responseText);
|
|
803
|
+
logProxyBody({
|
|
804
|
+
phase: "upstream_response",
|
|
805
|
+
headers: upstreamResponseHeaders,
|
|
806
|
+
body: responseText,
|
|
807
|
+
bodySize: Buffer.byteLength(responseText, "utf8"),
|
|
808
|
+
contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
|
|
809
|
+
account: "passthrough",
|
|
810
|
+
accountType: "passthrough",
|
|
811
|
+
attempt: 1,
|
|
812
|
+
responseStatus: response.status,
|
|
813
|
+
durationMs: Date.now() - requestStartTime,
|
|
814
|
+
});
|
|
815
|
+
logProxyBody({
|
|
816
|
+
phase: "client_response",
|
|
817
|
+
headers: upstreamResponseHeaders,
|
|
818
|
+
body: responseText,
|
|
819
|
+
bodySize: Buffer.byteLength(responseText, "utf8"),
|
|
820
|
+
contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
|
|
821
|
+
account: "passthrough",
|
|
822
|
+
accountType: "passthrough",
|
|
823
|
+
attempt: 1,
|
|
824
|
+
responseStatus: response.status,
|
|
825
|
+
durationMs: Date.now() - requestStartTime,
|
|
826
|
+
});
|
|
827
|
+
const responseJson = JSON.parse(responseText);
|
|
828
|
+
if (tracer && responseJson && typeof responseJson === "object") {
|
|
829
|
+
const usage = responseJson.usage;
|
|
830
|
+
if (usage) {
|
|
831
|
+
tracer.setUsage({
|
|
832
|
+
inputTokens: usage.input_tokens ?? 0,
|
|
833
|
+
outputTokens: usage.output_tokens ?? 0,
|
|
834
|
+
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
|
835
|
+
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
|
836
|
+
});
|
|
837
|
+
const rateLimit5h = parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
|
|
838
|
+
const rateLimit7d = parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
|
|
839
|
+
if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) {
|
|
840
|
+
const usageWithRates = {
|
|
841
|
+
inputTokens: usage.input_tokens ?? 0,
|
|
842
|
+
outputTokens: usage.output_tokens ?? 0,
|
|
843
|
+
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
|
844
|
+
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
|
845
|
+
};
|
|
846
|
+
if (!isNaN(rateLimit5h)) {
|
|
847
|
+
usageWithRates.rateLimitAfter5h = rateLimit5h;
|
|
848
|
+
}
|
|
849
|
+
if (!isNaN(rateLimit7d)) {
|
|
850
|
+
usageWithRates.rateLimitAfter7d = rateLimit7d;
|
|
851
|
+
}
|
|
852
|
+
tracer.setUsage(usageWithRates);
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
tracer.recordMetrics();
|
|
856
|
+
const responseJsonStr = JSON.stringify(responseJson);
|
|
857
|
+
tracer.recordBodySizes(bodyStr.length, responseJsonStr.length);
|
|
858
|
+
upstreamSpan?.end();
|
|
859
|
+
tracer.end(response.status, Date.now() - requestStartTime);
|
|
860
|
+
const traceCtx = tracer.getTraceContext();
|
|
861
|
+
logRequest({
|
|
862
|
+
timestamp: new Date().toISOString(),
|
|
863
|
+
requestId: ctx.requestId,
|
|
864
|
+
method: ctx.method,
|
|
865
|
+
path: ctx.path,
|
|
866
|
+
model: body.model,
|
|
867
|
+
stream: false,
|
|
868
|
+
toolCount,
|
|
869
|
+
account: "passthrough",
|
|
870
|
+
accountType: "passthrough",
|
|
871
|
+
responseStatus: response.status,
|
|
872
|
+
responseTimeMs: Date.now() - requestStartTime,
|
|
873
|
+
inputTokens: usage?.input_tokens,
|
|
874
|
+
outputTokens: usage?.output_tokens,
|
|
875
|
+
cacheCreationTokens: usage?.cache_creation_input_tokens,
|
|
876
|
+
cacheReadTokens: usage?.cache_read_input_tokens,
|
|
877
|
+
traceId: traceCtx.traceId,
|
|
878
|
+
spanId: traceCtx.spanId,
|
|
879
|
+
});
|
|
880
|
+
}
|
|
881
|
+
else {
|
|
882
|
+
upstreamSpan?.end();
|
|
883
|
+
tracer?.end(response.status, Date.now() - requestStartTime);
|
|
884
|
+
logRequest({
|
|
885
|
+
timestamp: new Date().toISOString(),
|
|
886
|
+
requestId: ctx.requestId,
|
|
887
|
+
method: ctx.method,
|
|
888
|
+
path: ctx.path,
|
|
889
|
+
model: body.model,
|
|
890
|
+
stream: false,
|
|
891
|
+
toolCount,
|
|
892
|
+
account: "passthrough",
|
|
893
|
+
accountType: "passthrough",
|
|
894
|
+
responseStatus: response.status,
|
|
895
|
+
responseTimeMs: Date.now() - requestStartTime,
|
|
896
|
+
});
|
|
897
|
+
}
|
|
898
|
+
return responseJson;
|
|
899
|
+
}
|
|
900
|
+
// ── END CLI --passthrough ─────────────────────────────
|
|
310
901
|
const fs = await import("fs");
|
|
311
902
|
const os = await import("os");
|
|
312
903
|
const accounts = [];
|
|
@@ -330,8 +921,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
330
921
|
// On cold start, lastToken is empty — don't treat that as a
|
|
331
922
|
// credential change; only compare on subsequent reloads.
|
|
332
923
|
const tokens = await tokenStore.loadTokens(key);
|
|
333
|
-
const hasTrackedTokens = existingState.lastToken !== undefined &&
|
|
334
|
-
existingState.lastToken !== "";
|
|
924
|
+
const hasTrackedTokens = existingState.lastToken !== undefined && existingState.lastToken !== "";
|
|
335
925
|
const tokenChanged = tokens &&
|
|
336
926
|
hasTrackedTokens &&
|
|
337
927
|
(existingState.lastToken !== tokens.accessToken ||
|
|
@@ -436,13 +1026,14 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
436
1026
|
});
|
|
437
1027
|
}
|
|
438
1028
|
if (accounts.length === 0) {
|
|
439
|
-
|
|
1029
|
+
tracer?.setError("authentication_error", "No Anthropic credentials found");
|
|
1030
|
+
tracer?.end(401, Date.now() - requestStartTime);
|
|
1031
|
+
return buildLoggedClaudeError(401, "No Anthropic credentials found");
|
|
440
1032
|
}
|
|
441
1033
|
// Sync in-memory runtime state with current token material.
|
|
442
1034
|
for (const account of accounts) {
|
|
443
1035
|
const state = getOrCreateRuntimeState(account.key);
|
|
444
|
-
const tokenChanged = state.lastToken !== account.token ||
|
|
445
|
-
state.lastRefreshToken !== account.refreshToken;
|
|
1036
|
+
const tokenChanged = state.lastToken !== account.token || state.lastRefreshToken !== account.refreshToken;
|
|
446
1037
|
if (tokenChanged) {
|
|
447
1038
|
if (state.permanentlyDisabled) {
|
|
448
1039
|
logger.always(`[proxy] account=${account.label} credentials changed, re-enabling`);
|
|
@@ -456,11 +1047,13 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
456
1047
|
state.lastRefreshToken = account.refreshToken;
|
|
457
1048
|
}
|
|
458
1049
|
const enabledAccounts = accounts.filter((account) => {
|
|
459
|
-
return !getOrCreateRuntimeState(account.key)
|
|
460
|
-
.permanentlyDisabled;
|
|
1050
|
+
return !getOrCreateRuntimeState(account.key).permanentlyDisabled;
|
|
461
1051
|
});
|
|
462
1052
|
if (enabledAccounts.length === 0) {
|
|
463
|
-
|
|
1053
|
+
const reauthMsg = formatReauthMessage(accounts.map((account) => account.label));
|
|
1054
|
+
tracer?.setError("authentication_error", reauthMsg);
|
|
1055
|
+
tracer?.end(401, Date.now() - requestStartTime);
|
|
1056
|
+
return buildLoggedClaudeError(401, reauthMsg);
|
|
464
1057
|
}
|
|
465
1058
|
// Order accounts based on the configured strategy.
|
|
466
1059
|
// - fill-first: always start with the primary account;
|
|
@@ -472,8 +1065,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
472
1065
|
// (e.g. a new account was authenticated while the proxy was running).
|
|
473
1066
|
// Only applies to round-robin; fill-first uses primaryAccountIndex
|
|
474
1067
|
// as a sticky primary and should not be disrupted.
|
|
475
|
-
if (accountStrategy === "round-robin" &&
|
|
476
|
-
orderedAccounts.length !== lastKnownAccountCount) {
|
|
1068
|
+
if (accountStrategy === "round-robin" && orderedAccounts.length !== lastKnownAccountCount) {
|
|
477
1069
|
primaryAccountIndex = 0;
|
|
478
1070
|
lastKnownAccountCount = orderedAccounts.length;
|
|
479
1071
|
}
|
|
@@ -481,8 +1073,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
481
1073
|
if (accountStrategy === "round-robin") {
|
|
482
1074
|
// Advance the index on every request for even distribution
|
|
483
1075
|
const idx = primaryAccountIndex % orderedAccounts.length;
|
|
484
|
-
primaryAccountIndex =
|
|
485
|
-
(primaryAccountIndex + 1) % orderedAccounts.length;
|
|
1076
|
+
primaryAccountIndex = (primaryAccountIndex + 1) % orderedAccounts.length;
|
|
486
1077
|
if (idx > 0) {
|
|
487
1078
|
const head = orderedAccounts.splice(0, idx);
|
|
488
1079
|
orderedAccounts.push(...head);
|
|
@@ -501,24 +1092,30 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
501
1092
|
let sawRateLimit = false;
|
|
502
1093
|
let sawNetworkError = false;
|
|
503
1094
|
let sawTransientFailure = false;
|
|
1095
|
+
let invalidRequestFailure = null;
|
|
504
1096
|
let authFailureMessage = null;
|
|
505
|
-
const
|
|
1097
|
+
const normalizedAnthropicBody = normalizeClaudeRequestForAnthropic(body);
|
|
1098
|
+
const bodyStr = JSON.stringify(normalizedAnthropicBody);
|
|
506
1099
|
const requestStart = Date.now();
|
|
507
|
-
const toolCount = Array.isArray(body.tools)
|
|
508
|
-
? body.tools.length
|
|
509
|
-
: 0;
|
|
1100
|
+
const toolCount = Array.isArray(body.tools) ? body.tools.length : 0;
|
|
510
1101
|
const url = "https://api.anthropic.com/v1/messages?beta=true";
|
|
511
1102
|
const clientHeaders = ctx.headers ?? {};
|
|
1103
|
+
const clientSnapshotBody = extractSnapshotBody(body);
|
|
1104
|
+
const isClaudeClientRequest = isLikelyClaudeClient(clientHeaders, clientSnapshotBody);
|
|
1105
|
+
let attemptNumber = 0;
|
|
1106
|
+
// OTel: account selection span (covers the whole selection phase)
|
|
1107
|
+
const acctSelectionSpan = tracer?.startAccountSelection();
|
|
512
1108
|
for (const account of orderedAccounts) {
|
|
513
1109
|
const accountState = getOrCreateRuntimeState(account.key);
|
|
514
|
-
if (accountState.coolingUntil &&
|
|
515
|
-
accountState.coolingUntil > Date.now()) {
|
|
1110
|
+
if (accountState.coolingUntil && accountState.coolingUntil > Date.now()) {
|
|
516
1111
|
continue;
|
|
517
1112
|
}
|
|
518
|
-
const logAttempt = (status, errorType, errorMessage) => {
|
|
519
|
-
|
|
1113
|
+
const logAttempt = (status, errorType, errorMessage, extra) => {
|
|
1114
|
+
const traceCtx = tracer?.getTraceContext();
|
|
1115
|
+
logRequestAttempt({
|
|
520
1116
|
timestamp: new Date().toISOString(),
|
|
521
1117
|
requestId: ctx.requestId,
|
|
1118
|
+
attempt: attemptNumber,
|
|
522
1119
|
method: ctx.method,
|
|
523
1120
|
path: ctx.path,
|
|
524
1121
|
model: body.model,
|
|
@@ -530,8 +1127,31 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
530
1127
|
responseTimeMs: Date.now() - requestStart,
|
|
531
1128
|
...(errorType ? { errorType } : {}),
|
|
532
1129
|
...(errorMessage ? { errorMessage } : {}),
|
|
1130
|
+
...(extra?.inputTokens !== undefined ? { inputTokens: extra.inputTokens } : {}),
|
|
1131
|
+
...(extra?.outputTokens !== undefined ? { outputTokens: extra.outputTokens } : {}),
|
|
1132
|
+
...(extra?.cacheCreationTokens !== undefined
|
|
1133
|
+
? { cacheCreationTokens: extra.cacheCreationTokens }
|
|
1134
|
+
: {}),
|
|
1135
|
+
...(extra?.cacheReadTokens !== undefined ? { cacheReadTokens: extra.cacheReadTokens } : {}),
|
|
1136
|
+
...(traceCtx ? { traceId: traceCtx.traceId, spanId: traceCtx.spanId } : {}),
|
|
533
1137
|
});
|
|
534
1138
|
};
|
|
1139
|
+
// OTel: record account selection and start upstream attempt span
|
|
1140
|
+
attemptNumber++;
|
|
1141
|
+
if (tracer) {
|
|
1142
|
+
// End the selection span on first actual attempt
|
|
1143
|
+
if (attemptNumber === 1 && acctSelectionSpan) {
|
|
1144
|
+
tracer.setAccountSelection({
|
|
1145
|
+
strategy: accountStrategy,
|
|
1146
|
+
accountsTotal: accounts.length,
|
|
1147
|
+
accountsHealthy: enabledAccounts.length,
|
|
1148
|
+
selectedAccount: account.label,
|
|
1149
|
+
accountType: account.type,
|
|
1150
|
+
});
|
|
1151
|
+
acctSelectionSpan.end();
|
|
1152
|
+
}
|
|
1153
|
+
}
|
|
1154
|
+
let upstreamSpan;
|
|
535
1155
|
// Auto-refresh expiring access tokens once before making the request.
|
|
536
1156
|
if (needsRefresh(account)) {
|
|
537
1157
|
const refreshed = await refreshToken(account);
|
|
@@ -545,8 +1165,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
545
1165
|
accountState.consecutiveRefreshFailures += 1;
|
|
546
1166
|
lastError = `token refresh failed for account=${account.label}: ${refreshed.error?.slice(0, 200) ?? "unknown"}`;
|
|
547
1167
|
logger.debug(`[proxy] preflight refresh failed account=${account.label} failures=${accountState.consecutiveRefreshFailures}`);
|
|
548
|
-
if (accountState.consecutiveRefreshFailures >=
|
|
549
|
-
MAX_CONSECUTIVE_REFRESH_FAILURES) {
|
|
1168
|
+
if (accountState.consecutiveRefreshFailures >= MAX_CONSECUTIVE_REFRESH_FAILURES) {
|
|
550
1169
|
await disableAccountUntilReauth(account, accountState);
|
|
551
1170
|
authFailureMessage = formatReauthMessage(account.label);
|
|
552
1171
|
logAttempt(401, "authentication_error", String(lastError));
|
|
@@ -555,6 +1174,9 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
555
1174
|
}
|
|
556
1175
|
}
|
|
557
1176
|
const isOAuth = account.type === "oauth";
|
|
1177
|
+
const snapshot = isOAuth
|
|
1178
|
+
? await maybeRefreshClaudeSnapshot(account.label, account.key, clientHeaders, bodyStr)
|
|
1179
|
+
: null;
|
|
558
1180
|
// Decision 6: Passthrough client headers, fill gaps only.
|
|
559
1181
|
// Start with a copy of incoming client headers, then set
|
|
560
1182
|
// defaults for anything the client didn't send. Always
|
|
@@ -562,8 +1184,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
562
1184
|
const headers = {};
|
|
563
1185
|
for (const [hk, hv] of Object.entries(clientHeaders)) {
|
|
564
1186
|
const lower = hk.toLowerCase();
|
|
565
|
-
if (typeof hv === "string" &&
|
|
566
|
-
!BLOCKED_UPSTREAM_HEADERS.has(lower)) {
|
|
1187
|
+
if (typeof hv === "string" && !BLOCKED_UPSTREAM_HEADERS.has(lower)) {
|
|
567
1188
|
headers[lower] = hv;
|
|
568
1189
|
}
|
|
569
1190
|
}
|
|
@@ -579,11 +1200,11 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
579
1200
|
}
|
|
580
1201
|
// Apply header snapshot defaults for OAuth accounts
|
|
581
1202
|
if (isOAuth) {
|
|
582
|
-
|
|
1203
|
+
applySnapshotHeaders(headers, snapshot);
|
|
583
1204
|
}
|
|
584
1205
|
// Hard defaults for anything still missing
|
|
585
1206
|
if (!headers["user-agent"]) {
|
|
586
|
-
headers["user-agent"] =
|
|
1207
|
+
headers["user-agent"] = CLAUDE_CLI_USER_AGENT;
|
|
587
1208
|
}
|
|
588
1209
|
if (!headers["anthropic-version"]) {
|
|
589
1210
|
headers["anthropic-version"] = "2023-06-01";
|
|
@@ -591,15 +1212,25 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
591
1212
|
if (!headers["anthropic-dangerous-direct-browser-access"]) {
|
|
592
1213
|
headers["anthropic-dangerous-direct-browser-access"] = "true";
|
|
593
1214
|
}
|
|
1215
|
+
if (!headers["x-app"]) {
|
|
1216
|
+
headers["x-app"] = "cli";
|
|
1217
|
+
}
|
|
1218
|
+
if (!headers["accept"]) {
|
|
1219
|
+
headers["accept"] = "application/json";
|
|
1220
|
+
}
|
|
594
1221
|
// Manage anthropic-beta header based on auth type.
|
|
595
1222
|
// OAuth requires specific betas; API-key must NOT carry them.
|
|
596
1223
|
if (isOAuth) {
|
|
597
|
-
const
|
|
1224
|
+
const betaSeed = isClaudeClientRequest
|
|
1225
|
+
? (headers["anthropic-beta"] ?? "")
|
|
1226
|
+
: (clientHeaders["anthropic-beta"] ?? "");
|
|
1227
|
+
const existing = new Set(betaSeed
|
|
598
1228
|
.split(",")
|
|
599
1229
|
.map((s) => s.trim())
|
|
600
1230
|
.filter(Boolean));
|
|
601
|
-
|
|
602
|
-
|
|
1231
|
+
for (const beta of isClaudeClientRequest ? CLAUDE_CODE_OAUTH_BETAS : NON_CLAUDE_OAUTH_BETAS) {
|
|
1232
|
+
existing.add(beta);
|
|
1233
|
+
}
|
|
603
1234
|
headers["anthropic-beta"] = [...existing].join(",");
|
|
604
1235
|
}
|
|
605
1236
|
else {
|
|
@@ -607,7 +1238,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
607
1238
|
const cleaned = (headers["anthropic-beta"] ?? "")
|
|
608
1239
|
.split(",")
|
|
609
1240
|
.map((s) => s.trim())
|
|
610
|
-
.filter((s) => s && s
|
|
1241
|
+
.filter((s) => s && !CLAUDE_CODE_OAUTH_BETAS.includes(s))
|
|
611
1242
|
.join(",");
|
|
612
1243
|
if (cleaned) {
|
|
613
1244
|
headers["anthropic-beta"] = cleaned;
|
|
@@ -616,13 +1247,46 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
616
1247
|
delete headers["anthropic-beta"];
|
|
617
1248
|
}
|
|
618
1249
|
}
|
|
619
|
-
// Polyfill request body for OAuth accounts
|
|
620
|
-
|
|
621
|
-
|
|
1250
|
+
// Polyfill request body for ALL OAuth accounts.
|
|
1251
|
+
// Anthropic requires metadata.user_id and billing headers
|
|
1252
|
+
// for OAuth — not just Claude Code clients.
|
|
1253
|
+
const shouldPolyfillBody = isOAuth;
|
|
1254
|
+
const buildUpstreamBody = (token) => shouldPolyfillBody
|
|
1255
|
+
? polyfillOAuthBody(bodyStr, token, snapshot, headers["x-claude-code-session-id"])
|
|
1256
|
+
: { bodyStr };
|
|
1257
|
+
const polyfilledBody = buildUpstreamBody(account.token);
|
|
1258
|
+
if (isOAuth && polyfilledBody.sessionId && !headers["x-claude-code-session-id"]) {
|
|
1259
|
+
headers["x-claude-code-session-id"] = polyfilledBody.sessionId;
|
|
1260
|
+
}
|
|
1261
|
+
const finalBodyStr = polyfilledBody.bodyStr;
|
|
622
1262
|
logger.always(`[proxy] → account=${account.label} (${account.type})`);
|
|
623
|
-
|
|
1263
|
+
recordAttempt(account.label, account.type);
|
|
624
1264
|
// Log full request for debugging (written to ~/.neurolink/logs/proxy-debug-*.jsonl)
|
|
625
1265
|
const fetchStartMs = Date.now();
|
|
1266
|
+
// OTel: start upstream attempt span and inject trace headers
|
|
1267
|
+
if (tracer) {
|
|
1268
|
+
upstreamSpan = tracer.startUpstreamAttempt({
|
|
1269
|
+
attempt: attemptNumber,
|
|
1270
|
+
account: account.label,
|
|
1271
|
+
polyfillHeaders: isOAuth,
|
|
1272
|
+
polyfillBody: isOAuth,
|
|
1273
|
+
upstreamUrl: url,
|
|
1274
|
+
});
|
|
1275
|
+
tracer.logUpstreamRequestHeaders(headers);
|
|
1276
|
+
tracer.logUpstreamRequestBody(finalBodyStr);
|
|
1277
|
+
const traceHeaders = tracer.getTraceHeaders();
|
|
1278
|
+
Object.assign(headers, traceHeaders);
|
|
1279
|
+
}
|
|
1280
|
+
logProxyBody({
|
|
1281
|
+
phase: "upstream_request",
|
|
1282
|
+
headers,
|
|
1283
|
+
body: finalBodyStr,
|
|
1284
|
+
bodySize: Buffer.byteLength(finalBodyStr, "utf8"),
|
|
1285
|
+
contentType: headers["content-type"] ?? "application/json",
|
|
1286
|
+
account: account.label,
|
|
1287
|
+
accountType: account.type,
|
|
1288
|
+
attempt: attemptNumber,
|
|
1289
|
+
});
|
|
626
1290
|
let response;
|
|
627
1291
|
try {
|
|
628
1292
|
response = await fetch(url, {
|
|
@@ -638,14 +1302,16 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
638
1302
|
}
|
|
639
1303
|
// Decision 8: Network errors — immediate rotation, no cooldown
|
|
640
1304
|
sawNetworkError = true;
|
|
641
|
-
|
|
1305
|
+
recordAttemptError(account.label, account.type, 502);
|
|
642
1306
|
const errorCode = getErrorCode(fetchErr) ?? "unknown";
|
|
643
|
-
const errorMessage = fetchErr instanceof Error
|
|
644
|
-
? fetchErr.message
|
|
645
|
-
: String(fetchErr);
|
|
1307
|
+
const errorMessage = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
|
|
646
1308
|
lastError = errorMessage;
|
|
647
1309
|
logger.always(`[proxy] fetch error account=${account.label} code=${errorCode} (rotating): ${errorMessage}`);
|
|
648
1310
|
logAttempt(502, "network_error", errorMessage);
|
|
1311
|
+
tracer?.setError("network_error", errorMessage);
|
|
1312
|
+
tracer?.recordRetry(account.label, "network_error");
|
|
1313
|
+
upstreamSpan?.end();
|
|
1314
|
+
upstreamSpan = undefined;
|
|
649
1315
|
continue;
|
|
650
1316
|
}
|
|
651
1317
|
// Check 429 (with Retry-After + exponential backoff) → continue.
|
|
@@ -660,7 +1326,6 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
660
1326
|
}
|
|
661
1327
|
else {
|
|
662
1328
|
const date = new Date(retryAfter);
|
|
663
|
-
// eslint-disable-next-line max-depth
|
|
664
1329
|
if (!Number.isNaN(date.getTime())) {
|
|
665
1330
|
cooldownMs = Math.max(date.getTime() - Date.now(), 1000);
|
|
666
1331
|
}
|
|
@@ -668,22 +1333,24 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
668
1333
|
}
|
|
669
1334
|
const level = accountState.backoffLevel;
|
|
670
1335
|
const baseCooldown = cooldownMs > 0 ? cooldownMs : RATE_LIMIT_BACKOFF_BASE_MS;
|
|
671
|
-
const backoffMs = Math.min(baseCooldown *
|
|
1336
|
+
const backoffMs = Math.min(baseCooldown * 2 ** level, RATE_LIMIT_BACKOFF_CAP_MS);
|
|
672
1337
|
accountState.coolingUntil = Date.now() + backoffMs;
|
|
673
1338
|
accountState.backoffLevel += 1;
|
|
674
1339
|
advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
|
|
675
|
-
|
|
1340
|
+
recordAttemptError(account.label, account.type, 429);
|
|
676
1341
|
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
677
1342
|
lastError = await response.text();
|
|
678
1343
|
logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(backoffMs / 1000)}s`);
|
|
679
1344
|
logAttempt(429, "rate_limit_error", String(lastError));
|
|
1345
|
+
tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
|
|
1346
|
+
tracer?.recordRetry(account.label, "rate_limit");
|
|
1347
|
+
upstreamSpan?.end();
|
|
1348
|
+
upstreamSpan = undefined;
|
|
680
1349
|
continue;
|
|
681
1350
|
}
|
|
682
1351
|
// On 401 for refreshable OAuth: refresh token and retry before failing over.
|
|
683
|
-
if (response.status === 401 &&
|
|
684
|
-
account.type
|
|
685
|
-
account.refreshToken) {
|
|
686
|
-
recordError(account.label, account.type, 401);
|
|
1352
|
+
if (response.status === 401 && account.type === "oauth" && account.refreshToken) {
|
|
1353
|
+
recordAttemptError(account.label, account.type, 401);
|
|
687
1354
|
let authRetrySucceeded = false;
|
|
688
1355
|
let authRetryError = "received 401 from Anthropic";
|
|
689
1356
|
for (let authRetry = 0; authRetry < MAX_AUTH_RETRIES; authRetry++) {
|
|
@@ -694,14 +1361,11 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
694
1361
|
authRetryError = `refresh failed for account=${account.label} attempt ${authRetry + 1}/${MAX_AUTH_RETRIES}: ${refreshSucceeded.error?.slice(0, 200) ?? "unknown"}`;
|
|
695
1362
|
lastError = authRetryError;
|
|
696
1363
|
logger.always(`[proxy] ⚠ account=${account.label} refresh failed on attempt ${authRetry + 1}`);
|
|
697
|
-
|
|
698
|
-
if (accountState.consecutiveRefreshFailures >=
|
|
699
|
-
MAX_CONSECUTIVE_REFRESH_FAILURES) {
|
|
1364
|
+
if (accountState.consecutiveRefreshFailures >= MAX_CONSECUTIVE_REFRESH_FAILURES) {
|
|
700
1365
|
await disableAccountUntilReauth(account, accountState);
|
|
701
1366
|
authFailureMessage = formatReauthMessage(account.label);
|
|
702
1367
|
break;
|
|
703
1368
|
}
|
|
704
|
-
// eslint-disable-next-line max-depth
|
|
705
1369
|
if (authRetry < MAX_AUTH_RETRIES - 1) {
|
|
706
1370
|
await sleep(2000);
|
|
707
1371
|
}
|
|
@@ -715,27 +1379,24 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
715
1379
|
const retryResp = await fetch(url, {
|
|
716
1380
|
method: "POST",
|
|
717
1381
|
headers,
|
|
718
|
-
body: buildUpstreamBody(),
|
|
1382
|
+
body: buildUpstreamBody(account.token).bodyStr,
|
|
719
1383
|
signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
|
|
720
1384
|
});
|
|
721
|
-
// eslint-disable-next-line max-depth
|
|
722
1385
|
if (retryResp.ok) {
|
|
723
1386
|
authRetrySucceeded = true;
|
|
724
1387
|
accountState.consecutiveRefreshFailures = 0;
|
|
725
1388
|
accountState.backoffLevel = 0;
|
|
726
1389
|
accountState.coolingUntil = undefined;
|
|
727
1390
|
logger.always(`[proxy] ← 200 account=${account.label} (after ${authRetry + 1} refresh(es))`);
|
|
728
|
-
|
|
729
|
-
|
|
1391
|
+
// Final success is recorded only once the response path
|
|
1392
|
+
// that reaches the client is fully determined.
|
|
730
1393
|
// Capture quota headers after successful auth-retry
|
|
731
1394
|
{
|
|
732
1395
|
const retryQuota = parseQuotaHeaders(retryResp.headers);
|
|
733
|
-
// eslint-disable-next-line max-depth
|
|
734
1396
|
if (retryQuota) {
|
|
735
1397
|
saveAccountQuota(account.label, retryQuota).catch(() => { });
|
|
736
1398
|
}
|
|
737
1399
|
}
|
|
738
|
-
// eslint-disable-next-line max-depth
|
|
739
1400
|
if (body.stream && retryResp.body) {
|
|
740
1401
|
const retryReader = retryResp.body.getReader();
|
|
741
1402
|
let retryStreamClosed = false;
|
|
@@ -757,9 +1418,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
757
1418
|
controller.enqueue(value);
|
|
758
1419
|
}
|
|
759
1420
|
catch (streamErr) {
|
|
760
|
-
const errMsg = streamErr instanceof Error
|
|
761
|
-
? streamErr.message
|
|
762
|
-
: String(streamErr);
|
|
1421
|
+
const errMsg = streamErr instanceof Error ? streamErr.message : String(streamErr);
|
|
763
1422
|
logger.always(`[proxy] mid-stream error (auth-retry) account=${account.label}: ${errMsg}`);
|
|
764
1423
|
logStreamError({
|
|
765
1424
|
timestamp: new Date().toISOString(),
|
|
@@ -782,12 +1441,57 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
782
1441
|
retryReader.cancel();
|
|
783
1442
|
},
|
|
784
1443
|
});
|
|
1444
|
+
// OTel: pipe auth-retry stream through SSE interceptor
|
|
1445
|
+
let retryClientStream = retryStream;
|
|
1446
|
+
if (tracer) {
|
|
1447
|
+
try {
|
|
1448
|
+
const { stream: retryInterceptor, telemetry: retryTelemetry } = createSSEInterceptor();
|
|
1449
|
+
retryClientStream = retryStream.pipeThrough(retryInterceptor);
|
|
1450
|
+
const capturedTracer2 = tracer;
|
|
1451
|
+
const capturedUpstreamSpan2 = upstreamSpan;
|
|
1452
|
+
const capturedRetryResp = retryResp;
|
|
1453
|
+
const capturedRetryRequestBytes = finalBodyStr.length;
|
|
1454
|
+
const capturedAccountLabel2 = account.label;
|
|
1455
|
+
retryTelemetry
|
|
1456
|
+
.then((data) => {
|
|
1457
|
+
capturedTracer2.setUsage({
|
|
1458
|
+
inputTokens: data.usage.inputTokens,
|
|
1459
|
+
outputTokens: data.usage.outputTokens,
|
|
1460
|
+
cacheCreationTokens: data.usage.cacheCreationInputTokens,
|
|
1461
|
+
cacheReadTokens: data.usage.cacheReadInputTokens,
|
|
1462
|
+
});
|
|
1463
|
+
capturedTracer2.logStreamEvents(data.events);
|
|
1464
|
+
capturedTracer2.logUpstreamResponseHeaders(Object.fromEntries([...capturedRetryResp.headers.entries()]));
|
|
1465
|
+
capturedTracer2.recordMetrics();
|
|
1466
|
+
capturedTracer2.recordBodySizes(capturedRetryRequestBytes, data.totalBytesReceived);
|
|
1467
|
+
capturedUpstreamSpan2?.end();
|
|
1468
|
+
capturedTracer2.end(200, Date.now() - requestStartTime);
|
|
1469
|
+
recordFinalSuccess(capturedAccountLabel2, account.type);
|
|
1470
|
+
// Deferred JSONL log with token usage (auth-retry streaming)
|
|
1471
|
+
logFinalRequest(200, capturedAccountLabel2, account.type, undefined, undefined, {
|
|
1472
|
+
inputTokens: data.usage.inputTokens,
|
|
1473
|
+
outputTokens: data.usage.outputTokens,
|
|
1474
|
+
cacheCreationTokens: data.usage.cacheCreationInputTokens,
|
|
1475
|
+
cacheReadTokens: data.usage.cacheReadInputTokens,
|
|
1476
|
+
});
|
|
1477
|
+
})
|
|
1478
|
+
.catch((err) => {
|
|
1479
|
+
capturedTracer2.setError("stream_error", err instanceof Error ? err.message : String(err));
|
|
1480
|
+
capturedUpstreamSpan2?.end();
|
|
1481
|
+
capturedTracer2.end(500, Date.now() - requestStartTime);
|
|
1482
|
+
recordFinalError(500, capturedAccountLabel2, account.type);
|
|
1483
|
+
logFinalRequest(500, capturedAccountLabel2, account.type, "stream_error", err instanceof Error ? err.message : String(err));
|
|
1484
|
+
});
|
|
1485
|
+
}
|
|
1486
|
+
catch {
|
|
1487
|
+
retryClientStream = retryStream;
|
|
1488
|
+
}
|
|
1489
|
+
}
|
|
785
1490
|
const responseHeaders = {
|
|
786
1491
|
"content-type": "text/event-stream",
|
|
787
1492
|
"cache-control": "no-cache",
|
|
788
1493
|
connection: "keep-alive",
|
|
789
1494
|
};
|
|
790
|
-
// eslint-disable-next-line max-depth
|
|
791
1495
|
for (const h of [
|
|
792
1496
|
"retry-after",
|
|
793
1497
|
"anthropic-ratelimit-requests-remaining",
|
|
@@ -796,25 +1500,81 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
796
1500
|
"anthropic-ratelimit-tokens-limit",
|
|
797
1501
|
]) {
|
|
798
1502
|
const val = retryResp.headers.get(h);
|
|
799
|
-
// eslint-disable-next-line max-depth
|
|
800
1503
|
if (val) {
|
|
801
1504
|
responseHeaders[h] = val;
|
|
802
1505
|
}
|
|
803
1506
|
}
|
|
804
|
-
return new Response(
|
|
1507
|
+
return new Response(retryClientStream, {
|
|
805
1508
|
status: retryResp.status,
|
|
806
1509
|
headers: responseHeaders,
|
|
807
1510
|
});
|
|
808
1511
|
}
|
|
809
|
-
|
|
1512
|
+
// OTel: non-streaming auth-retry success
|
|
1513
|
+
const retryRespHeaders = Object.fromEntries([...retryResp.headers.entries()]);
|
|
1514
|
+
const retryText = await retryResp.text();
|
|
1515
|
+
tracer?.logUpstreamResponseHeaders(retryRespHeaders);
|
|
1516
|
+
tracer?.logUpstreamResponseBody(retryText);
|
|
1517
|
+
logProxyBody({
|
|
1518
|
+
phase: "upstream_response",
|
|
1519
|
+
headers: retryRespHeaders,
|
|
1520
|
+
body: retryText,
|
|
1521
|
+
bodySize: Buffer.byteLength(retryText, "utf8"),
|
|
1522
|
+
contentType: retryRespHeaders["content-type"] ?? "application/json",
|
|
1523
|
+
account: account.label,
|
|
1524
|
+
accountType: account.type,
|
|
1525
|
+
attempt: attemptNumber,
|
|
1526
|
+
responseStatus: retryResp.status,
|
|
1527
|
+
durationMs: Date.now() - fetchStartMs,
|
|
1528
|
+
});
|
|
1529
|
+
logProxyBody({
|
|
1530
|
+
phase: "client_response",
|
|
1531
|
+
headers: retryRespHeaders,
|
|
1532
|
+
body: retryText,
|
|
1533
|
+
bodySize: Buffer.byteLength(retryText, "utf8"),
|
|
1534
|
+
contentType: retryRespHeaders["content-type"] ?? "application/json",
|
|
1535
|
+
account: account.label,
|
|
1536
|
+
accountType: account.type,
|
|
1537
|
+
attempt: attemptNumber,
|
|
1538
|
+
responseStatus: retryResp.status,
|
|
1539
|
+
durationMs: Date.now() - requestStartTime,
|
|
1540
|
+
});
|
|
1541
|
+
const retryJson = JSON.parse(retryText);
|
|
1542
|
+
if (tracer && retryJson && typeof retryJson === "object") {
|
|
1543
|
+
const retryUsage = retryJson.usage;
|
|
1544
|
+
if (retryUsage) {
|
|
1545
|
+
tracer.setUsage({
|
|
1546
|
+
inputTokens: retryUsage.input_tokens ?? 0,
|
|
1547
|
+
outputTokens: retryUsage.output_tokens ?? 0,
|
|
1548
|
+
cacheCreationTokens: retryUsage.cache_creation_input_tokens ?? 0,
|
|
1549
|
+
cacheReadTokens: retryUsage.cache_read_input_tokens ?? 0,
|
|
1550
|
+
});
|
|
1551
|
+
}
|
|
1552
|
+
tracer.recordMetrics();
|
|
1553
|
+
const retryJsonStr = JSON.stringify(retryJson);
|
|
1554
|
+
tracer.recordBodySizes(finalBodyStr.length, retryJsonStr.length);
|
|
1555
|
+
upstreamSpan?.end();
|
|
1556
|
+
tracer.end(retryResp.status, Date.now() - requestStartTime);
|
|
1557
|
+
recordFinalSuccess(account.label, account.type);
|
|
1558
|
+
logFinalRequest(retryResp.status, account.label, account.type, undefined, undefined, {
|
|
1559
|
+
inputTokens: retryUsage?.input_tokens,
|
|
1560
|
+
outputTokens: retryUsage?.output_tokens,
|
|
1561
|
+
cacheCreationTokens: retryUsage?.cache_creation_input_tokens,
|
|
1562
|
+
cacheReadTokens: retryUsage?.cache_read_input_tokens,
|
|
1563
|
+
});
|
|
1564
|
+
}
|
|
1565
|
+
else {
|
|
1566
|
+
upstreamSpan?.end();
|
|
1567
|
+
recordFinalSuccess(account.label, account.type);
|
|
1568
|
+
logFinalRequest(retryResp.status, account.label, account.type);
|
|
1569
|
+
}
|
|
1570
|
+
return retryJson;
|
|
810
1571
|
}
|
|
811
1572
|
const retryStatus = retryResp.status;
|
|
812
1573
|
const retryBody = await retryResp.text();
|
|
813
1574
|
authRetryError = `retry ${authRetry + 1}/${MAX_AUTH_RETRIES} failed with status ${retryStatus}`;
|
|
814
1575
|
lastError = retryBody;
|
|
815
1576
|
logger.debug(`[proxy] retry ${authRetry + 1} failed: ${retryStatus} ${retryBody.substring(0, 120)}`);
|
|
816
|
-
|
|
817
|
-
// eslint-disable-next-line max-depth
|
|
1577
|
+
recordAttemptError(account.label, account.type, retryStatus);
|
|
818
1578
|
if (retryStatus === 429) {
|
|
819
1579
|
sawRateLimit = true;
|
|
820
1580
|
const retryAfter = retryResp.headers.get("retry-after");
|
|
@@ -827,38 +1587,33 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
827
1587
|
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
828
1588
|
break;
|
|
829
1589
|
}
|
|
830
|
-
|
|
831
|
-
if (retryStatus === 401 ||
|
|
832
|
-
retryStatus === 402 ||
|
|
833
|
-
retryStatus === 403) {
|
|
834
|
-
// eslint-disable-next-line max-depth
|
|
1590
|
+
if (retryStatus === 401 || retryStatus === 402 || retryStatus === 403) {
|
|
835
1591
|
if (authRetry < MAX_AUTH_RETRIES - 1) {
|
|
836
1592
|
await sleep(1000);
|
|
837
1593
|
}
|
|
838
1594
|
continue;
|
|
839
1595
|
}
|
|
840
|
-
// eslint-disable-next-line max-depth
|
|
841
1596
|
if (isTransientHttpFailure(retryStatus, retryBody)) {
|
|
842
1597
|
// Decision 8: No cooldown for transient errors — rotate immediately
|
|
843
1598
|
sawTransientFailure = true;
|
|
844
1599
|
break;
|
|
845
1600
|
}
|
|
846
1601
|
logAttempt(retryStatus, "api_error", summarizeErrorMessage(retryBody));
|
|
847
|
-
|
|
1602
|
+
recordFinalError(retryStatus, account.label, account.type);
|
|
848
1603
|
try {
|
|
1604
|
+
logFinalRequest(retryStatus, account.label, account.type, "api_error", summarizeErrorMessage(retryBody));
|
|
849
1605
|
return JSON.parse(retryBody);
|
|
850
1606
|
}
|
|
851
1607
|
catch {
|
|
1608
|
+
logFinalRequest(retryStatus, account.label, account.type, "api_error", summarizeErrorMessage(retryBody));
|
|
852
1609
|
return buildClaudeError(retryStatus, retryBody);
|
|
853
1610
|
}
|
|
854
1611
|
}
|
|
855
1612
|
catch (retryFetchErr) {
|
|
856
1613
|
// Decision 8: No cooldown for network errors — rotate immediately
|
|
857
1614
|
sawNetworkError = true;
|
|
858
|
-
|
|
859
|
-
const message = retryFetchErr instanceof Error
|
|
860
|
-
? retryFetchErr.message
|
|
861
|
-
: String(retryFetchErr);
|
|
1615
|
+
recordAttemptError(account.label, account.type, 502);
|
|
1616
|
+
const message = retryFetchErr instanceof Error ? retryFetchErr.message : String(retryFetchErr);
|
|
862
1617
|
authRetryError = `network error on retry ${authRetry + 1}: ${message}`;
|
|
863
1618
|
lastError = authRetryError;
|
|
864
1619
|
logger.debug(`[proxy] ${authRetryError}`);
|
|
@@ -866,96 +1621,83 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
866
1621
|
}
|
|
867
1622
|
}
|
|
868
1623
|
if (!authRetrySucceeded) {
|
|
869
|
-
// eslint-disable-next-line max-depth
|
|
870
1624
|
if (!accountState.permanentlyDisabled) {
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
accountState.coolingUntil <= Date.now()) {
|
|
874
|
-
accountState.coolingUntil =
|
|
875
|
-
Date.now() + AUTH_COOLDOWN_MS;
|
|
1625
|
+
if (!accountState.coolingUntil || accountState.coolingUntil <= Date.now()) {
|
|
1626
|
+
accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
|
|
876
1627
|
}
|
|
877
1628
|
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
878
1629
|
}
|
|
879
1630
|
lastError = authRetryError;
|
|
880
1631
|
logger.always(`[proxy] ⚠ account=${account.label} auth retries exhausted, cooldown=5min`);
|
|
881
1632
|
logAttempt(401, "authentication_error", authRetryError);
|
|
1633
|
+
tracer?.setError("authentication_error", authRetryError);
|
|
1634
|
+
tracer?.recordRetry(account.label, "auth_exhausted");
|
|
1635
|
+
upstreamSpan?.end();
|
|
1636
|
+
upstreamSpan = undefined;
|
|
882
1637
|
continue;
|
|
883
1638
|
}
|
|
884
1639
|
}
|
|
885
1640
|
if (!response.ok) {
|
|
886
1641
|
const errBody = await response.text();
|
|
887
|
-
// Log full error for debugging
|
|
888
1642
|
const errRespHeaders = {};
|
|
889
1643
|
response.headers.forEach((v, k) => {
|
|
890
1644
|
errRespHeaders[k] = v;
|
|
891
1645
|
});
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
1646
|
+
tracer?.logUpstreamResponseHeaders(errRespHeaders);
|
|
1647
|
+
tracer?.logUpstreamResponseBody(errBody);
|
|
1648
|
+
logProxyBody({
|
|
1649
|
+
phase: "upstream_response",
|
|
1650
|
+
headers: errRespHeaders,
|
|
1651
|
+
body: errBody,
|
|
1652
|
+
bodySize: Buffer.byteLength(errBody, "utf8"),
|
|
1653
|
+
contentType: errRespHeaders["content-type"] ?? "application/json",
|
|
895
1654
|
account: account.label,
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
requestHeaders: redactSensitiveHeaders(headers),
|
|
899
|
-
requestBody: {
|
|
900
|
-
model: body.model,
|
|
901
|
-
max_tokens: body.max_tokens,
|
|
902
|
-
stream: body.stream,
|
|
903
|
-
system: Array.isArray(body.system)
|
|
904
|
-
? `[${body.system.length} blocks]`
|
|
905
|
-
: typeof body.system,
|
|
906
|
-
messages: Array.isArray(body.messages)
|
|
907
|
-
? `[${body.messages.length} messages]`
|
|
908
|
-
: "?",
|
|
909
|
-
tools: Array.isArray(body.tools)
|
|
910
|
-
? `[${body.tools.length} tools]`
|
|
911
|
-
: "none",
|
|
912
|
-
tool_choice: body.tool_choice,
|
|
913
|
-
thinking: body.thinking,
|
|
914
|
-
},
|
|
915
|
-
requestBodySize: bodyStr.length,
|
|
1655
|
+
accountType: account.type,
|
|
1656
|
+
attempt: attemptNumber,
|
|
916
1657
|
responseStatus: response.status,
|
|
917
|
-
responseHeaders: errRespHeaders,
|
|
918
|
-
responseBody: errBody.substring(0, 2000),
|
|
919
|
-
responseBodySize: errBody.length,
|
|
920
1658
|
durationMs: Date.now() - fetchStartMs,
|
|
921
1659
|
});
|
|
922
|
-
//
|
|
1660
|
+
// Upstream invalid_request_error responses are not retried on the
|
|
1661
|
+
// same Anthropic account, but may still be handed to fallback providers.
|
|
923
1662
|
if (isInvalidRequestError(response.status, errBody)) {
|
|
924
|
-
logger.always(`[proxy] ← ${response.status}
|
|
1663
|
+
logger.always(`[proxy] ← ${response.status} upstream invalid_request_error`);
|
|
925
1664
|
logAttempt(response.status, "invalid_request_error", summarizeErrorMessage(errBody));
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
}
|
|
1665
|
+
tracer?.setError("invalid_request_error", summarizeErrorMessage(errBody));
|
|
1666
|
+
invalidRequestFailure = {
|
|
1667
|
+
status: response.status,
|
|
1668
|
+
body: errBody,
|
|
1669
|
+
contentType: errRespHeaders["content-type"],
|
|
1670
|
+
};
|
|
1671
|
+
lastError = summarizeErrorMessage(errBody);
|
|
1672
|
+
upstreamSpan?.end();
|
|
1673
|
+
upstreamSpan = undefined;
|
|
1674
|
+
break;
|
|
932
1675
|
}
|
|
933
1676
|
// Auth failures for OAuth accounts without refresh token.
|
|
934
|
-
if ((response.status === 401 ||
|
|
935
|
-
response.status === 402 ||
|
|
936
|
-
response.status === 403) &&
|
|
1677
|
+
if ((response.status === 401 || response.status === 402 || response.status === 403) &&
|
|
937
1678
|
account.type === "oauth" &&
|
|
938
1679
|
!account.refreshToken) {
|
|
939
|
-
|
|
1680
|
+
recordAttemptError(account.label, account.type, response.status);
|
|
940
1681
|
accountState.consecutiveRefreshFailures += 1;
|
|
941
1682
|
accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
|
|
942
1683
|
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
943
|
-
if (accountState.consecutiveRefreshFailures >=
|
|
944
|
-
MAX_CONSECUTIVE_REFRESH_FAILURES) {
|
|
1684
|
+
if (accountState.consecutiveRefreshFailures >= MAX_CONSECUTIVE_REFRESH_FAILURES) {
|
|
945
1685
|
await disableAccountUntilReauth(account, accountState);
|
|
946
1686
|
}
|
|
947
1687
|
authFailureMessage = formatReauthMessage(account.label);
|
|
948
1688
|
logger.always(`[proxy] ← ${response.status} account=${account.label} cooldown=5min`);
|
|
949
1689
|
lastError = errBody;
|
|
950
1690
|
logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
|
|
1691
|
+
tracer?.setError("authentication_error", summarizeErrorMessage(errBody));
|
|
1692
|
+
tracer?.recordRetry(account.label, "auth_no_refresh");
|
|
1693
|
+
upstreamSpan?.end();
|
|
1694
|
+
upstreamSpan = undefined;
|
|
951
1695
|
continue;
|
|
952
1696
|
}
|
|
953
1697
|
// Auth failures for API-key accounts.
|
|
954
|
-
if ((response.status === 401 ||
|
|
955
|
-
response.status === 402 ||
|
|
956
|
-
response.status === 403) &&
|
|
1698
|
+
if ((response.status === 401 || response.status === 402 || response.status === 403) &&
|
|
957
1699
|
account.type === "api_key") {
|
|
958
|
-
|
|
1700
|
+
recordAttemptError(account.label, account.type, response.status);
|
|
959
1701
|
authFailureMessage =
|
|
960
1702
|
"Authentication failed for Anthropic API key credentials. Update ANTHROPIC_API_KEY or re-login with OAuth.";
|
|
961
1703
|
accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
|
|
@@ -963,49 +1705,126 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
963
1705
|
logger.always(`[proxy] ← ${response.status} account=${account.label} cooldown=5min`);
|
|
964
1706
|
lastError = errBody;
|
|
965
1707
|
logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
|
|
1708
|
+
tracer?.setError("authentication_error", summarizeErrorMessage(errBody));
|
|
1709
|
+
tracer?.recordRetry(account.label, "auth_api_key");
|
|
1710
|
+
upstreamSpan?.end();
|
|
1711
|
+
upstreamSpan = undefined;
|
|
966
1712
|
continue;
|
|
967
1713
|
}
|
|
968
1714
|
// 404 is generally model/account specific; return immediately (no cooldown per Decision 8).
|
|
969
1715
|
if (response.status === 404) {
|
|
970
|
-
|
|
1716
|
+
recordFinalError(response.status, account.label, account.type);
|
|
971
1717
|
logger.always(`[proxy] ← 404 account=${account.label}`);
|
|
972
1718
|
logAttempt(404, "not_found_error", summarizeErrorMessage(errBody));
|
|
1719
|
+
tracer?.setError("not_found_error", summarizeErrorMessage(errBody));
|
|
1720
|
+
upstreamSpan?.end();
|
|
1721
|
+
tracer?.end(404, Date.now() - requestStartTime);
|
|
973
1722
|
try {
|
|
974
|
-
|
|
1723
|
+
const parsedError = JSON.parse(errBody);
|
|
1724
|
+
logFinalRequest(404, account.label, account.type, "not_found_error", summarizeErrorMessage(errBody));
|
|
1725
|
+
logProxyBody({
|
|
1726
|
+
phase: "client_response",
|
|
1727
|
+
headers: {
|
|
1728
|
+
"content-type": errRespHeaders["content-type"] ?? "application/json",
|
|
1729
|
+
},
|
|
1730
|
+
body: errBody,
|
|
1731
|
+
bodySize: Buffer.byteLength(errBody, "utf8"),
|
|
1732
|
+
contentType: errRespHeaders["content-type"] ?? "application/json",
|
|
1733
|
+
account: account.label,
|
|
1734
|
+
accountType: account.type,
|
|
1735
|
+
attempt: attemptNumber,
|
|
1736
|
+
responseStatus: 404,
|
|
1737
|
+
durationMs: Date.now() - requestStartTime,
|
|
1738
|
+
});
|
|
1739
|
+
return parsedError;
|
|
975
1740
|
}
|
|
976
1741
|
catch {
|
|
977
|
-
|
|
1742
|
+
logFinalRequest(404, account.label, account.type, "not_found_error", summarizeErrorMessage(errBody));
|
|
1743
|
+
const clientError = buildClaudeError(404, errBody);
|
|
1744
|
+
const clientErrorBody = JSON.stringify(clientError);
|
|
1745
|
+
logProxyBody({
|
|
1746
|
+
phase: "client_response",
|
|
1747
|
+
headers: { "content-type": "application/json" },
|
|
1748
|
+
body: clientErrorBody,
|
|
1749
|
+
bodySize: Buffer.byteLength(clientErrorBody, "utf8"),
|
|
1750
|
+
contentType: "application/json",
|
|
1751
|
+
account: account.label,
|
|
1752
|
+
accountType: account.type,
|
|
1753
|
+
attempt: attemptNumber,
|
|
1754
|
+
responseStatus: 404,
|
|
1755
|
+
durationMs: Date.now() - requestStartTime,
|
|
1756
|
+
});
|
|
1757
|
+
return clientError;
|
|
978
1758
|
}
|
|
979
1759
|
}
|
|
980
1760
|
// Decision 8: Transient upstream failures — immediate rotation, NO cooldown.
|
|
981
1761
|
if (isTransientHttpFailure(response.status, errBody)) {
|
|
982
|
-
|
|
1762
|
+
recordAttemptError(account.label, account.type, response.status);
|
|
983
1763
|
sawTransientFailure = true;
|
|
984
1764
|
// No cooldown for transient errors (502, 503, etc.) — rotate immediately
|
|
985
1765
|
logger.always(`[proxy] ← ${response.status} account=${account.label} (transient, rotating)`);
|
|
986
1766
|
lastError = errBody;
|
|
987
1767
|
logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
|
|
1768
|
+
tracer?.setError("transient_error", summarizeErrorMessage(errBody));
|
|
1769
|
+
tracer?.recordRetry(account.label, "transient");
|
|
1770
|
+
upstreamSpan?.end();
|
|
1771
|
+
upstreamSpan = undefined;
|
|
988
1772
|
continue;
|
|
989
1773
|
}
|
|
990
1774
|
// Other non-ok errors → return as-is.
|
|
991
|
-
|
|
1775
|
+
recordFinalError(response.status, account.label, account.type);
|
|
992
1776
|
logger.always(`[proxy] ← ${response.status} account=${account.label}`);
|
|
993
1777
|
logger.debug(`[claude-proxy] error body: ${errBody.substring(0, 200)}`);
|
|
994
1778
|
logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
|
|
1779
|
+
tracer?.setError("api_error", summarizeErrorMessage(errBody));
|
|
1780
|
+
upstreamSpan?.end();
|
|
1781
|
+
tracer?.end(response.status, Date.now() - requestStartTime);
|
|
995
1782
|
try {
|
|
996
|
-
|
|
1783
|
+
const parsedError = JSON.parse(errBody);
|
|
1784
|
+
logFinalRequest(response.status, account.label, account.type, "api_error", summarizeErrorMessage(errBody));
|
|
1785
|
+
logProxyBody({
|
|
1786
|
+
phase: "client_response",
|
|
1787
|
+
headers: {
|
|
1788
|
+
"content-type": errRespHeaders["content-type"] ?? "application/json",
|
|
1789
|
+
},
|
|
1790
|
+
body: errBody,
|
|
1791
|
+
bodySize: Buffer.byteLength(errBody, "utf8"),
|
|
1792
|
+
contentType: errRespHeaders["content-type"] ?? "application/json",
|
|
1793
|
+
account: account.label,
|
|
1794
|
+
accountType: account.type,
|
|
1795
|
+
attempt: attemptNumber,
|
|
1796
|
+
responseStatus: response.status,
|
|
1797
|
+
durationMs: Date.now() - requestStartTime,
|
|
1798
|
+
});
|
|
1799
|
+
return parsedError;
|
|
997
1800
|
}
|
|
998
1801
|
catch {
|
|
999
|
-
|
|
1802
|
+
logFinalRequest(response.status, account.label, account.type, "api_error", summarizeErrorMessage(errBody));
|
|
1803
|
+
const clientError = buildClaudeError(response.status, errBody);
|
|
1804
|
+
const clientErrorBody = JSON.stringify(clientError);
|
|
1805
|
+
logProxyBody({
|
|
1806
|
+
phase: "client_response",
|
|
1807
|
+
headers: { "content-type": "application/json" },
|
|
1808
|
+
body: clientErrorBody,
|
|
1809
|
+
bodySize: Buffer.byteLength(clientErrorBody, "utf8"),
|
|
1810
|
+
contentType: "application/json",
|
|
1811
|
+
account: account.label,
|
|
1812
|
+
accountType: account.type,
|
|
1813
|
+
attempt: attemptNumber,
|
|
1814
|
+
responseStatus: response.status,
|
|
1815
|
+
durationMs: Date.now() - requestStartTime,
|
|
1816
|
+
});
|
|
1817
|
+
return clientError;
|
|
1000
1818
|
}
|
|
1001
1819
|
}
|
|
1002
1820
|
// Success path.
|
|
1003
1821
|
accountState.backoffLevel = 0;
|
|
1004
1822
|
accountState.coolingUntil = undefined;
|
|
1005
1823
|
accountState.consecutiveRefreshFailures = 0;
|
|
1006
|
-
recordSuccess(account.label, account.type);
|
|
1007
1824
|
logger.always(`[proxy] ← ${response.status} account=${account.label}`);
|
|
1008
|
-
logAttempt
|
|
1825
|
+
// NOTE: logAttempt is deferred below so we can include token
|
|
1826
|
+
// usage. For streaming, the SSE interceptor callback logs it;
|
|
1827
|
+
// for non-streaming, we log after JSON parsing.
|
|
1009
1828
|
// Capture quota/utilisation headers (fire-and-forget).
|
|
1010
1829
|
const quota = parseQuotaHeaders(response.headers);
|
|
1011
1830
|
if (quota) {
|
|
@@ -1013,53 +1832,25 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1013
1832
|
// Non-fatal: quota persistence is best-effort
|
|
1014
1833
|
});
|
|
1015
1834
|
}
|
|
1016
|
-
// Log full request + response headers for debugging
|
|
1017
1835
|
const respHeaders = {};
|
|
1018
1836
|
response.headers.forEach((v, k) => {
|
|
1019
1837
|
respHeaders[k] = v;
|
|
1020
1838
|
});
|
|
1021
|
-
|
|
1022
|
-
timestamp: new Date().toISOString(),
|
|
1023
|
-
requestId: ctx.requestId,
|
|
1024
|
-
account: account.label,
|
|
1025
|
-
model: body.model,
|
|
1026
|
-
stream: !!body.stream,
|
|
1027
|
-
requestHeaders: redactSensitiveHeaders(headers),
|
|
1028
|
-
requestBody: {
|
|
1029
|
-
model: body.model,
|
|
1030
|
-
max_tokens: body.max_tokens,
|
|
1031
|
-
stream: body.stream,
|
|
1032
|
-
system: Array.isArray(body.system)
|
|
1033
|
-
? `[${body.system.length} blocks]`
|
|
1034
|
-
: typeof body.system,
|
|
1035
|
-
messages: Array.isArray(body.messages)
|
|
1036
|
-
? `[${body.messages.length} messages]`
|
|
1037
|
-
: "?",
|
|
1038
|
-
tools: Array.isArray(body.tools)
|
|
1039
|
-
? `[${body.tools.length} tools]`
|
|
1040
|
-
: "none",
|
|
1041
|
-
tool_choice: body.tool_choice,
|
|
1042
|
-
thinking: body.thinking,
|
|
1043
|
-
metadata: body.metadata ? "present" : "absent",
|
|
1044
|
-
},
|
|
1045
|
-
requestBodySize: bodyStr.length,
|
|
1046
|
-
responseStatus: response.status,
|
|
1047
|
-
responseHeaders: respHeaders,
|
|
1048
|
-
durationMs: Date.now() - fetchStartMs,
|
|
1049
|
-
});
|
|
1839
|
+
tracer?.logUpstreamResponseHeaders(respHeaders);
|
|
1050
1840
|
if (body.stream) {
|
|
1051
1841
|
// Bootstrap retry: read first chunk to verify stream is valid.
|
|
1052
1842
|
if (response.body) {
|
|
1053
1843
|
const reader = response.body.getReader();
|
|
1054
1844
|
const firstChunk = await reader.read();
|
|
1055
|
-
if (firstChunk.done ||
|
|
1056
|
-
!firstChunk.value ||
|
|
1057
|
-
firstChunk.value.length === 0) {
|
|
1845
|
+
if (firstChunk.done || !firstChunk.value || firstChunk.value.length === 0) {
|
|
1058
1846
|
// Empty stream — retry with next account.
|
|
1059
1847
|
reader.cancel();
|
|
1060
1848
|
accountState.coolingUntil = Date.now() + 10_000;
|
|
1061
1849
|
recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
|
|
1062
1850
|
logger.always(`[proxy] ← empty stream from account=${account.label}, trying next`);
|
|
1851
|
+
tracer?.recordRetry(account.label, "empty_stream");
|
|
1852
|
+
upstreamSpan?.end();
|
|
1853
|
+
upstreamSpan = undefined;
|
|
1063
1854
|
continue;
|
|
1064
1855
|
}
|
|
1065
1856
|
// Stream is valid — create a new ReadableStream with first chunk prepended.
|
|
@@ -1085,9 +1876,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1085
1876
|
controller.enqueue(value);
|
|
1086
1877
|
}
|
|
1087
1878
|
catch (streamErr) {
|
|
1088
|
-
const errMsg = streamErr instanceof Error
|
|
1089
|
-
? streamErr.message
|
|
1090
|
-
: String(streamErr);
|
|
1879
|
+
const errMsg = streamErr instanceof Error ? streamErr.message : String(streamErr);
|
|
1091
1880
|
logger.always(`[proxy] mid-stream error account=${account.label}: ${errMsg}`);
|
|
1092
1881
|
logStreamError({
|
|
1093
1882
|
timestamp: new Date().toISOString(),
|
|
@@ -1112,6 +1901,175 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1112
1901
|
reader.cancel();
|
|
1113
1902
|
},
|
|
1114
1903
|
});
|
|
1904
|
+
// OTel: pipe stream through SSE interceptor for telemetry extraction.
|
|
1905
|
+
// The interceptor passes all bytes through unmodified and resolves
|
|
1906
|
+
// its telemetry promise when the stream finishes.
|
|
1907
|
+
const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture();
|
|
1908
|
+
let streamSource = remainingStream;
|
|
1909
|
+
if (tracer) {
|
|
1910
|
+
try {
|
|
1911
|
+
const { stream: interceptor, telemetry } = createSSEInterceptor({ captureRawText: true });
|
|
1912
|
+
streamSource = streamSource.pipeThrough(interceptor);
|
|
1913
|
+
// Capture refs in const variables for the async closure —
|
|
1914
|
+
// loop variables (upstreamSpan, response) will change on next iteration,
|
|
1915
|
+
// and TypeScript needs the narrowed type for tracer.
|
|
1916
|
+
const capturedTracer = tracer;
|
|
1917
|
+
const capturedUpstreamSpan = upstreamSpan;
|
|
1918
|
+
const capturedResponse = response;
|
|
1919
|
+
const capturedRequestBytes = finalBodyStr.length;
|
|
1920
|
+
const capturedAccountLabel = account.label;
|
|
1921
|
+
Promise.all([telemetry, clientCapture])
|
|
1922
|
+
.then(([data, clientBody]) => {
|
|
1923
|
+
capturedTracer.setUsage({
|
|
1924
|
+
inputTokens: data.usage.inputTokens,
|
|
1925
|
+
outputTokens: data.usage.outputTokens,
|
|
1926
|
+
cacheCreationTokens: data.usage.cacheCreationInputTokens,
|
|
1927
|
+
cacheReadTokens: data.usage.cacheReadInputTokens,
|
|
1928
|
+
});
|
|
1929
|
+
capturedTracer.logStreamEvents(data.events);
|
|
1930
|
+
// Extract rate limits from response headers
|
|
1931
|
+
const rateLimit5h = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
|
|
1932
|
+
const rateLimit7d = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
|
|
1933
|
+
const usageUpdate = {
|
|
1934
|
+
inputTokens: data.usage.inputTokens,
|
|
1935
|
+
outputTokens: data.usage.outputTokens,
|
|
1936
|
+
cacheCreationTokens: data.usage.cacheCreationInputTokens,
|
|
1937
|
+
cacheReadTokens: data.usage.cacheReadInputTokens,
|
|
1938
|
+
};
|
|
1939
|
+
if (!isNaN(rateLimit5h)) {
|
|
1940
|
+
usageUpdate.rateLimitAfter5h = rateLimit5h;
|
|
1941
|
+
}
|
|
1942
|
+
if (!isNaN(rateLimit7d)) {
|
|
1943
|
+
usageUpdate.rateLimitAfter7d = rateLimit7d;
|
|
1944
|
+
}
|
|
1945
|
+
if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) {
|
|
1946
|
+
capturedTracer.setUsage(usageUpdate);
|
|
1947
|
+
}
|
|
1948
|
+
capturedTracer.logUpstreamResponseBody(data.rawText ?? "");
|
|
1949
|
+
capturedTracer.recordMetrics();
|
|
1950
|
+
capturedTracer.recordBodySizes(capturedRequestBytes, data.totalBytesReceived);
|
|
1951
|
+
capturedUpstreamSpan?.end();
|
|
1952
|
+
capturedTracer.end(200, Date.now() - requestStartTime);
|
|
1953
|
+
recordFinalSuccess(capturedAccountLabel, account.type);
|
|
1954
|
+
// Deferred JSONL log with token usage + traceId
|
|
1955
|
+
// (streaming: tokens only available after SSE stream finishes)
|
|
1956
|
+
logFinalRequest(200, capturedAccountLabel, account.type, undefined, undefined, {
|
|
1957
|
+
inputTokens: data.usage.inputTokens,
|
|
1958
|
+
outputTokens: data.usage.outputTokens,
|
|
1959
|
+
cacheCreationTokens: data.usage.cacheCreationInputTokens,
|
|
1960
|
+
cacheReadTokens: data.usage.cacheReadInputTokens,
|
|
1961
|
+
});
|
|
1962
|
+
logProxyBody({
|
|
1963
|
+
phase: "upstream_response",
|
|
1964
|
+
headers: respHeaders,
|
|
1965
|
+
body: data.rawText ?? "",
|
|
1966
|
+
bodySize: data.totalBytesReceived,
|
|
1967
|
+
contentType: respHeaders["content-type"] ?? "text/event-stream",
|
|
1968
|
+
account: capturedAccountLabel,
|
|
1969
|
+
accountType: account.type,
|
|
1970
|
+
attempt: attemptNumber,
|
|
1971
|
+
responseStatus: 200,
|
|
1972
|
+
durationMs: Date.now() - requestStartTime,
|
|
1973
|
+
});
|
|
1974
|
+
logProxyBody({
|
|
1975
|
+
phase: "client_response",
|
|
1976
|
+
headers: responseHeaders,
|
|
1977
|
+
body: clientBody.text,
|
|
1978
|
+
bodySize: clientBody.totalBytes,
|
|
1979
|
+
contentType: responseHeaders["content-type"] ?? "text/event-stream",
|
|
1980
|
+
account: capturedAccountLabel,
|
|
1981
|
+
accountType: account.type,
|
|
1982
|
+
attempt: attemptNumber,
|
|
1983
|
+
responseStatus: 200,
|
|
1984
|
+
durationMs: Date.now() - requestStartTime,
|
|
1985
|
+
});
|
|
1986
|
+
})
|
|
1987
|
+
.catch((err) => {
|
|
1988
|
+
capturedTracer.setError("stream_error", err instanceof Error ? err.message : String(err));
|
|
1989
|
+
capturedUpstreamSpan?.end();
|
|
1990
|
+
capturedTracer.end(500, Date.now() - requestStartTime);
|
|
1991
|
+
recordFinalError(500, capturedAccountLabel, account.type);
|
|
1992
|
+
// Log the streaming error in JSONL
|
|
1993
|
+
logFinalRequest(500, capturedAccountLabel, account.type, "stream_error", err instanceof Error ? err.message : String(err));
|
|
1994
|
+
});
|
|
1995
|
+
}
|
|
1996
|
+
catch {
|
|
1997
|
+
// Interceptor attachment failed after stream setup; response handling continues.
|
|
1998
|
+
}
|
|
1999
|
+
}
|
|
2000
|
+
else {
|
|
2001
|
+
// No tracer — still intercept stream for JSONL token logging
|
|
2002
|
+
upstreamSpan?.end();
|
|
2003
|
+
try {
|
|
2004
|
+
const { stream: noTracerInterceptor, telemetry: noTracerTelemetry } = createSSEInterceptor({
|
|
2005
|
+
captureRawText: true,
|
|
2006
|
+
});
|
|
2007
|
+
streamSource = streamSource.pipeThrough(noTracerInterceptor);
|
|
2008
|
+
const capturedAccountLabel = account.label;
|
|
2009
|
+
Promise.all([noTracerTelemetry, clientCapture])
|
|
2010
|
+
.then(([data, clientBody]) => {
|
|
2011
|
+
recordFinalSuccess(capturedAccountLabel, account.type);
|
|
2012
|
+
logFinalRequest(200, capturedAccountLabel, account.type, undefined, undefined, {
|
|
2013
|
+
inputTokens: data.usage.inputTokens,
|
|
2014
|
+
outputTokens: data.usage.outputTokens,
|
|
2015
|
+
cacheCreationTokens: data.usage.cacheCreationInputTokens,
|
|
2016
|
+
cacheReadTokens: data.usage.cacheReadInputTokens,
|
|
2017
|
+
});
|
|
2018
|
+
logProxyBody({
|
|
2019
|
+
phase: "upstream_response",
|
|
2020
|
+
headers: respHeaders,
|
|
2021
|
+
body: data.rawText ?? "",
|
|
2022
|
+
bodySize: data.totalBytesReceived,
|
|
2023
|
+
contentType: respHeaders["content-type"] ?? "text/event-stream",
|
|
2024
|
+
account: capturedAccountLabel,
|
|
2025
|
+
accountType: account.type,
|
|
2026
|
+
attempt: attemptNumber,
|
|
2027
|
+
responseStatus: 200,
|
|
2028
|
+
durationMs: Date.now() - requestStartTime,
|
|
2029
|
+
});
|
|
2030
|
+
logProxyBody({
|
|
2031
|
+
phase: "client_response",
|
|
2032
|
+
headers: responseHeaders,
|
|
2033
|
+
body: clientBody.text,
|
|
2034
|
+
bodySize: clientBody.totalBytes,
|
|
2035
|
+
contentType: responseHeaders["content-type"] ?? "text/event-stream",
|
|
2036
|
+
account: capturedAccountLabel,
|
|
2037
|
+
accountType: account.type,
|
|
2038
|
+
attempt: attemptNumber,
|
|
2039
|
+
responseStatus: 200,
|
|
2040
|
+
durationMs: Date.now() - requestStartTime,
|
|
2041
|
+
});
|
|
2042
|
+
})
|
|
2043
|
+
.catch(() => {
|
|
2044
|
+
recordFinalSuccess(account.label, account.type);
|
|
2045
|
+
logFinalRequest(response.status, account.label, account.type);
|
|
2046
|
+
});
|
|
2047
|
+
}
|
|
2048
|
+
catch {
|
|
2049
|
+
// SSE interceptor creation failed — log without tokens
|
|
2050
|
+
clientCapture
|
|
2051
|
+
.then((clientBody) => {
|
|
2052
|
+
logProxyBody({
|
|
2053
|
+
phase: "client_response",
|
|
2054
|
+
headers: responseHeaders,
|
|
2055
|
+
body: clientBody.text,
|
|
2056
|
+
bodySize: clientBody.totalBytes,
|
|
2057
|
+
contentType: responseHeaders["content-type"] ?? "text/event-stream",
|
|
2058
|
+
account: account.label,
|
|
2059
|
+
accountType: account.type,
|
|
2060
|
+
attempt: attemptNumber,
|
|
2061
|
+
responseStatus: 200,
|
|
2062
|
+
durationMs: Date.now() - requestStartTime,
|
|
2063
|
+
});
|
|
2064
|
+
})
|
|
2065
|
+
.catch(() => {
|
|
2066
|
+
// Non-fatal
|
|
2067
|
+
});
|
|
2068
|
+
recordFinalSuccess(account.label, account.type);
|
|
2069
|
+
logFinalRequest(response.status, account.label, account.type);
|
|
2070
|
+
}
|
|
2071
|
+
}
|
|
2072
|
+
const clientStream = streamSource.pipeThrough(clientCaptureStream);
|
|
1115
2073
|
// Forward rate limit headers from Anthropic.
|
|
1116
2074
|
const responseHeaders = {
|
|
1117
2075
|
"content-type": "text/event-stream",
|
|
@@ -1126,20 +2084,124 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1126
2084
|
"anthropic-ratelimit-tokens-limit",
|
|
1127
2085
|
]) {
|
|
1128
2086
|
const val = response.headers.get(h);
|
|
1129
|
-
// eslint-disable-next-line max-depth
|
|
1130
2087
|
if (val) {
|
|
1131
2088
|
responseHeaders[h] = val;
|
|
1132
2089
|
}
|
|
1133
2090
|
}
|
|
1134
|
-
return new Response(
|
|
2091
|
+
return new Response(clientStream, {
|
|
1135
2092
|
status: response.status,
|
|
1136
2093
|
headers: responseHeaders,
|
|
1137
2094
|
});
|
|
1138
2095
|
}
|
|
1139
|
-
|
|
2096
|
+
upstreamSpan?.end();
|
|
2097
|
+
tracer?.setError("stream_error", "No response body from upstream");
|
|
2098
|
+
tracer?.end(502, Date.now() - requestStartTime);
|
|
2099
|
+
recordFinalError(502, account.label, account.type);
|
|
2100
|
+
logFinalRequest(502, account.label, account.type, "stream_error", "No response body from upstream");
|
|
2101
|
+
const clientError = buildClaudeError(502, "No response body from upstream");
|
|
2102
|
+
logProxyBody({
|
|
2103
|
+
phase: "client_response",
|
|
2104
|
+
headers: { "content-type": "application/json" },
|
|
2105
|
+
body: JSON.stringify(clientError),
|
|
2106
|
+
bodySize: Buffer.byteLength(JSON.stringify(clientError), "utf8"),
|
|
2107
|
+
contentType: "application/json",
|
|
2108
|
+
account: account.label,
|
|
2109
|
+
accountType: account.type,
|
|
2110
|
+
attempt: attemptNumber,
|
|
2111
|
+
responseStatus: 502,
|
|
2112
|
+
durationMs: Date.now() - requestStartTime,
|
|
2113
|
+
});
|
|
2114
|
+
return clientError;
|
|
1140
2115
|
}
|
|
1141
2116
|
// Non-streaming: return JSON directly.
|
|
1142
|
-
|
|
2117
|
+
// OTel: extract usage from response JSON before returning.
|
|
2118
|
+
const responseText = await response.text();
|
|
2119
|
+
tracer?.logUpstreamResponseBody(responseText);
|
|
2120
|
+
logProxyBody({
|
|
2121
|
+
phase: "upstream_response",
|
|
2122
|
+
headers: respHeaders,
|
|
2123
|
+
body: responseText,
|
|
2124
|
+
bodySize: Buffer.byteLength(responseText, "utf8"),
|
|
2125
|
+
contentType: respHeaders["content-type"] ?? "application/json",
|
|
2126
|
+
account: account.label,
|
|
2127
|
+
accountType: account.type,
|
|
2128
|
+
attempt: attemptNumber,
|
|
2129
|
+
responseStatus: response.status,
|
|
2130
|
+
durationMs: Date.now() - fetchStartMs,
|
|
2131
|
+
});
|
|
2132
|
+
logProxyBody({
|
|
2133
|
+
phase: "client_response",
|
|
2134
|
+
headers: respHeaders,
|
|
2135
|
+
body: responseText,
|
|
2136
|
+
bodySize: Buffer.byteLength(responseText, "utf8"),
|
|
2137
|
+
contentType: respHeaders["content-type"] ?? "application/json",
|
|
2138
|
+
account: account.label,
|
|
2139
|
+
accountType: account.type,
|
|
2140
|
+
attempt: attemptNumber,
|
|
2141
|
+
responseStatus: response.status,
|
|
2142
|
+
durationMs: Date.now() - requestStartTime,
|
|
2143
|
+
});
|
|
2144
|
+
const responseJson = JSON.parse(responseText);
|
|
2145
|
+
if (tracer && responseJson && typeof responseJson === "object") {
|
|
2146
|
+
const usage = responseJson.usage;
|
|
2147
|
+
if (usage) {
|
|
2148
|
+
tracer.setUsage({
|
|
2149
|
+
inputTokens: usage.input_tokens ?? 0,
|
|
2150
|
+
outputTokens: usage.output_tokens ?? 0,
|
|
2151
|
+
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
|
2152
|
+
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
|
2153
|
+
});
|
|
2154
|
+
// Extract rate limits from response headers
|
|
2155
|
+
const rateLimit5h = parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
|
|
2156
|
+
const rateLimit7d = parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
|
|
2157
|
+
if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) {
|
|
2158
|
+
const usageWithRates = {
|
|
2159
|
+
inputTokens: usage.input_tokens ?? 0,
|
|
2160
|
+
outputTokens: usage.output_tokens ?? 0,
|
|
2161
|
+
cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
|
|
2162
|
+
cacheReadTokens: usage.cache_read_input_tokens ?? 0,
|
|
2163
|
+
};
|
|
2164
|
+
if (!isNaN(rateLimit5h)) {
|
|
2165
|
+
usageWithRates.rateLimitAfter5h = rateLimit5h;
|
|
2166
|
+
}
|
|
2167
|
+
if (!isNaN(rateLimit7d)) {
|
|
2168
|
+
usageWithRates.rateLimitAfter7d = rateLimit7d;
|
|
2169
|
+
}
|
|
2170
|
+
tracer.setUsage(usageWithRates);
|
|
2171
|
+
}
|
|
2172
|
+
}
|
|
2173
|
+
tracer.recordMetrics();
|
|
2174
|
+
const responseJsonStr = JSON.stringify(responseJson);
|
|
2175
|
+
tracer.recordBodySizes(finalBodyStr.length, responseJsonStr.length);
|
|
2176
|
+
upstreamSpan?.end();
|
|
2177
|
+
tracer.end(response.status, Date.now() - requestStartTime);
|
|
2178
|
+
recordFinalSuccess(account.label, account.type);
|
|
2179
|
+
logFinalRequest(response.status, account.label, account.type, undefined, undefined, {
|
|
2180
|
+
inputTokens: usage?.input_tokens,
|
|
2181
|
+
outputTokens: usage?.output_tokens,
|
|
2182
|
+
cacheCreationTokens: usage?.cache_creation_input_tokens,
|
|
2183
|
+
cacheReadTokens: usage?.cache_read_input_tokens,
|
|
2184
|
+
});
|
|
2185
|
+
}
|
|
2186
|
+
else {
|
|
2187
|
+
upstreamSpan?.end();
|
|
2188
|
+
// No tracer — still extract usage from response JSON for JSONL logging
|
|
2189
|
+
const noTracerUsage = responseJson && typeof responseJson === "object"
|
|
2190
|
+
? responseJson.usage
|
|
2191
|
+
: undefined;
|
|
2192
|
+
recordFinalSuccess(account.label, account.type);
|
|
2193
|
+
logFinalRequest(response.status, account.label, account.type, undefined, undefined, {
|
|
2194
|
+
inputTokens: noTracerUsage?.input_tokens,
|
|
2195
|
+
outputTokens: noTracerUsage?.output_tokens,
|
|
2196
|
+
cacheCreationTokens: noTracerUsage?.cache_creation_input_tokens,
|
|
2197
|
+
cacheReadTokens: noTracerUsage?.cache_read_input_tokens,
|
|
2198
|
+
});
|
|
2199
|
+
}
|
|
2200
|
+
return responseJson;
|
|
2201
|
+
}
|
|
2202
|
+
// OTel: end account selection span if all accounts were skipped
|
|
2203
|
+
if (attemptNumber === 0) {
|
|
2204
|
+
acctSelectionSpan?.end();
|
|
1143
2205
|
}
|
|
1144
2206
|
// All accounts exhausted — compute earliest recovery time.
|
|
1145
2207
|
const earliestRecovery = orderedAccounts.reduce((min, account) => {
|
|
@@ -1152,42 +2214,18 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1152
2214
|
// Try fallback chain (alternative providers)
|
|
1153
2215
|
const chain = modelRouter?.getFallbackChain() ?? [];
|
|
1154
2216
|
for (const fallback of chain) {
|
|
2217
|
+
const availability = await ProviderHealthChecker.checkFallbackProviderAvailability(fallback.provider, fallback.model);
|
|
2218
|
+
if (!availability.available) {
|
|
2219
|
+
logger.debug(`[proxy] skipping fallback ${fallback.provider}/${fallback.model}: ${availability.reason ?? "provider unavailable"}`);
|
|
2220
|
+
continue;
|
|
2221
|
+
}
|
|
1155
2222
|
try {
|
|
1156
2223
|
logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
|
|
1157
2224
|
const parsed = parseClaudeRequest(body);
|
|
1158
|
-
const opts = {
|
|
1159
|
-
input: {
|
|
1160
|
-
text: parsed.prompt,
|
|
1161
|
-
...(parsed.images.length > 0
|
|
1162
|
-
? { images: parsed.images }
|
|
1163
|
-
: {}),
|
|
1164
|
-
},
|
|
2225
|
+
const opts = buildProxyFallbackOptions(parsed, {
|
|
1165
2226
|
provider: fallback.provider,
|
|
1166
2227
|
model: fallback.model,
|
|
1167
|
-
|
|
1168
|
-
maxTokens: parsed.maxTokens,
|
|
1169
|
-
...(parsed.temperature !== undefined
|
|
1170
|
-
? { temperature: parsed.temperature }
|
|
1171
|
-
: {}),
|
|
1172
|
-
...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
|
|
1173
|
-
...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
|
|
1174
|
-
...(parsed.stopSequences?.length
|
|
1175
|
-
? { stopSequences: parsed.stopSequences }
|
|
1176
|
-
: {}),
|
|
1177
|
-
tools: parsed.tools,
|
|
1178
|
-
...(parsed.toolChoice
|
|
1179
|
-
? { toolChoice: parsed.toolChoice }
|
|
1180
|
-
: {}),
|
|
1181
|
-
...(parsed.thinkingConfig
|
|
1182
|
-
? { thinkingConfig: parsed.thinkingConfig }
|
|
1183
|
-
: {}),
|
|
1184
|
-
...(parsed.conversationMessages?.length
|
|
1185
|
-
? {
|
|
1186
|
-
conversationMessages: parsed.conversationMessages.slice(0, -1),
|
|
1187
|
-
}
|
|
1188
|
-
: {}),
|
|
1189
|
-
maxSteps: 1,
|
|
1190
|
-
};
|
|
2228
|
+
});
|
|
1191
2229
|
if (body.stream) {
|
|
1192
2230
|
const streamResult = await ctx.neurolink.stream(opts);
|
|
1193
2231
|
const serializer = new ClaudeStreamSerializer(body.model, 0);
|
|
@@ -1195,33 +2233,38 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1195
2233
|
for (const frame of serializer.start()) {
|
|
1196
2234
|
yield frame;
|
|
1197
2235
|
}
|
|
2236
|
+
let collectedText = "";
|
|
1198
2237
|
for await (const chunk of streamResult.stream) {
|
|
1199
2238
|
const text = extractText(chunk);
|
|
1200
2239
|
if (text) {
|
|
2240
|
+
collectedText += text;
|
|
1201
2241
|
for (const frame of serializer.pushDelta(text)) {
|
|
1202
2242
|
yield frame;
|
|
1203
2243
|
}
|
|
1204
2244
|
}
|
|
1205
2245
|
}
|
|
1206
2246
|
// Emit tool_use blocks if model wants to call tools
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
|
|
2247
|
+
const toolCalls = streamResult.toolCalls ?? [];
|
|
2248
|
+
if (!hasTranslatedOutput(collectedText, toolCalls)) {
|
|
2249
|
+
throw new Error(`Translated provider ${fallback.provider}/${fallback.model} returned no content or tool calls`);
|
|
2250
|
+
}
|
|
2251
|
+
if (toolCalls.length) {
|
|
2252
|
+
for (const tc of toolCalls) {
|
|
2253
|
+
const toolName = tc.toolName ?? tc.name ?? "unknown";
|
|
2254
|
+
for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(tc))) {
|
|
1216
2255
|
yield frame;
|
|
1217
2256
|
}
|
|
1218
2257
|
}
|
|
1219
2258
|
}
|
|
1220
2259
|
const reason = streamResult.finishReason ?? "end_turn";
|
|
1221
|
-
|
|
2260
|
+
const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
|
|
2261
|
+
for (const frame of serializer.finish(resolvedUsage.output, reason)) {
|
|
1222
2262
|
yield frame;
|
|
1223
2263
|
}
|
|
1224
2264
|
}
|
|
2265
|
+
tracer?.end(200, Date.now() - requestStartTime);
|
|
2266
|
+
recordFinalSuccess();
|
|
2267
|
+
logFinalRequest(200, "", fallback.provider);
|
|
1225
2268
|
return sseGenerator();
|
|
1226
2269
|
}
|
|
1227
2270
|
const streamResult = await ctx.neurolink.stream(opts);
|
|
@@ -1232,66 +2275,50 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1232
2275
|
collectedText += text;
|
|
1233
2276
|
}
|
|
1234
2277
|
}
|
|
2278
|
+
if (!hasTranslatedOutput(collectedText, streamResult.toolCalls)) {
|
|
2279
|
+
throw new Error(`Translated provider ${fallback.provider}/${fallback.model} returned no content or tool calls`);
|
|
2280
|
+
}
|
|
1235
2281
|
const internal = {
|
|
1236
2282
|
content: collectedText,
|
|
1237
2283
|
model: streamResult.model,
|
|
1238
2284
|
finishReason: streamResult.finishReason ?? "end_turn",
|
|
1239
2285
|
reasoning: undefined,
|
|
1240
|
-
usage: streamResult.usage
|
|
1241
|
-
? {
|
|
1242
|
-
input: streamResult.usage.input ??
|
|
1243
|
-
0,
|
|
1244
|
-
output: streamResult.usage
|
|
1245
|
-
.output ?? 0,
|
|
1246
|
-
total: streamResult.usage.total ??
|
|
1247
|
-
0,
|
|
1248
|
-
}
|
|
1249
|
-
: undefined,
|
|
2286
|
+
usage: streamResult.usage ? extractUsageFromStreamResult(streamResult.usage) : undefined,
|
|
1250
2287
|
toolCalls: streamResult.toolCalls,
|
|
1251
2288
|
};
|
|
1252
|
-
|
|
2289
|
+
tracer?.end(200, Date.now() - requestStartTime);
|
|
2290
|
+
recordFinalSuccess();
|
|
2291
|
+
const clientResponse = serializeClaudeResponse(internal, body.model);
|
|
2292
|
+
logFinalRequest(200, "", fallback.provider, undefined, undefined, {
|
|
2293
|
+
inputTokens: internal.usage?.input,
|
|
2294
|
+
outputTokens: internal.usage?.output,
|
|
2295
|
+
});
|
|
2296
|
+
const clientResponseText = JSON.stringify(clientResponse);
|
|
2297
|
+
logProxyBody({
|
|
2298
|
+
phase: "client_response",
|
|
2299
|
+
headers: { "content-type": "application/json" },
|
|
2300
|
+
body: clientResponseText,
|
|
2301
|
+
bodySize: Buffer.byteLength(clientResponseText, "utf8"),
|
|
2302
|
+
contentType: "application/json",
|
|
2303
|
+
responseStatus: 200,
|
|
2304
|
+
durationMs: Date.now() - requestStartTime,
|
|
2305
|
+
});
|
|
2306
|
+
return clientResponse;
|
|
1253
2307
|
}
|
|
1254
2308
|
catch (fallbackErr) {
|
|
1255
2309
|
logger.debug(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
|
|
1256
|
-
continue;
|
|
1257
2310
|
}
|
|
1258
2311
|
}
|
|
1259
2312
|
// If no explicit fallback chain is configured, try SDK auto-provider fallback.
|
|
1260
|
-
|
|
2313
|
+
// Skip auto-provider when all accounts are rate-limited — the client
|
|
2314
|
+
// (e.g. Claude Code) understands 429 + Retry-After and will retry on
|
|
2315
|
+
// its own. Silently routing to a different provider (e.g. OpenAI)
|
|
2316
|
+
// produces confusing errors like "insufficient_quota".
|
|
2317
|
+
if (chain.length === 0 && !sawRateLimit) {
|
|
1261
2318
|
try {
|
|
1262
2319
|
logger.always("[proxy] fallback → auto-provider");
|
|
1263
2320
|
const parsed = parseClaudeRequest(body);
|
|
1264
|
-
const opts =
|
|
1265
|
-
input: {
|
|
1266
|
-
text: parsed.prompt,
|
|
1267
|
-
...(parsed.images.length > 0
|
|
1268
|
-
? { images: parsed.images }
|
|
1269
|
-
: {}),
|
|
1270
|
-
},
|
|
1271
|
-
systemPrompt: parsed.systemPrompt,
|
|
1272
|
-
maxTokens: parsed.maxTokens,
|
|
1273
|
-
...(parsed.temperature !== undefined
|
|
1274
|
-
? { temperature: parsed.temperature }
|
|
1275
|
-
: {}),
|
|
1276
|
-
...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
|
|
1277
|
-
...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
|
|
1278
|
-
...(parsed.stopSequences?.length
|
|
1279
|
-
? { stopSequences: parsed.stopSequences }
|
|
1280
|
-
: {}),
|
|
1281
|
-
tools: parsed.tools,
|
|
1282
|
-
...(parsed.toolChoice
|
|
1283
|
-
? { toolChoice: parsed.toolChoice }
|
|
1284
|
-
: {}),
|
|
1285
|
-
...(parsed.thinkingConfig
|
|
1286
|
-
? { thinkingConfig: parsed.thinkingConfig }
|
|
1287
|
-
: {}),
|
|
1288
|
-
...(parsed.conversationMessages?.length
|
|
1289
|
-
? {
|
|
1290
|
-
conversationMessages: parsed.conversationMessages.slice(0, -1),
|
|
1291
|
-
}
|
|
1292
|
-
: {}),
|
|
1293
|
-
maxSteps: 1,
|
|
1294
|
-
};
|
|
2321
|
+
const opts = buildProxyFallbackOptions(parsed);
|
|
1295
2322
|
if (body.stream) {
|
|
1296
2323
|
const streamResult = await ctx.neurolink.stream(opts);
|
|
1297
2324
|
const serializer = new ClaudeStreamSerializer(body.model, 0);
|
|
@@ -1299,33 +2326,38 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1299
2326
|
for (const frame of serializer.start()) {
|
|
1300
2327
|
yield frame;
|
|
1301
2328
|
}
|
|
2329
|
+
let collectedText = "";
|
|
1302
2330
|
for await (const chunk of streamResult.stream) {
|
|
1303
2331
|
const text = extractText(chunk);
|
|
1304
2332
|
if (text) {
|
|
2333
|
+
collectedText += text;
|
|
1305
2334
|
for (const frame of serializer.pushDelta(text)) {
|
|
1306
2335
|
yield frame;
|
|
1307
2336
|
}
|
|
1308
2337
|
}
|
|
1309
2338
|
}
|
|
1310
2339
|
// Emit tool_use blocks if model wants to call tools
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
|
|
2340
|
+
const toolCalls = streamResult.toolCalls ?? [];
|
|
2341
|
+
if (!hasTranslatedOutput(collectedText, toolCalls)) {
|
|
2342
|
+
throw new Error("Translated provider auto-provider returned no content or tool calls");
|
|
2343
|
+
}
|
|
2344
|
+
if (toolCalls.length) {
|
|
2345
|
+
for (const tc of toolCalls) {
|
|
2346
|
+
const toolName = tc.toolName ?? tc.name ?? "unknown";
|
|
2347
|
+
for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(tc))) {
|
|
1320
2348
|
yield frame;
|
|
1321
2349
|
}
|
|
1322
2350
|
}
|
|
1323
2351
|
}
|
|
1324
2352
|
const reason = streamResult.finishReason ?? "end_turn";
|
|
1325
|
-
|
|
2353
|
+
const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
|
|
2354
|
+
for (const frame of serializer.finish(resolvedUsage.output, reason)) {
|
|
1326
2355
|
yield frame;
|
|
1327
2356
|
}
|
|
1328
2357
|
}
|
|
2358
|
+
tracer?.end(200, Date.now() - requestStartTime);
|
|
2359
|
+
recordFinalSuccess();
|
|
2360
|
+
logFinalRequest(200, "", "auto-provider");
|
|
1329
2361
|
return sseGenerator();
|
|
1330
2362
|
}
|
|
1331
2363
|
const streamResult = await ctx.neurolink.stream(opts);
|
|
@@ -1336,48 +2368,102 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1336
2368
|
collectedText += text;
|
|
1337
2369
|
}
|
|
1338
2370
|
}
|
|
2371
|
+
if (!hasTranslatedOutput(collectedText, streamResult.toolCalls)) {
|
|
2372
|
+
throw new Error("Translated provider auto-provider returned no content or tool calls");
|
|
2373
|
+
}
|
|
1339
2374
|
const internal = {
|
|
1340
2375
|
content: collectedText,
|
|
1341
2376
|
model: streamResult.model,
|
|
1342
2377
|
finishReason: streamResult.finishReason ?? "end_turn",
|
|
1343
2378
|
reasoning: undefined,
|
|
1344
|
-
usage: streamResult.usage
|
|
1345
|
-
? {
|
|
1346
|
-
input: streamResult.usage.input ??
|
|
1347
|
-
0,
|
|
1348
|
-
output: streamResult.usage
|
|
1349
|
-
.output ?? 0,
|
|
1350
|
-
total: streamResult.usage.total ??
|
|
1351
|
-
0,
|
|
1352
|
-
}
|
|
1353
|
-
: undefined,
|
|
2379
|
+
usage: streamResult.usage ? extractUsageFromStreamResult(streamResult.usage) : undefined,
|
|
1354
2380
|
toolCalls: streamResult.toolCalls,
|
|
1355
2381
|
};
|
|
1356
|
-
|
|
2382
|
+
tracer?.end(200, Date.now() - requestStartTime);
|
|
2383
|
+
recordFinalSuccess();
|
|
2384
|
+
const clientResponse = serializeClaudeResponse(internal, body.model);
|
|
2385
|
+
logFinalRequest(200, "", "auto-provider", undefined, undefined, {
|
|
2386
|
+
inputTokens: internal.usage?.input,
|
|
2387
|
+
outputTokens: internal.usage?.output,
|
|
2388
|
+
});
|
|
2389
|
+
const clientResponseText = JSON.stringify(clientResponse);
|
|
2390
|
+
logProxyBody({
|
|
2391
|
+
phase: "client_response",
|
|
2392
|
+
headers: { "content-type": "application/json" },
|
|
2393
|
+
body: clientResponseText,
|
|
2394
|
+
bodySize: Buffer.byteLength(clientResponseText, "utf8"),
|
|
2395
|
+
contentType: "application/json",
|
|
2396
|
+
responseStatus: 200,
|
|
2397
|
+
durationMs: Date.now() - requestStartTime,
|
|
2398
|
+
});
|
|
2399
|
+
return clientResponse;
|
|
1357
2400
|
}
|
|
1358
2401
|
catch (fallbackErr) {
|
|
1359
|
-
logger.debug(`[proxy] fallback auto-provider failed: ${fallbackErr instanceof Error
|
|
1360
|
-
? fallbackErr.message
|
|
1361
|
-
: String(fallbackErr)}`);
|
|
2402
|
+
logger.debug(`[proxy] fallback auto-provider failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
|
|
1362
2403
|
}
|
|
1363
2404
|
}
|
|
1364
2405
|
if (authFailureMessage && !sawRateLimit) {
|
|
1365
|
-
|
|
2406
|
+
tracer?.setError("authentication_error", authFailureMessage);
|
|
2407
|
+
tracer?.end(401, Date.now() - requestStartTime);
|
|
2408
|
+
return buildLoggedClaudeError(401, authFailureMessage);
|
|
2409
|
+
}
|
|
2410
|
+
if (invalidRequestFailure) {
|
|
2411
|
+
tracer?.setError("invalid_request_error", summarizeErrorMessage(invalidRequestFailure.body));
|
|
2412
|
+
tracer?.end(invalidRequestFailure.status, Date.now() - requestStartTime);
|
|
2413
|
+
recordFinalError(invalidRequestFailure.status);
|
|
2414
|
+
try {
|
|
2415
|
+
const parsedError = JSON.parse(invalidRequestFailure.body);
|
|
2416
|
+
logFinalRequest(invalidRequestFailure.status, "", "final", "invalid_request_error", summarizeErrorMessage(invalidRequestFailure.body));
|
|
2417
|
+
logProxyBody({
|
|
2418
|
+
phase: "client_response",
|
|
2419
|
+
headers: {
|
|
2420
|
+
"content-type": invalidRequestFailure.contentType ?? "application/json",
|
|
2421
|
+
},
|
|
2422
|
+
body: invalidRequestFailure.body,
|
|
2423
|
+
bodySize: Buffer.byteLength(invalidRequestFailure.body, "utf8"),
|
|
2424
|
+
contentType: invalidRequestFailure.contentType ?? "application/json",
|
|
2425
|
+
responseStatus: invalidRequestFailure.status,
|
|
2426
|
+
durationMs: Date.now() - requestStartTime,
|
|
2427
|
+
});
|
|
2428
|
+
return parsedError;
|
|
2429
|
+
}
|
|
2430
|
+
catch {
|
|
2431
|
+
return buildLoggedClaudeError(invalidRequestFailure.status, summarizeErrorMessage(invalidRequestFailure.body), "invalid_request_error");
|
|
2432
|
+
}
|
|
1366
2433
|
}
|
|
1367
2434
|
if ((sawNetworkError || sawTransientFailure) && !sawRateLimit) {
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
2435
|
+
const msg = `All Anthropic accounts failed due to transient upstream/network errors. Last error: ${lastError instanceof Error ? lastError.message : String(lastError ?? "unknown")}`;
|
|
2436
|
+
tracer?.setError("transient_error", msg.slice(0, 500));
|
|
2437
|
+
tracer?.end(502, Date.now() - requestStartTime);
|
|
2438
|
+
return buildLoggedClaudeError(502, msg);
|
|
1371
2439
|
}
|
|
1372
2440
|
if (!sawRateLimit) {
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
2441
|
+
const msg = `All Anthropic accounts failed. Last error: ${lastError instanceof Error ? lastError.message : String(lastError ?? "unknown")}`;
|
|
2442
|
+
tracer?.setError("all_accounts_failed", msg.slice(0, 500));
|
|
2443
|
+
tracer?.end(502, Date.now() - requestStartTime);
|
|
2444
|
+
return buildLoggedClaudeError(502, msg);
|
|
1376
2445
|
}
|
|
1377
2446
|
// All accounts AND all fallbacks exhausted — return 429 with Retry-After
|
|
1378
2447
|
logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
|
|
1379
2448
|
const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`, "overloaded_error");
|
|
1380
|
-
|
|
2449
|
+
tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.`);
|
|
2450
|
+
tracer?.end(429, Date.now() - requestStartTime);
|
|
2451
|
+
recordFinalError(429);
|
|
2452
|
+
logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.`);
|
|
2453
|
+
const errorBodyText = JSON.stringify(errorBody);
|
|
2454
|
+
logProxyBody({
|
|
2455
|
+
phase: "client_response",
|
|
2456
|
+
headers: {
|
|
2457
|
+
"content-type": "application/json",
|
|
2458
|
+
"retry-after": String(retryAfterSec),
|
|
2459
|
+
},
|
|
2460
|
+
body: errorBodyText,
|
|
2461
|
+
bodySize: Buffer.byteLength(errorBodyText, "utf8"),
|
|
2462
|
+
contentType: "application/json",
|
|
2463
|
+
responseStatus: 429,
|
|
2464
|
+
durationMs: Date.now() - requestStartTime,
|
|
2465
|
+
});
|
|
2466
|
+
return new Response(errorBodyText, {
|
|
1381
2467
|
status: 429,
|
|
1382
2468
|
headers: {
|
|
1383
2469
|
"content-type": "application/json",
|
|
@@ -1387,40 +2473,14 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1387
2473
|
}
|
|
1388
2474
|
else {
|
|
1389
2475
|
// ─── TRANSLATION MODE (Claude → Other Provider) ───────
|
|
2476
|
+
tracer?.setMode("full");
|
|
1390
2477
|
// Parse into NeuroLink format, call generate/stream, serialize back
|
|
1391
2478
|
const parsed = parseClaudeRequest(body);
|
|
1392
|
-
const
|
|
1393
|
-
const options = {
|
|
1394
|
-
input: {
|
|
1395
|
-
text: parsed.prompt,
|
|
1396
|
-
...(parsed.images.length > 0
|
|
1397
|
-
? { images: parsed.images }
|
|
1398
|
-
: {}),
|
|
1399
|
-
},
|
|
2479
|
+
const attempts = buildProxyTranslationAttempts({
|
|
1400
2480
|
provider: route.provider,
|
|
1401
2481
|
model: route.model,
|
|
1402
|
-
|
|
1403
|
-
maxTokens: parsed.maxTokens,
|
|
1404
|
-
...(parsed.temperature !== undefined
|
|
1405
|
-
? { temperature: parsed.temperature }
|
|
1406
|
-
: {}),
|
|
1407
|
-
...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
|
|
1408
|
-
...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
|
|
1409
|
-
...(parsed.stopSequences?.length
|
|
1410
|
-
? { stopSequences: parsed.stopSequences }
|
|
1411
|
-
: {}),
|
|
1412
|
-
...(parsed.thinkingConfig
|
|
1413
|
-
? { thinkingConfig: parsed.thinkingConfig }
|
|
1414
|
-
: {}),
|
|
1415
|
-
tools: parsed.tools,
|
|
1416
|
-
...(parsed.toolChoice ? { toolChoice: parsed.toolChoice } : {}),
|
|
1417
|
-
maxSteps: 1,
|
|
1418
|
-
...(historyMessages.length > 0
|
|
1419
|
-
? { conversationMessages: historyMessages }
|
|
1420
|
-
: {}),
|
|
1421
|
-
};
|
|
2482
|
+
}, modelRouter);
|
|
1422
2483
|
if (body.stream) {
|
|
1423
|
-
const streamResult = await ctx.neurolink.stream(options);
|
|
1424
2484
|
const serializer = new ClaudeStreamSerializer(body.model, 0);
|
|
1425
2485
|
const KEEPALIVE_INTERVAL_MS = 15_000; // 15 seconds
|
|
1426
2486
|
// Return a ReadableStream that emits SSE keep-alive comments
|
|
@@ -1429,6 +2489,9 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1429
2489
|
const encoder = new TextEncoder();
|
|
1430
2490
|
let translationKeepAliveTimer;
|
|
1431
2491
|
let translationCancelled = false;
|
|
2492
|
+
let translationSucceeded = false;
|
|
2493
|
+
let translatedModel;
|
|
2494
|
+
let finalStreamError = "No translation providers succeeded";
|
|
1432
2495
|
// Hold a reference to the upstream async iterator so
|
|
1433
2496
|
// we can abort it when the client disconnects.
|
|
1434
2497
|
let upstreamIterator;
|
|
@@ -1448,59 +2511,85 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1448
2511
|
}
|
|
1449
2512
|
}, KEEPALIVE_INTERVAL_MS);
|
|
1450
2513
|
try {
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
if (translationCancelled) {
|
|
1456
|
-
break;
|
|
2514
|
+
for (let attemptIndex = 0; attemptIndex < attempts.length; attemptIndex++) {
|
|
2515
|
+
const attempt = attempts[attemptIndex];
|
|
2516
|
+
if (attemptIndex > 0) {
|
|
2517
|
+
logger.always(`[proxy] fallback → ${attempt.label}`);
|
|
1457
2518
|
}
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
2519
|
+
let collectedText = "";
|
|
2520
|
+
try {
|
|
2521
|
+
const options = buildProxyFallbackOptions(parsed, attempt.provider
|
|
2522
|
+
? {
|
|
2523
|
+
provider: attempt.provider,
|
|
2524
|
+
model: attempt.model,
|
|
2525
|
+
}
|
|
2526
|
+
: {});
|
|
2527
|
+
const streamResult = await ctx.neurolink.stream(options);
|
|
2528
|
+
const iterable = streamResult.stream;
|
|
2529
|
+
upstreamIterator = iterable[Symbol.asyncIterator]();
|
|
2530
|
+
while (true) {
|
|
2531
|
+
if (translationCancelled) {
|
|
2532
|
+
break;
|
|
2533
|
+
}
|
|
2534
|
+
const { value: chunk, done } = await upstreamIterator.next();
|
|
2535
|
+
if (done) {
|
|
2536
|
+
break;
|
|
2537
|
+
}
|
|
2538
|
+
if (translationCancelled) {
|
|
2539
|
+
break;
|
|
2540
|
+
}
|
|
2541
|
+
const text = extractText(chunk);
|
|
2542
|
+
if (text) {
|
|
2543
|
+
collectedText += text;
|
|
2544
|
+
for (const frame of serializer.pushDelta(text)) {
|
|
2545
|
+
controller.enqueue(encoder.encode(frame));
|
|
2546
|
+
}
|
|
2547
|
+
}
|
|
2548
|
+
}
|
|
2549
|
+
const toolCalls = streamResult.toolCalls ?? [];
|
|
2550
|
+
if (!hasTranslatedOutput(collectedText, toolCalls)) {
|
|
2551
|
+
finalStreamError = `Translated provider ${attempt.label} returned no content or tool calls`;
|
|
2552
|
+
logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`);
|
|
2553
|
+
continue;
|
|
1469
2554
|
}
|
|
2555
|
+
if (!translationCancelled && toolCalls.length) {
|
|
2556
|
+
for (const tc of toolCalls) {
|
|
2557
|
+
const toolName = tc.toolName ?? tc.name ?? "unknown";
|
|
2558
|
+
for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(tc))) {
|
|
2559
|
+
controller.enqueue(encoder.encode(frame));
|
|
2560
|
+
}
|
|
2561
|
+
}
|
|
2562
|
+
}
|
|
2563
|
+
if (!translationCancelled) {
|
|
2564
|
+
const reason = streamResult.finishReason ?? "end_turn";
|
|
2565
|
+
const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
|
|
2566
|
+
for (const frame of serializer.finish(resolvedUsage.output, reason)) {
|
|
2567
|
+
controller.enqueue(encoder.encode(frame));
|
|
2568
|
+
}
|
|
2569
|
+
}
|
|
2570
|
+
translatedModel = streamResult.model;
|
|
2571
|
+
translationSucceeded = true;
|
|
2572
|
+
return;
|
|
1470
2573
|
}
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
"
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
{};
|
|
1482
|
-
for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
|
|
1483
|
-
controller.enqueue(encoder.encode(frame));
|
|
2574
|
+
catch (streamErr) {
|
|
2575
|
+
if (translationCancelled) {
|
|
2576
|
+
return;
|
|
2577
|
+
}
|
|
2578
|
+
finalStreamError = streamErr instanceof Error ? streamErr.message : String(streamErr);
|
|
2579
|
+
if (collectedText.trim().length > 0) {
|
|
2580
|
+
logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`);
|
|
2581
|
+
const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`;
|
|
2582
|
+
controller.enqueue(encoder.encode(errorEvent));
|
|
2583
|
+
return;
|
|
1484
2584
|
}
|
|
2585
|
+
logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${finalStreamError}`);
|
|
1485
2586
|
}
|
|
1486
2587
|
}
|
|
1487
2588
|
if (!translationCancelled) {
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
}
|
|
1492
|
-
}
|
|
1493
|
-
}
|
|
1494
|
-
catch (streamErr) {
|
|
1495
|
-
if (translationCancelled) {
|
|
1496
|
-
return;
|
|
2589
|
+
logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`);
|
|
2590
|
+
const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`;
|
|
2591
|
+
controller.enqueue(encoder.encode(errorEvent));
|
|
1497
2592
|
}
|
|
1498
|
-
const errMsg = streamErr instanceof Error
|
|
1499
|
-
? streamErr.message
|
|
1500
|
-
: String(streamErr);
|
|
1501
|
-
logger.always(`[proxy] mid-stream error (translation mode): ${errMsg}`);
|
|
1502
|
-
const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
|
|
1503
|
-
controller.enqueue(encoder.encode(errorEvent));
|
|
1504
2593
|
}
|
|
1505
2594
|
finally {
|
|
1506
2595
|
if (translationKeepAliveTimer) {
|
|
@@ -1509,6 +2598,14 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1509
2598
|
if (!translationCancelled) {
|
|
1510
2599
|
controller.close();
|
|
1511
2600
|
}
|
|
2601
|
+
// OTel: record model substitution if proxy routed to a different model
|
|
2602
|
+
if (tracer && translatedModel && translatedModel !== body.model) {
|
|
2603
|
+
tracer.setModelSubstitution(body.model, translatedModel);
|
|
2604
|
+
}
|
|
2605
|
+
if (!translationSucceeded) {
|
|
2606
|
+
tracer?.setError("generation_error", finalStreamError.slice(0, 500));
|
|
2607
|
+
}
|
|
2608
|
+
tracer?.end(200, Date.now() - requestStartTime);
|
|
1512
2609
|
}
|
|
1513
2610
|
},
|
|
1514
2611
|
cancel() {
|
|
@@ -1533,34 +2630,72 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1533
2630
|
},
|
|
1534
2631
|
});
|
|
1535
2632
|
}
|
|
1536
|
-
|
|
1537
|
-
let
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
collectedText += text;
|
|
2633
|
+
let lastAttemptError = "No translation providers succeeded";
|
|
2634
|
+
for (let attemptIndex = 0; attemptIndex < attempts.length; attemptIndex++) {
|
|
2635
|
+
const attempt = attempts[attemptIndex];
|
|
2636
|
+
if (attemptIndex > 0) {
|
|
2637
|
+
logger.always(`[proxy] fallback → ${attempt.label}`);
|
|
1542
2638
|
}
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
2639
|
+
try {
|
|
2640
|
+
const options = buildProxyFallbackOptions(parsed, attempt.provider
|
|
2641
|
+
? {
|
|
2642
|
+
provider: attempt.provider,
|
|
2643
|
+
model: attempt.model,
|
|
2644
|
+
}
|
|
2645
|
+
: {});
|
|
2646
|
+
const streamResult = await ctx.neurolink.stream(options);
|
|
2647
|
+
let collectedText = "";
|
|
2648
|
+
for await (const chunk of streamResult.stream) {
|
|
2649
|
+
const text = extractText(chunk);
|
|
2650
|
+
if (text) {
|
|
2651
|
+
collectedText += text;
|
|
2652
|
+
}
|
|
1554
2653
|
}
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
2654
|
+
if (!hasTranslatedOutput(collectedText, streamResult.toolCalls)) {
|
|
2655
|
+
lastAttemptError = `Translated provider ${attempt.label} returned no content or tool calls`;
|
|
2656
|
+
logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`);
|
|
2657
|
+
continue;
|
|
2658
|
+
}
|
|
2659
|
+
const internal = {
|
|
2660
|
+
content: collectedText,
|
|
2661
|
+
model: streamResult.model,
|
|
2662
|
+
finishReason: streamResult.finishReason ?? "end_turn",
|
|
2663
|
+
reasoning: undefined,
|
|
2664
|
+
usage: streamResult.usage ? extractUsageFromStreamResult(streamResult.usage) : undefined,
|
|
2665
|
+
toolCalls: streamResult.toolCalls,
|
|
2666
|
+
};
|
|
2667
|
+
// OTel: record model substitution if proxy routed to a different model
|
|
2668
|
+
if (tracer && streamResult.model && streamResult.model !== body.model) {
|
|
2669
|
+
tracer.setModelSubstitution(body.model, streamResult.model);
|
|
2670
|
+
}
|
|
2671
|
+
tracer?.end(200, Date.now() - requestStartTime);
|
|
2672
|
+
const clientResponse = serializeClaudeResponse(internal, body.model);
|
|
2673
|
+
const clientResponseText = JSON.stringify(clientResponse);
|
|
2674
|
+
logProxyBody({
|
|
2675
|
+
phase: "client_response",
|
|
2676
|
+
headers: { "content-type": "application/json" },
|
|
2677
|
+
body: clientResponseText,
|
|
2678
|
+
bodySize: Buffer.byteLength(clientResponseText, "utf8"),
|
|
2679
|
+
contentType: "application/json",
|
|
2680
|
+
responseStatus: 200,
|
|
2681
|
+
durationMs: Date.now() - requestStartTime,
|
|
2682
|
+
});
|
|
2683
|
+
return clientResponse;
|
|
2684
|
+
}
|
|
2685
|
+
catch (attemptError) {
|
|
2686
|
+
lastAttemptError = attemptError instanceof Error ? attemptError.message : String(attemptError);
|
|
2687
|
+
logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${lastAttemptError}`);
|
|
2688
|
+
}
|
|
2689
|
+
}
|
|
2690
|
+
throw new Error(lastAttemptError);
|
|
1559
2691
|
}
|
|
1560
2692
|
}
|
|
1561
2693
|
catch (error) {
|
|
1562
|
-
|
|
1563
|
-
|
|
2694
|
+
const errMsg = error instanceof Error ? error.message : String(error);
|
|
2695
|
+
logger.error(`[claude-proxy] Generation error for ${body.model}: ${errMsg}`);
|
|
2696
|
+
tracer?.setError("generation_error", errMsg.slice(0, 500));
|
|
2697
|
+
tracer?.end(502, Date.now() - requestStartTime);
|
|
2698
|
+
return buildLoggedClaudeError(502, `Generation failed: ${error instanceof Error ? error.message : "unknown error"}`);
|
|
1564
2699
|
}
|
|
1565
2700
|
},
|
|
1566
2701
|
description: "Claude-compatible messages endpoint routed through NeuroLink",
|
|
@@ -1606,9 +2741,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1606
2741
|
}
|
|
1607
2742
|
// Simple estimation using character-to-token heuristic
|
|
1608
2743
|
const text = body.messages
|
|
1609
|
-
.map((m) => typeof m.content === "string"
|
|
1610
|
-
? m.content
|
|
1611
|
-
: JSON.stringify(m.content))
|
|
2744
|
+
.map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content)))
|
|
1612
2745
|
.join(" ");
|
|
1613
2746
|
return { input_tokens: Math.ceil(text.length / 4) };
|
|
1614
2747
|
},
|
|
@@ -1621,6 +2754,26 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
|
|
|
1621
2754
|
// ---------------------------------------------------------------------------
|
|
1622
2755
|
// Helpers
|
|
1623
2756
|
// ---------------------------------------------------------------------------
|
|
2757
|
+
/**
 * Extract token usage from a StreamResult.usage object, handling multiple
 * naming conventions across AI SDK versions and providers:
 *   - AI SDK v6: inputTokens / outputTokens
 *   - AI SDK v4: promptTokens / completionTokens
 *   - NeuroLink internal: input / output
 *
 * For each side the aliases are tried in the order listed above and the first
 * one holding a finite, non-zero number wins. A missing, non-numeric, zero,
 * NaN or infinite value falls through to the next alias, so a malformed
 * counter can never turn `total` into NaN/Infinity.
 *
 * @param {unknown} usage - Usage object of any shape; non-objects yield zeros.
 * @returns {{ input: number, output: number, total: number }} Normalised
 *   counts, with `total` always computed as `input + output`.
 */
function extractUsageFromStreamResult(usage) {
    if (!usage || typeof usage !== "object") {
        return { input: 0, output: 0, total: 0 };
    }
    // Returns the first usable counter among `keys`, or 0 when none qualifies.
    const pickCount = (keys) => {
        for (const key of keys) {
            const value = usage[key];
            // Number.isFinite is false for non-numbers, NaN and ±Infinity.
            if (Number.isFinite(value) && value !== 0) {
                return value;
            }
        }
        return 0;
    };
    const input = pickCount(["inputTokens", "promptTokens", "input"]);
    const output = pickCount(["outputTokens", "completionTokens", "output"]);
    return { input, output, total: input + output };
}
|
|
1624
2777
|
/**
|
|
1625
2778
|
* Extract text content from a stream chunk (handles various chunk formats).
|
|
1626
2779
|
*/
|
|
@@ -1744,24 +2897,17 @@ function isRetryableNetworkError(error) {
|
|
|
1744
2897
|
normalized.includes("fetch failed") ||
|
|
1745
2898
|
normalized.includes("socket hang up"));
|
|
1746
2899
|
}
|
|
1747
|
-
const TRANSIENT_HTTP_STATUSES = new Set([
|
|
1748
|
-
408, 500, 502, 503, 504, 520, 521, 522, 523, 524, 525, 526, 529,
|
|
1749
|
-
]);
|
|
2900
|
+
const TRANSIENT_HTTP_STATUSES = new Set([408, 500, 502, 503, 504, 520, 521, 522, 523, 524, 525, 526, 529]);
|
|
1750
2901
|
/**
|
|
1751
2902
|
* Parse a Claude error payload when available.
|
|
1752
2903
|
*/
|
|
1753
2904
|
export function parseClaudeErrorBody(errBody) {
|
|
1754
2905
|
try {
|
|
1755
2906
|
const parsed = JSON.parse(errBody);
|
|
1756
|
-
if (parsed &&
|
|
1757
|
-
parsed.type === "error" &&
|
|
1758
|
-
parsed.error &&
|
|
1759
|
-
typeof parsed.error === "object") {
|
|
2907
|
+
if (parsed && parsed.type === "error" && parsed.error && typeof parsed.error === "object") {
|
|
1760
2908
|
return {
|
|
1761
2909
|
errorType: typeof parsed.error.type === "string" ? parsed.error.type : undefined,
|
|
1762
|
-
message: typeof parsed.error.message === "string"
|
|
1763
|
-
? parsed.error.message
|
|
1764
|
-
: undefined,
|
|
2910
|
+
message: typeof parsed.error.message === "string" ? parsed.error.message : undefined,
|
|
1765
2911
|
};
|
|
1766
2912
|
}
|
|
1767
2913
|
}
|
|
@@ -1778,8 +2924,91 @@ export function isInvalidRequestError(status, errBody) {
|
|
|
1778
2924
|
return true;
|
|
1779
2925
|
}
|
|
1780
2926
|
const parsed = parseClaudeErrorBody(errBody);
|
|
1781
|
-
return
|
|
1782
|
-
|
|
2927
|
+
return parsed.errorType === "invalid_request_error" || errBody.includes("invalid_request_error");
|
|
2928
|
+
}
|
|
2929
|
+
/**
 * Return a copy of a Claude request body in which every message whose
 * `content` is a plain string is rewritten to the content-block form
 * `[{ type: "text", text }]`. Messages whose content is already non-string
 * are passed through by reference. The input body and its messages are never
 * mutated.
 *
 * @param {object} body - Claude-style request body.
 * @returns {object} Shallow copy of `body` with normalised `messages`. When
 *   `body.messages` is not an array, the copy is returned with `messages`
 *   untouched instead of throwing.
 */
function normalizeClaudeRequestForAnthropic(body) {
    // NOTE(review): callers presumably validate `messages` beforehand; this
    // guard only avoids a TypeError if they do not — confirm.
    if (!Array.isArray(body?.messages)) {
        return { ...body };
    }
    const messages = body.messages.map((msg) => {
        if (typeof msg.content !== "string") {
            return msg;
        }
        return {
            ...msg,
            content: [{ type: "text", text: msg.content }],
        };
    });
    return { ...body, messages };
}
|
|
2943
|
+
/**
 * Build the options object handed to `neurolink.stream()` for one translated
 * attempt, from an already-parsed Claude request.
 *
 * Missing `conversationMessages`, `tools` or `images` on `parsed` are treated
 * as empty rather than raising a TypeError.
 *
 * @param {object} parsed - Parsed Claude request. Fields read: `prompt`,
 *   `images`, `systemPrompt`, `maxTokens`, `temperature`, `topP`, `topK`,
 *   `stopSequences`, `thinkingConfig`, `tools`, `toolChoice`,
 *   `toolChoiceName`, `conversationMessages`.
 * @param {{ provider?: string, model?: string }} [overrides] - Provider/model
 *   to pin for this attempt; each is omitted from the result when falsy.
 * @returns {object} Options object. `systemPrompt` and `maxTokens` are always
 *   present as keys (possibly `undefined`); the sampling and tool fields are
 *   only present when set on `parsed`.
 */
export function buildProxyFallbackOptions(parsed, overrides = {}) {
    const conversationMessages = parsed.conversationMessages ?? [];
    const tools = parsed.tools ?? {};
    const images = parsed.images ?? [];
    // The final conversation entry is excluded from history.
    // NOTE(review): presumably that entry is the turn already carried by
    // `parsed.prompt` — confirm against parseClaudeRequest.
    const historyMessages = conversationMessages.slice(0, -1);
    const toolNames = Object.keys(tools);
    // An explicitly named tool takes precedence over the generic toolChoice.
    const toolChoice = parsed.toolChoiceName
        ? { type: "tool", toolName: parsed.toolChoiceName }
        : parsed.toolChoice;
    return {
        input: {
            text: parsed.prompt,
            ...(images.length > 0 ? { images } : {}),
        },
        ...(overrides?.provider ? { provider: overrides.provider } : {}),
        ...(overrides?.model ? { model: overrides.model } : {}),
        systemPrompt: parsed.systemPrompt,
        maxTokens: parsed.maxTokens,
        ...(parsed.temperature !== undefined ? { temperature: parsed.temperature } : {}),
        ...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
        ...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
        ...(parsed.stopSequences?.length ? { stopSequences: parsed.stopSequences } : {}),
        ...(parsed.thinkingConfig ? { thinkingConfig: parsed.thinkingConfig } : {}),
        ...(toolNames.length === 0 ? { disableTools: true } : {}),
        // Claude-compatible requests already declare the exact tool contract.
        // Filter out NeuroLink's built-in agent tools so translated fallbacks only
        // expose the tools the client actually knows how to handle.
        ...(toolNames.length > 0
            ? {
                tools,
                toolFilter: toolNames,
            }
            : {}),
        ...(toolChoice ? { toolChoice } : {}),
        ...(historyMessages.length > 0 ? { conversationMessages: historyMessages } : {}),
        disableInternalFallback: true,
        skipToolPromptInjection: true,
        maxSteps: 1,
    };
}
|
|
2980
|
+
/**
 * Build the ordered list of provider/model attempts for translation mode.
 *
 * The primary route always comes first, followed by each entry of the
 * router's fallback chain in order. Entries that repeat a provider/model pair
 * already in the list (the primary or an earlier fallback) are skipped, so
 * the same target is never tried twice. When the router reports no fallback
 * chain at all, a final `{ label: "auto-provider" }` attempt with no
 * provider/model is appended.
 *
 * @param {{ provider: string, model?: string }} primary - Primary route.
 * @param {{ getFallbackChain?: () => Array<{ provider: string, model: string }> }} [modelRouter]
 *   Optional router; a missing router, or one without `getFallbackChain`,
 *   is treated as having an empty chain.
 * @returns {Array<{ provider?: string, model?: string, label: string }>}
 */
function buildProxyTranslationAttempts(primary, modelRouter) {
    const attempts = [
        {
            provider: primary.provider,
            model: primary.model,
            label: `${primary.provider}/${primary.model ?? "unknown"}`,
        },
    ];
    const chain = modelRouter?.getFallbackChain?.() ?? [];
    for (const fallback of chain) {
        // Exact-match scan; the list is a handful of entries at most.
        const alreadyQueued = attempts.some((queued) => queued.provider === fallback.provider && queued.model === fallback.model);
        if (alreadyQueued) {
            continue;
        }
        attempts.push({
            provider: fallback.provider,
            model: fallback.model,
            label: `${fallback.provider}/${fallback.model}`,
        });
    }
    // Only an entirely absent chain triggers the provider-less attempt.
    if (chain.length === 0) {
        attempts.push({ label: "auto-provider" });
    }
    return attempts;
}
|
|
3004
|
+
/**
 * Tell whether a translated attempt produced anything worth returning:
 * either some non-whitespace text or at least one tool call.
 *
 * @param {string} collectedText - Text accumulated from the stream.
 * @param {ArrayLike<unknown> | null | undefined} toolCalls - Tool calls, if any.
 * @returns {boolean} True when text or tool calls are present.
 */
function hasTranslatedOutput(collectedText, toolCalls) {
    const visibleText = collectedText.trim();
    if (visibleText.length > 0) {
        return true;
    }
    const toolCallCount = toolCalls?.length ?? 0;
    return toolCallCount > 0;
}
|
|
3007
|
+
/**
 * Pull the argument payload out of a tool-call record, whichever field name
 * was used for it. Fields are checked in the order `args`, `parameters`,
 * `input`; the first one that is neither null nor undefined is returned
 * as-is.
 *
 * @param {object} toolCall - Tool-call record.
 * @returns {*} The argument payload, or a fresh empty object when none is set.
 */
function extractToolArgs(toolCall) {
    for (const field of ["args", "parameters", "input"]) {
        const payload = toolCall[field];
        if (payload !== null && payload !== undefined) {
            return payload;
        }
    }
    return {};
}
|
|
1784
3013
|
/**
|
|
1785
3014
|
* Detect transient upstream failures that should trigger account/provider failover.
|