@juspay/neurolink 9.41.0 → 9.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +7 -1
  3. package/dist/auth/anthropicOAuth.d.ts +18 -3
  4. package/dist/auth/anthropicOAuth.js +149 -4
  5. package/dist/auth/providers/firebase.js +5 -1
  6. package/dist/auth/providers/jwt.js +5 -1
  7. package/dist/auth/providers/workos.js +5 -1
  8. package/dist/auth/sessionManager.d.ts +1 -1
  9. package/dist/auth/sessionManager.js +58 -27
  10. package/dist/browser/neurolink.min.js +354 -334
  11. package/dist/cli/commands/mcp.d.ts +6 -0
  12. package/dist/cli/commands/mcp.js +188 -181
  13. package/dist/cli/commands/proxy.d.ts +2 -1
  14. package/dist/cli/commands/proxy.js +713 -431
  15. package/dist/cli/commands/task.js +3 -0
  16. package/dist/cli/factories/commandFactory.d.ts +2 -0
  17. package/dist/cli/factories/commandFactory.js +38 -0
  18. package/dist/cli/parser.js +4 -3
  19. package/dist/client/aiSdkAdapter.js +3 -0
  20. package/dist/client/streamingClient.js +30 -10
  21. package/dist/core/baseProvider.d.ts +6 -1
  22. package/dist/core/baseProvider.js +208 -230
  23. package/dist/core/factory.d.ts +3 -0
  24. package/dist/core/factory.js +138 -188
  25. package/dist/core/modules/GenerationHandler.js +3 -2
  26. package/dist/core/redisConversationMemoryManager.js +7 -3
  27. package/dist/evaluation/BatchEvaluator.js +4 -1
  28. package/dist/evaluation/hooks/observabilityHooks.js +5 -3
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +24 -9
  31. package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  32. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  33. package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
  34. package/dist/evaluation/scorers/scorerRegistry.js +353 -282
  35. package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
  36. package/dist/lib/auth/anthropicOAuth.js +149 -4
  37. package/dist/lib/auth/providers/firebase.js +5 -1
  38. package/dist/lib/auth/providers/jwt.js +5 -1
  39. package/dist/lib/auth/providers/workos.js +5 -1
  40. package/dist/lib/auth/sessionManager.d.ts +1 -1
  41. package/dist/lib/auth/sessionManager.js +58 -27
  42. package/dist/lib/client/aiSdkAdapter.js +3 -0
  43. package/dist/lib/client/streamingClient.js +30 -10
  44. package/dist/lib/core/baseProvider.d.ts +6 -1
  45. package/dist/lib/core/baseProvider.js +208 -230
  46. package/dist/lib/core/factory.d.ts +3 -0
  47. package/dist/lib/core/factory.js +138 -188
  48. package/dist/lib/core/modules/GenerationHandler.js +3 -2
  49. package/dist/lib/core/redisConversationMemoryManager.js +7 -3
  50. package/dist/lib/evaluation/BatchEvaluator.js +4 -1
  51. package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
  52. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  53. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +24 -9
  54. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  55. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  56. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
  57. package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
  58. package/dist/lib/mcp/toolRegistry.d.ts +2 -0
  59. package/dist/lib/mcp/toolRegistry.js +32 -31
  60. package/dist/lib/neurolink.d.ts +41 -2
  61. package/dist/lib/neurolink.js +1616 -1681
  62. package/dist/lib/observability/otelBridge.d.ts +2 -2
  63. package/dist/lib/observability/otelBridge.js +12 -3
  64. package/dist/lib/providers/amazonBedrock.js +2 -4
  65. package/dist/lib/providers/anthropic.d.ts +9 -5
  66. package/dist/lib/providers/anthropic.js +19 -14
  67. package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
  68. package/dist/lib/providers/anthropicBaseProvider.js +5 -4
  69. package/dist/lib/providers/azureOpenai.d.ts +1 -1
  70. package/dist/lib/providers/azureOpenai.js +5 -4
  71. package/dist/lib/providers/googleAiStudio.js +30 -6
  72. package/dist/lib/providers/googleVertex.d.ts +10 -0
  73. package/dist/lib/providers/googleVertex.js +437 -423
  74. package/dist/lib/providers/huggingFace.d.ts +3 -3
  75. package/dist/lib/providers/huggingFace.js +6 -8
  76. package/dist/lib/providers/litellm.d.ts +1 -0
  77. package/dist/lib/providers/litellm.js +76 -55
  78. package/dist/lib/providers/mistral.js +2 -1
  79. package/dist/lib/providers/ollama.js +93 -23
  80. package/dist/lib/providers/openAI.d.ts +2 -0
  81. package/dist/lib/providers/openAI.js +141 -141
  82. package/dist/lib/providers/openRouter.js +2 -1
  83. package/dist/lib/providers/openaiCompatible.d.ts +4 -4
  84. package/dist/lib/providers/openaiCompatible.js +4 -4
  85. package/dist/lib/proxy/claudeFormat.d.ts +3 -2
  86. package/dist/lib/proxy/claudeFormat.js +27 -14
  87. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  88. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  89. package/dist/lib/proxy/modelRouter.js +3 -0
  90. package/dist/lib/proxy/oauthFetch.d.ts +1 -1
  91. package/dist/lib/proxy/oauthFetch.js +289 -316
  92. package/dist/lib/proxy/proxyConfig.js +46 -24
  93. package/dist/lib/proxy/proxyEnv.d.ts +19 -0
  94. package/dist/lib/proxy/proxyEnv.js +73 -0
  95. package/dist/lib/proxy/proxyFetch.js +291 -217
  96. package/dist/lib/proxy/proxyTracer.d.ts +133 -0
  97. package/dist/lib/proxy/proxyTracer.js +645 -0
  98. package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
  99. package/dist/lib/proxy/rawStreamCapture.js +83 -0
  100. package/dist/lib/proxy/requestLogger.d.ts +32 -5
  101. package/dist/lib/proxy/requestLogger.js +503 -47
  102. package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
  103. package/dist/lib/proxy/sseInterceptor.js +427 -0
  104. package/dist/lib/proxy/usageStats.d.ts +4 -3
  105. package/dist/lib/proxy/usageStats.js +25 -12
  106. package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
  107. package/dist/lib/rag/chunking/markdownChunker.js +15 -6
  108. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +17 -3
  109. package/dist/lib/server/routes/claudeProxyRoutes.js +3032 -1349
  110. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
  111. package/dist/lib/services/server/ai/observability/instrumentation.js +337 -161
  112. package/dist/lib/tasks/backends/bullmqBackend.d.ts +1 -0
  113. package/dist/lib/tasks/backends/bullmqBackend.js +35 -22
  114. package/dist/lib/tasks/store/redisTaskStore.d.ts +1 -0
  115. package/dist/lib/tasks/store/redisTaskStore.js +54 -39
  116. package/dist/lib/tasks/taskManager.d.ts +5 -0
  117. package/dist/lib/tasks/taskManager.js +158 -30
  118. package/dist/lib/telemetry/index.d.ts +2 -1
  119. package/dist/lib/telemetry/index.js +2 -1
  120. package/dist/lib/telemetry/telemetryService.d.ts +3 -0
  121. package/dist/lib/telemetry/telemetryService.js +69 -5
  122. package/dist/lib/types/cli.d.ts +10 -0
  123. package/dist/lib/types/proxyTypes.d.ts +160 -5
  124. package/dist/lib/types/streamTypes.d.ts +25 -3
  125. package/dist/lib/utils/messageBuilder.js +3 -2
  126. package/dist/lib/utils/providerHealth.d.ts +19 -0
  127. package/dist/lib/utils/providerHealth.js +279 -33
  128. package/dist/lib/utils/providerUtils.js +17 -22
  129. package/dist/lib/utils/toolChoice.d.ts +4 -0
  130. package/dist/lib/utils/toolChoice.js +7 -0
  131. package/dist/mcp/toolRegistry.d.ts +2 -0
  132. package/dist/mcp/toolRegistry.js +32 -31
  133. package/dist/neurolink.d.ts +41 -2
  134. package/dist/neurolink.js +1616 -1681
  135. package/dist/observability/otelBridge.d.ts +2 -2
  136. package/dist/observability/otelBridge.js +12 -3
  137. package/dist/providers/amazonBedrock.js +2 -4
  138. package/dist/providers/anthropic.d.ts +9 -5
  139. package/dist/providers/anthropic.js +19 -14
  140. package/dist/providers/anthropicBaseProvider.d.ts +3 -3
  141. package/dist/providers/anthropicBaseProvider.js +5 -4
  142. package/dist/providers/azureOpenai.d.ts +1 -1
  143. package/dist/providers/azureOpenai.js +5 -4
  144. package/dist/providers/googleAiStudio.js +30 -6
  145. package/dist/providers/googleVertex.d.ts +10 -0
  146. package/dist/providers/googleVertex.js +437 -423
  147. package/dist/providers/huggingFace.d.ts +3 -3
  148. package/dist/providers/huggingFace.js +6 -7
  149. package/dist/providers/litellm.d.ts +1 -0
  150. package/dist/providers/litellm.js +76 -55
  151. package/dist/providers/mistral.js +2 -1
  152. package/dist/providers/ollama.js +93 -23
  153. package/dist/providers/openAI.d.ts +2 -0
  154. package/dist/providers/openAI.js +141 -141
  155. package/dist/providers/openRouter.js +2 -1
  156. package/dist/providers/openaiCompatible.d.ts +4 -4
  157. package/dist/providers/openaiCompatible.js +4 -3
  158. package/dist/proxy/claudeFormat.d.ts +3 -2
  159. package/dist/proxy/claudeFormat.js +27 -14
  160. package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  161. package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  162. package/dist/proxy/modelRouter.js +3 -0
  163. package/dist/proxy/oauthFetch.d.ts +1 -1
  164. package/dist/proxy/oauthFetch.js +289 -316
  165. package/dist/proxy/proxyConfig.js +46 -24
  166. package/dist/proxy/proxyEnv.d.ts +19 -0
  167. package/dist/proxy/proxyEnv.js +72 -0
  168. package/dist/proxy/proxyFetch.js +291 -217
  169. package/dist/proxy/proxyTracer.d.ts +133 -0
  170. package/dist/proxy/proxyTracer.js +644 -0
  171. package/dist/proxy/rawStreamCapture.d.ts +10 -0
  172. package/dist/proxy/rawStreamCapture.js +82 -0
  173. package/dist/proxy/requestLogger.d.ts +32 -5
  174. package/dist/proxy/requestLogger.js +503 -47
  175. package/dist/proxy/sseInterceptor.d.ts +97 -0
  176. package/dist/proxy/sseInterceptor.js +426 -0
  177. package/dist/proxy/usageStats.d.ts +4 -3
  178. package/dist/proxy/usageStats.js +25 -12
  179. package/dist/rag/chunkers/MarkdownChunker.js +13 -5
  180. package/dist/rag/chunking/markdownChunker.js +15 -6
  181. package/dist/server/routes/claudeProxyRoutes.d.ts +17 -3
  182. package/dist/server/routes/claudeProxyRoutes.js +3032 -1349
  183. package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
  184. package/dist/services/server/ai/observability/instrumentation.js +337 -161
  185. package/dist/tasks/backends/bullmqBackend.d.ts +1 -0
  186. package/dist/tasks/backends/bullmqBackend.js +35 -22
  187. package/dist/tasks/store/redisTaskStore.d.ts +1 -0
  188. package/dist/tasks/store/redisTaskStore.js +54 -39
  189. package/dist/tasks/taskManager.d.ts +5 -0
  190. package/dist/tasks/taskManager.js +158 -30
  191. package/dist/telemetry/index.d.ts +2 -1
  192. package/dist/telemetry/index.js +2 -1
  193. package/dist/telemetry/telemetryService.d.ts +3 -0
  194. package/dist/telemetry/telemetryService.js +69 -5
  195. package/dist/types/cli.d.ts +10 -0
  196. package/dist/types/proxyTypes.d.ts +160 -5
  197. package/dist/types/streamTypes.d.ts +25 -3
  198. package/dist/utils/messageBuilder.js +3 -2
  199. package/dist/utils/providerHealth.d.ts +19 -0
  200. package/dist/utils/providerHealth.js +279 -33
  201. package/dist/utils/providerUtils.js +18 -22
  202. package/dist/utils/toolChoice.d.ts +4 -0
  203. package/dist/utils/toolChoice.js +6 -0
  204. package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
  205. package/docs/changelog.md +252 -0
  206. package/package.json +19 -2
  207. package/scripts/observability/check-proxy-telemetry.mjs +235 -0
  208. package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
  209. package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
  210. package/scripts/observability/manage-local-openobserve.sh +215 -0
  211. package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
  212. package/scripts/observability/proxy-observability.env.example +23 -0
@@ -9,20 +9,24 @@
9
9
  * provider/model pairs (e.g. "claude-sonnet-4-20250514" -> vertex/gemini-2.5-pro).
10
10
  * Without a router, models are passed through to the Anthropic provider.
11
11
  */
12
- import { readFile, access } from "node:fs/promises";
13
- import { join } from "node:path";
12
+ import { randomUUID } from "node:crypto";
13
+ import { access, mkdir, readFile, rename, writeFile } from "node:fs/promises";
14
14
  import { homedir } from "node:os";
15
- import { parseClaudeRequest, serializeClaudeResponse, ClaudeStreamSerializer, buildClaudeError, generateToolUseId, } from "../../proxy/claudeFormat.js";
16
- import { logger } from "../../utils/logger.js";
17
- import { recordRequest, recordSuccess, recordError, recordCooldown, } from "../../proxy/usageStats.js";
18
- import { logRequest, logFullRequestResponse, logStreamError, } from "../../proxy/requestLogger.js";
15
+ import { join } from "node:path";
16
+ import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
19
17
  import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js";
20
- import { needsRefresh, refreshToken, persistTokens, } from "../../proxy/tokenRefresh.js";
18
+ import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js";
19
+ import { ProxyTracer } from "../../proxy/proxyTracer.js";
20
+ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
21
+ import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
22
+ import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
23
+ import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
24
+ import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
25
+ import { logger } from "../../utils/logger.js";
26
+ import { ProviderHealthChecker } from "../../utils/providerHealth.js";
21
27
  // ---------------------------------------------------------------------------
22
28
  // Helpers
23
29
  // ---------------------------------------------------------------------------
24
- /** Header names whose values must be masked in debug logs. */
25
- const SENSITIVE_HEADERS = new Set(["authorization", "x-api-key"]);
26
30
  /** Headers that must never be forwarded upstream to Anthropic. */
27
31
  const BLOCKED_UPSTREAM_HEADERS = new Set([
28
32
  "cookie",
@@ -32,22 +36,6 @@ const BLOCKED_UPSTREAM_HEADERS = new Set([
32
36
  "content-length",
33
37
  "transfer-encoding",
34
38
  ]);
35
- /** Return a shallow copy of `headers` with sensitive values redacted. */
36
- function redactSensitiveHeaders(headers) {
37
- const redacted = {};
38
- for (const [key, value] of Object.entries(headers)) {
39
- if (SENSITIVE_HEADERS.has(key.toLowerCase()) && value.length > 8) {
40
- redacted[key] = value.substring(0, 8) + "...";
41
- }
42
- else if (SENSITIVE_HEADERS.has(key.toLowerCase())) {
43
- redacted[key] = "***";
44
- }
45
- else {
46
- redacted[key] = value;
47
- }
48
- }
49
- return redacted;
50
- }
51
39
  // ---------------------------------------------------------------------------
52
40
  // Module-level state
53
41
  // ---------------------------------------------------------------------------
@@ -83,65 +71,196 @@ function advancePrimaryIfCurrent(accountKey, enabledCount, primaryAccountKey) {
83
71
  }
84
72
  primaryAccountIndex = (primaryAccountIndex + 1) % enabledCount;
85
73
  }
86
- // ---------------------------------------------------------------------------
87
- // OAuth polyfill helpers (extracted to reduce block nesting)
88
- // ---------------------------------------------------------------------------
89
74
  const snapshotCache = new Map();
90
75
  const SNAPSHOT_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
91
- /**
92
- * Load a header snapshot captured from a real Claude Code session and apply
93
- * any headers the client didn't send. This makes non-Claude-Code requests
94
- * (e.g. from Curator, custom apps) appear identical to Claude Code.
95
- */
96
- async function applyHeaderSnapshot(headers, accountLabel) {
76
+ const SNAPSHOT_STABLE_HEADERS = new Set([
77
+ "accept",
78
+ "accept-encoding",
79
+ "accept-language",
80
+ "anthropic-beta",
81
+ "anthropic-dangerous-direct-browser-access",
82
+ "anthropic-version",
83
+ "sec-fetch-mode",
84
+ "user-agent",
85
+ "x-app",
86
+ "x-stainless-arch",
87
+ "x-stainless-lang",
88
+ "x-stainless-os",
89
+ "x-stainless-package-version",
90
+ "x-stainless-retry-count",
91
+ "x-stainless-runtime",
92
+ "x-stainless-runtime-version",
93
+ "x-stainless-timeout",
94
+ "x-subscription-tier",
95
+ ]);
96
+ const NON_CLAUDE_OAUTH_BETAS = [
97
+ "oauth-2025-04-20",
98
+ "claude-code-20250219",
99
+ "fine-grained-tool-streaming-2025-05-14",
100
+ ];
101
+ function getSnapshotSafeLabel(accountLabel) {
102
+ return accountLabel.replace(/[^a-zA-Z0-9._@-]/g, "_");
103
+ }
104
+ function getSnapshotPath(accountLabel) {
105
+ return join(homedir(), ".neurolink", "header-snapshots", `anthropic_${getSnapshotSafeLabel(accountLabel)}.json`);
106
+ }
107
+ function applySnapshotHeaders(headers, snapshot) {
108
+ if (!snapshot?.headers) {
109
+ return;
110
+ }
111
+ for (const [sk, sv] of Object.entries(snapshot.headers)) {
112
+ const lower = sk.toLowerCase();
113
+ if (typeof sv === "string" &&
114
+ !headers[lower] &&
115
+ !BLOCKED_UPSTREAM_HEADERS.has(lower) &&
116
+ lower !== "authorization" &&
117
+ lower !== "x-api-key" &&
118
+ lower !== "x-claude-code-session-id") {
119
+ headers[lower] = sv;
120
+ }
121
+ }
122
+ }
123
+ async function loadClaudeSnapshot(accountLabel) {
97
124
  try {
98
- // Sanitize accountLabel to prevent directory traversal
99
- const safeLabel = accountLabel.replace(/[^a-zA-Z0-9._@-]/g, "_");
100
- // Check cache first
125
+ const safeLabel = getSnapshotSafeLabel(accountLabel);
101
126
  const cached = snapshotCache.get(safeLabel);
102
127
  if (cached && Date.now() - cached.loadedAt < SNAPSHOT_CACHE_TTL_MS) {
103
- for (const [sk, sv] of Object.entries(cached.headers)) {
104
- const lower = sk.toLowerCase();
105
- if (typeof sv === "string" &&
106
- !headers[lower] &&
107
- !BLOCKED_UPSTREAM_HEADERS.has(lower) &&
108
- lower !== "authorization" &&
109
- lower !== "x-api-key") {
110
- headers[lower] = sv;
111
- }
112
- }
113
- return;
128
+ return cached.snapshot;
114
129
  }
115
- const snapshotPath = join(homedir(), ".neurolink", "header-snapshots", `anthropic_${safeLabel}.json`);
130
+ const snapshotPath = getSnapshotPath(accountLabel);
116
131
  try {
117
132
  await access(snapshotPath);
118
133
  }
119
134
  catch {
120
- return;
135
+ return null;
121
136
  }
122
137
  const snapshot = JSON.parse(await readFile(snapshotPath, "utf8"));
123
- if (!snapshot.headers) {
124
- return;
138
+ if (!snapshot || typeof snapshot !== "object") {
139
+ return null;
140
+ }
141
+ const normalized = {
142
+ accountKey: "accountKey" in snapshot && typeof snapshot.accountKey === "string"
143
+ ? snapshot.accountKey
144
+ : `anthropic:${accountLabel}`,
145
+ capturedAt: "capturedAt" in snapshot && typeof snapshot.capturedAt === "string"
146
+ ? snapshot.capturedAt
147
+ : new Date(0).toISOString(),
148
+ source: "claude-code",
149
+ headers: "headers" in snapshot && snapshot.headers ? snapshot.headers : {},
150
+ ...(snapshot.body ? { body: snapshot.body } : {}),
151
+ };
152
+ if (Object.keys(normalized.headers).length === 0 &&
153
+ Object.keys(normalized.body ?? {}).length === 0) {
154
+ return null;
125
155
  }
126
- // Store in cache
127
156
  snapshotCache.set(safeLabel, {
128
- headers: snapshot.headers,
157
+ snapshot: normalized,
129
158
  loadedAt: Date.now(),
130
159
  });
131
- for (const [sk, sv] of Object.entries(snapshot.headers)) {
132
- const lower = sk.toLowerCase();
133
- if (typeof sv === "string" &&
134
- !headers[lower] &&
135
- !BLOCKED_UPSTREAM_HEADERS.has(lower) &&
136
- lower !== "authorization" &&
137
- lower !== "x-api-key") {
138
- headers[lower] = sv;
139
- }
160
+ return normalized;
161
+ }
162
+ catch {
163
+ return null;
164
+ }
165
+ }
166
+ function buildSnapshotHeaders(headers, existingHeaders) {
167
+ const merged = { ...(existingHeaders ?? {}) };
168
+ for (const [key, value] of Object.entries(headers)) {
169
+ const lower = key.toLowerCase();
170
+ if (typeof value === "string" &&
171
+ SNAPSHOT_STABLE_HEADERS.has(lower) &&
172
+ !BLOCKED_UPSTREAM_HEADERS.has(lower) &&
173
+ lower !== "authorization" &&
174
+ lower !== "x-api-key" &&
175
+ lower !== "x-claude-code-session-id") {
176
+ merged[lower] = value;
140
177
  }
141
178
  }
179
+ return merged;
180
+ }
181
+ function extractSnapshotBody(body) {
182
+ if (!body || typeof body !== "object") {
183
+ return undefined;
184
+ }
185
+ const parsed = body;
186
+ const identity = parseClaudeCodeUserId(parsed.metadata?.user_id);
187
+ const systemBlocks = Array.isArray(parsed.system)
188
+ ? parsed.system
189
+ : typeof parsed.system === "string"
190
+ ? [{ type: "text", text: parsed.system }]
191
+ : [];
192
+ const billingHeader = systemBlocks.find((block) => typeof block?.text === "string" &&
193
+ block.text.includes("x-anthropic-billing-header"))?.text;
194
+ const agentBlock = systemBlocks.find((block) => typeof block?.text === "string" &&
195
+ block.text.includes("Claude Agent SDK"))?.text;
196
+ if (!identity && !billingHeader && !agentBlock) {
197
+ return undefined;
198
+ }
199
+ return {
200
+ ...(identity ? { metadataUserId: identity.metadataUserId } : {}),
201
+ ...(identity ? { sessionId: identity.sessionId } : {}),
202
+ ...(billingHeader ? { billingHeader } : {}),
203
+ ...(agentBlock ? { agentBlock } : {}),
204
+ };
205
+ }
206
+ function isLikelyClaudeClient(headers, snapshotBody) {
207
+ return (typeof headers["x-claude-code-session-id"] === "string" ||
208
+ headers["user-agent"]?.startsWith("claude-cli/") ||
209
+ !!snapshotBody?.metadataUserId ||
210
+ !!snapshotBody?.billingHeader ||
211
+ !!snapshotBody?.agentBlock);
212
+ }
213
+ function snapshotsMatch(existing, next) {
214
+ if (!existing) {
215
+ return false;
216
+ }
217
+ return (JSON.stringify(existing.headers ?? {}) ===
218
+ JSON.stringify(next.headers ?? {}) &&
219
+ JSON.stringify(existing.body ?? {}) === JSON.stringify(next.body ?? {}));
220
+ }
221
+ async function persistClaudeSnapshot(accountLabel, snapshot) {
222
+ const snapshotPath = getSnapshotPath(accountLabel);
223
+ const dirPath = join(homedir(), ".neurolink", "header-snapshots");
224
+ await mkdir(dirPath, { recursive: true });
225
+ const tmpPath = `${snapshotPath}.${process.pid}.${randomUUID()}.tmp`;
226
+ await writeFile(tmpPath, JSON.stringify(snapshot, null, 2), { mode: 0o600 });
227
+ await rename(tmpPath, snapshotPath);
228
+ snapshotCache.set(getSnapshotSafeLabel(accountLabel), {
229
+ snapshot,
230
+ loadedAt: Date.now(),
231
+ });
232
+ }
233
+ async function maybeRefreshClaudeSnapshot(accountLabel, accountKey, headers, bodyStr) {
234
+ const existing = await loadClaudeSnapshot(accountLabel);
235
+ let parsedBody;
236
+ try {
237
+ parsedBody = JSON.parse(bodyStr);
238
+ }
142
239
  catch {
143
- // Snapshot missing or corrupt — continue without it
240
+ return existing;
241
+ }
242
+ const body = extractSnapshotBody(parsedBody);
243
+ if (!isLikelyClaudeClient(headers, body)) {
244
+ return existing;
245
+ }
246
+ const next = {
247
+ accountKey,
248
+ capturedAt: new Date().toISOString(),
249
+ source: "claude-code",
250
+ headers: buildSnapshotHeaders(headers, existing?.headers),
251
+ body: {
252
+ ...(existing?.body ?? {}),
253
+ ...(body ?? {}),
254
+ ...(typeof headers["x-claude-code-session-id"] === "string"
255
+ ? { sessionId: headers["x-claude-code-session-id"] }
256
+ : {}),
257
+ },
258
+ };
259
+ if (snapshotsMatch(existing, next)) {
260
+ return existing;
144
261
  }
262
+ await persistClaudeSnapshot(accountLabel, next);
263
+ return next;
145
264
  }
146
265
  /**
147
266
  * Polyfill the request body for OAuth accounts.
@@ -149,59 +268,78 @@ async function applyHeaderSnapshot(headers, accountLabel) {
149
268
  * into the body. Non-CC clients (Curator, custom apps) don't send these —
150
269
  * Anthropic rejects without them.
151
270
  */
152
- function polyfillOAuthBody(bodyStr, accountToken) {
271
+ function polyfillOAuthBody(bodyStr, accountToken, snapshot, preferredSessionId) {
153
272
  try {
154
273
  const parsed = JSON.parse(bodyStr);
155
274
  // Billing header block (required by Anthropic for OAuth)
156
- const randomHex = Math.random().toString(16).substring(2, 5);
157
- const billingBlock = {
158
- type: "text",
159
- text: `x-anthropic-billing-header: cc_version=2.1.86.${randomHex}; cc_entrypoint=cli; cch=proxy;`,
160
- };
275
+ // NOTE: This block MUST be deterministic (no random values) to preserve
276
+ // Anthropic's prompt caching prefix chain. We keep the real Claude Code
277
+ // version/entrypoint shape when present, but stabilize the volatile cch.
161
278
  const agentBlock = {
162
279
  type: "text",
163
- text: "You are a Claude agent, built on Anthropic's Claude Agent SDK.",
280
+ text: snapshot?.body?.agentBlock ||
281
+ "You are a Claude agent, built on Anthropic's Claude Agent SDK.",
164
282
  };
165
- // Normalise system to array and prepend billing + agent
283
+ // Normalise system to array and APPEND billing + agent blocks.
284
+ // IMPORTANT: We append (not prepend) to preserve the client's cache
285
+ // prefix chain. Anthropic's prompt caching uses prefix matching — if we
286
+ // insert anything before the client's system blocks, we invalidate all
287
+ // cached content (tools, system prompt, message history).
288
+ //
289
+ // Claude Code sends a billing block with a `cch=<hash>` value that changes
290
+ // on every request. We fix this by:
291
+ // 1. Removing the client's billing block from its current position
292
+ // 2. Stabilizing it while keeping the official Claude Code shape
293
+ // 3. Appending it at the END so the cacheable system blocks stay
294
+ // at the front of the prefix chain
166
295
  if (parsed.system) {
167
296
  if (typeof parsed.system === "string") {
168
297
  parsed.system = [{ type: "text", text: parsed.system }];
169
298
  }
170
299
  if (Array.isArray(parsed.system)) {
171
- const hasBilling = parsed.system.some((b) => typeof b.text === "string" &&
300
+ // Find and remove existing billing/agent blocks from wherever
301
+ // the client placed them (typically at system[0])
302
+ const billingIdx = parsed.system.findIndex((b) => typeof b.text === "string" &&
172
303
  b.text.includes("x-anthropic-billing-header"));
173
- const hasAgent = parsed.system.some((b) => typeof b.text === "string" && b.text.includes("Claude Agent SDK"));
174
- const toInsert = [];
175
- if (!hasBilling) {
176
- toInsert.push(billingBlock);
177
- }
178
- if (!hasAgent) {
179
- toInsert.push(agentBlock);
180
- }
181
- if (toInsert.length > 0) {
182
- parsed.system = [...toInsert, ...parsed.system];
304
+ const agentIdx = parsed.system.findIndex((b) => typeof b.text === "string" && b.text.includes("Claude Agent SDK"));
305
+ const billingBlock = {
306
+ type: "text",
307
+ text: buildStableClaudeCodeBillingHeader(parsed.system[billingIdx]?.text ?? snapshot?.body?.billingHeader),
308
+ };
309
+ // Remove in reverse index order so indices stay valid
310
+ const indicesToRemove = [billingIdx, agentIdx]
311
+ .filter((i) => i >= 0)
312
+ .sort((a, b) => b - a);
313
+ for (const idx of indicesToRemove) {
314
+ parsed.system.splice(idx, 1);
183
315
  }
316
+ // Always append a deterministic billing block at the end.
317
+ // If the client sent one, we stripped its dynamic cch= and use
318
+ // our stable version instead. If not, we add ours.
319
+ parsed.system = [...parsed.system, billingBlock, agentBlock];
184
320
  }
185
321
  }
186
322
  else {
187
- parsed.system = [billingBlock, agentBlock];
188
- }
189
- // Inject metadata.user_id (required for OAuth)
190
- if (!parsed.metadata?.user_id) {
191
- const tokenPrefix = accountToken.substring(0, Math.min(20, accountToken.length));
192
- const hash = Array.from(new TextEncoder().encode(tokenPrefix))
193
- .reduce((a, b) => ((a << 5) - a + b) | 0, 0)
194
- .toString(16)
195
- .replace("-", "");
196
- parsed.metadata = {
197
- ...parsed.metadata,
198
- user_id: `proxy-${hash}`,
323
+ const billingBlock = {
324
+ type: "text",
325
+ text: buildStableClaudeCodeBillingHeader(snapshot?.body?.billingHeader),
199
326
  };
327
+ parsed.system = [billingBlock, agentBlock];
200
328
  }
201
- return JSON.stringify(parsed);
329
+ // Inject Claude-Code-shaped metadata.user_id (required for OAuth).
330
+ const tokenPrefix = accountToken.substring(0, Math.min(20, accountToken.length));
331
+ const identity = getOrCreateClaudeCodeIdentity(tokenPrefix, {
332
+ existingUserId: parsed.metadata?.user_id ?? snapshot?.body?.metadataUserId,
333
+ preferredSessionId: preferredSessionId ?? snapshot?.body?.sessionId,
334
+ });
335
+ parsed.metadata = {
336
+ ...parsed.metadata,
337
+ user_id: identity.metadataUserId,
338
+ };
339
+ return { bodyStr: JSON.stringify(parsed), sessionId: identity.sessionId };
202
340
  }
203
341
  catch {
204
- return bodyStr; // JSON parse failed — use original body
342
+ return { bodyStr }; // JSON parse failed — use original body
205
343
  }
206
344
  }
207
345
  // ---------------------------------------------------------------------------
@@ -256,6 +394,2617 @@ async function tryLoadLegacyAccount(creds, legacyCredPath) {
256
394
  persistTarget: { credPath: legacyCredPath },
257
395
  };
258
396
  }
397
/**
 * Entry point for "full" translation mode: parses the Claude-format body,
 * builds the ordered list of provider attempts for the routed target, and
 * hands off to the streaming or JSON handler depending on `body.stream`.
 *
 * @param {object} args - `{ ctx, body, route, modelRouter, tracer, requestStartTime, logProxyBody }`.
 * @returns {Promise<*>} Whatever the selected handler resolves to.
 */
async function handleTranslatedClaudeRequest(args) {
    const { ctx, body, route, modelRouter, tracer, requestStartTime, logProxyBody } = args;
    tracer?.setMode("full");
    const parsed = parseClaudeRequest(body);
    const primaryTarget = { provider: route.provider, model: route.model };
    const attempts = buildProxyTranslationAttempts(primaryTarget, modelRouter, parsed);
    // Fields both handlers consume; only the JSON handler also takes logProxyBody.
    const handlerArgs = { ctx, body, attempts, parsed, tracer, requestStartTime };
    return body.stream
        ? handleTranslatedClaudeStreamRequest(handlerArgs)
        : handleTranslatedClaudeJsonRequest({ ...handlerArgs, logProxyBody });
}
425
/**
 * Streams a translated completion to the client as Claude-style SSE frames.
 *
 * Each entry of `attempts` is tried in order. An attempt that fails or yields
 * no text/tool calls before anything was sent falls through to the next one;
 * a failure after text was already emitted ends the stream with an `error`
 * event instead. A keep-alive comment frame is sent every 15 s while running.
 *
 * Once the client cancels the stream no further providers are contacted, and
 * an upstream stream that was opened while the cancel arrived is released.
 *
 * @param {object} args - `{ ctx, body, attempts, parsed, tracer, requestStartTime }`.
 * @returns {Promise<Response>} An SSE response wrapping the translation stream.
 */
async function handleTranslatedClaudeStreamRequest(args) {
    const { ctx, body, attempts, parsed, tracer, requestStartTime } = args;
    const serializer = new ClaudeStreamSerializer(body.model, 0);
    const KEEPALIVE_INTERVAL_MS = 15_000;
    const encoder = new TextEncoder();
    let translationKeepAliveTimer;
    let translationCancelled = false;
    let translationSucceeded = false;
    let translatedModel;
    let finalStreamError = "No translation providers succeeded";
    let upstreamIterator;
    // Best-effort release of the current upstream iterator (if it supports it).
    const releaseUpstream = () => {
        if (!upstreamIterator?.return) {
            return;
        }
        upstreamIterator.return(undefined).catch((cancelErr) => {
            logger.debug(`[proxy] upstream cancel error: ${cancelErr instanceof Error ? cancelErr.message : String(cancelErr)}`);
        });
    };
    const translationStream = new ReadableStream({
        async start(controller) {
            const emit = (frame) => controller.enqueue(encoder.encode(frame));
            const emitInterruptedError = () => {
                logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`);
                const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`;
                emit(errorEvent);
            };
            for (const frame of serializer.start()) {
                emit(frame);
            }
            translationKeepAliveTimer = setInterval(() => {
                try {
                    emit(": keep-alive\n\n");
                }
                catch {
                    // Controller already closed.
                }
            }, KEEPALIVE_INTERVAL_MS);
            try {
                for (const [attemptIndex, attempt] of attempts.entries()) {
                    if (attemptIndex > 0) {
                        logger.always(`[proxy] fallback → ${attempt.label}`);
                    }
                    let collectedText = "";
                    try {
                        const options = buildProxyFallbackOptions(parsed, attempt.provider
                            ? {
                                provider: attempt.provider,
                                model: attempt.model,
                            }
                            : {});
                        const streamResult = await ctx.neurolink.stream(options);
                        upstreamIterator = streamResult.stream[Symbol.asyncIterator]();
                        if (translationCancelled) {
                            // The client went away while the provider call was pending;
                            // cancel() ran before this iterator existed, so release it here.
                            releaseUpstream();
                            return;
                        }
                        while (!translationCancelled) {
                            const { value: chunk, done } = await upstreamIterator.next();
                            if (done || translationCancelled) {
                                break;
                            }
                            const text = extractText(chunk);
                            if (text) {
                                collectedText += text;
                                for (const frame of serializer.pushDelta(text)) {
                                    emit(frame);
                                }
                            }
                        }
                        const toolCalls = streamResult.toolCalls ?? [];
                        if (!hasTranslatedOutput(collectedText, toolCalls)) {
                            if (translationCancelled) {
                                // An empty result caused by cancellation is not a provider
                                // failure; do not fall through to the remaining providers.
                                return;
                            }
                            finalStreamError = `Translated provider ${attempt.label} returned no content or tool calls`;
                            logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`);
                            continue;
                        }
                        if (!translationCancelled) {
                            for (const toolCall of toolCalls) {
                                const toolName = toolCall.toolName ??
                                    toolCall.name ??
                                    "unknown";
                                for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
                                    emit(frame);
                                }
                            }
                            const reason = streamResult.finishReason ?? "end_turn";
                            const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
                            for (const frame of serializer.finish(resolvedUsage.output, reason)) {
                                emit(frame);
                            }
                        }
                        translatedModel = streamResult.model;
                        translationSucceeded = true;
                        return;
                    }
                    catch (streamErr) {
                        if (translationCancelled) {
                            return;
                        }
                        finalStreamError =
                            streamErr instanceof Error
                                ? streamErr.message
                                : String(streamErr);
                        if (collectedText.trim().length > 0) {
                            // Text already reached the client, so a fallback would
                            // produce a spliced answer; surface the error instead.
                            emitInterruptedError();
                            return;
                        }
                        logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${finalStreamError}`);
                    }
                }
                if (!translationCancelled) {
                    emitInterruptedError();
                }
            }
            finally {
                if (translationKeepAliveTimer) {
                    clearInterval(translationKeepAliveTimer);
                }
                if (!translationCancelled) {
                    controller.close();
                }
                if (tracer && translatedModel && translatedModel !== body.model) {
                    tracer.setModelSubstitution(body.model, translatedModel);
                }
                if (!translationSucceeded) {
                    tracer?.setError("generation_error", finalStreamError.slice(0, 500));
                }
                // The SSE response has already been started, hence status 200.
                tracer?.end(200, Date.now() - requestStartTime);
            }
        },
        cancel() {
            translationCancelled = true;
            if (translationKeepAliveTimer) {
                clearInterval(translationKeepAliveTimer);
                translationKeepAliveTimer = undefined;
            }
            releaseUpstream();
        },
    });
    return new Response(translationStream, {
        headers: {
            "content-type": "text/event-stream",
            "cache-control": "no-cache",
            connection: "keep-alive",
        },
    });
}
569
/**
 * Non-streaming translation: tries each entry of `attempts` in order, drains
 * the provider stream into a single text, and returns it serialized as a
 * Claude-format response. An attempt that throws or yields neither text nor
 * tool calls falls through to the next one.
 *
 * @param {object} args - `{ ctx, body, attempts, parsed, tracer, requestStartTime, logProxyBody }`.
 * @returns {Promise<object>} The serialized Claude response of the first successful attempt.
 * @throws {Error} When every attempt fails. The message is the last failure's
 *   message; if that failure was an exception it is attached as `cause`.
 */
async function handleTranslatedClaudeJsonRequest(args) {
    const { ctx, body, attempts, parsed, tracer, requestStartTime, logProxyBody } = args;
    let lastAttemptError = "No translation providers succeeded";
    // Original exception behind lastAttemptError, kept so the stack is not lost.
    let lastAttemptCause;
    for (const [attemptIndex, attempt] of attempts.entries()) {
        if (attemptIndex > 0) {
            logger.always(`[proxy] fallback → ${attempt.label}`);
        }
        try {
            const options = buildProxyFallbackOptions(parsed, attempt.provider
                ? {
                    provider: attempt.provider,
                    model: attempt.model,
                }
                : {});
            const streamResult = await ctx.neurolink.stream(options);
            let collectedText = "";
            for await (const chunk of streamResult.stream) {
                const text = extractText(chunk);
                if (text) {
                    collectedText += text;
                }
            }
            if (!hasTranslatedOutput(collectedText, streamResult.toolCalls)) {
                lastAttemptError = `Translated provider ${attempt.label} returned no content or tool calls`;
                lastAttemptCause = undefined;
                logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`);
                continue;
            }
            const internal = {
                content: collectedText,
                model: streamResult.model,
                finishReason: streamResult.finishReason ?? "end_turn",
                reasoning: undefined,
                usage: streamResult.usage
                    ? extractUsageFromStreamResult(streamResult.usage)
                    : undefined,
                toolCalls: streamResult.toolCalls,
            };
            if (tracer && streamResult.model && streamResult.model !== body.model) {
                tracer.setModelSubstitution(body.model, streamResult.model);
            }
            tracer?.end(200, Date.now() - requestStartTime);
            const clientResponse = serializeClaudeResponse(internal, body.model);
            const clientResponseText = JSON.stringify(clientResponse);
            logProxyBody({
                phase: "client_response",
                headers: { "content-type": "application/json" },
                body: clientResponseText,
                bodySize: Buffer.byteLength(clientResponseText, "utf8"),
                contentType: "application/json",
                responseStatus: 200,
                durationMs: Date.now() - requestStartTime,
            });
            return clientResponse;
        }
        catch (attemptError) {
            lastAttemptError =
                attemptError instanceof Error
                    ? attemptError.message
                    : String(attemptError);
            lastAttemptCause = attemptError;
            logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${lastAttemptError}`);
        }
    }
    throw lastAttemptCause === undefined
        ? new Error(lastAttemptError)
        : new Error(lastAttemptError, { cause: lastAttemptCause });
}
634
/**
 * Passthrough mode: forwards the client's raw request body to the Anthropic
 * messages endpoint with the client's own headers (minus the blocked set),
 * then delegates the successful response to the stream or JSON handler.
 *
 * A fetch failure resolves to a 502 Claude-style error body; a non-2xx
 * upstream response resolves to the upstream error body (parsed as JSON when
 * possible, otherwise wrapped with buildClaudeError).
 *
 * @param {object} args - `{ ctx, body, clientRequestBody, tracer, requestStartTime, logProxyBody }`.
 * @returns {Promise<*>} The client-facing response or error body.
 */
async function handleClaudePassthroughRequest(args) {
    const { ctx, body, clientRequestBody, tracer, requestStartTime, logProxyBody } = args;
    const upstreamUrl = "https://api.anthropic.com/v1/messages?beta=true";
    const elapsed = () => Date.now() - requestStartTime;
    tracer?.setMode("passthrough-cli");
    const bodyStr = clientRequestBody;
    const toolCount = Array.isArray(body.tools) ? body.tools.length : 0;

    // Copy through every non-empty client header that is not on the block list.
    const upstreamHeaders = {};
    for (const [name, headerValue] of Object.entries(ctx.headers)) {
        if (BLOCKED_UPSTREAM_HEADERS.has(name.toLowerCase()) || !headerValue) {
            continue;
        }
        upstreamHeaders[name] = headerValue;
    }
    if (!upstreamHeaders["content-type"]) {
        upstreamHeaders["content-type"] = "application/json";
    }

    const upstreamSpan = tracer?.startUpstreamAttempt({
        account: "passthrough",
        attempt: 1,
        polyfillHeaders: false,
        polyfillBody: false,
        upstreamUrl,
    });
    tracer?.logUpstreamRequestHeaders(upstreamHeaders);
    tracer?.logUpstreamRequestBody(bodyStr);
    logProxyBody({
        phase: "upstream_request",
        headers: upstreamHeaders,
        body: bodyStr,
        bodySize: Buffer.byteLength(bodyStr, "utf8"),
        contentType: upstreamHeaders["content-type"] ?? "application/json",
        account: "passthrough",
        accountType: "passthrough",
        attempt: 1,
    });

    let response;
    try {
        response = await fetch(upstreamUrl, {
            method: "POST",
            headers: upstreamHeaders,
            body: bodyStr,
            signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
        });
    }
    catch (fetchErr) {
        const errMsg = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
        tracer?.setError("network_error", errMsg);
        upstreamSpan?.end();
        tracer?.end(502, elapsed());
        logRequest({
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            method: ctx.method,
            path: ctx.path,
            model: body.model,
            stream: body.stream ?? false,
            toolCount,
            account: "passthrough",
            accountType: "passthrough",
            responseStatus: 502,
            responseTimeMs: elapsed(),
            errorType: "network_error",
            errorMessage: errMsg,
        });
        const errorBody = buildClaudeError(502, `Passthrough fetch failed: ${errMsg}`);
        const errorBodyText = JSON.stringify(errorBody);
        logProxyBody({
            phase: "client_response",
            headers: { "content-type": "application/json" },
            body: errorBodyText,
            bodySize: Buffer.byteLength(errorBodyText, "utf8"),
            contentType: "application/json",
            account: "passthrough",
            accountType: "passthrough",
            attempt: 1,
            responseStatus: 502,
            durationMs: elapsed(),
        });
        return errorBody;
    }

    const upstreamResponseHeaders = {};
    response.headers.forEach((headerValue, name) => {
        upstreamResponseHeaders[name] = headerValue;
    });
    tracer?.logUpstreamResponseHeaders(upstreamResponseHeaders);

    if (!response.ok) {
        const errorText = await response.text();
        tracer?.logUpstreamResponseBody(errorText);
        // The upstream error is relayed unchanged, so both phases log the same payload.
        for (const phase of ["upstream_response", "client_response"]) {
            logProxyBody({
                phase,
                headers: upstreamResponseHeaders,
                body: errorText,
                bodySize: Buffer.byteLength(errorText, "utf8"),
                contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
                account: "passthrough",
                accountType: "passthrough",
                attempt: 1,
                responseStatus: response.status,
                durationMs: elapsed(),
            });
        }
        upstreamSpan?.end();
        tracer?.setError("api_error", errorText.slice(0, 500));
        tracer?.end(response.status, elapsed());
        try {
            return JSON.parse(errorText);
        }
        catch {
            return buildClaudeError(response.status, errorText);
        }
    }

    const responseArgs = {
        ctx,
        body,
        bodyStr,
        response,
        tracer,
        requestStartTime,
        toolCount,
        upstreamSpan,
        upstreamResponseHeaders,
        logProxyBody,
    };
    return body.stream && response.body
        ? handleClaudePassthroughStreamResponse(responseArgs)
        : handleClaudePassthroughJsonResponse(responseArgs);
}
781
/**
 * Relays a successful streaming passthrough response to the client while
 * capturing it for logging.
 *
 * With a tracer, the upstream body is additionally piped through an SSE
 * interceptor; once both the interceptor telemetry and the client capture
 * settle, usage, rate-limit utilisation, body sizes and request/body logs are
 * recorded. Without a tracer only the captured body is logged.
 *
 * @param {object} args - `{ ctx, body, bodyStr, response, tracer, requestStartTime,
 *   toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody }`.
 * @returns {Promise<Response>} Response streaming the upstream body to the client.
 * @throws {Error} When `response.body` is missing.
 */
async function handleClaudePassthroughStreamResponse(args) {
    const { ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody } = args;
    const responseHeaders = { ...upstreamResponseHeaders };
    const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture();
    const responseBody = response.body;
    if (!responseBody) {
        throw new Error("Expected passthrough stream response body");
    }
    const elapsed = () => Date.now() - requestStartTime;
    const logStreamBody = (phase, text, size) => {
        logProxyBody({
            phase,
            headers: responseHeaders,
            body: text,
            bodySize: size,
            contentType: responseHeaders["content-type"] ?? "text/event-stream",
            account: "passthrough",
            accountType: "passthrough",
            attempt: 1,
            responseStatus: 200,
            durationMs: elapsed(),
        });
    };
    const requestLogBase = (status) => ({
        timestamp: new Date().toISOString(),
        requestId: ctx.requestId,
        method: ctx.method,
        path: ctx.path,
        model: body.model,
        stream: true,
        toolCount,
        account: "passthrough",
        accountType: "passthrough",
        responseStatus: status,
        responseTimeMs: elapsed(),
    });
    let streamSource = responseBody;
    if (tracer) {
        try {
            const { stream: interceptor, telemetry } = createSSEInterceptor({
                captureRawText: true,
            });
            streamSource = streamSource.pipeThrough(interceptor);
            // recordBodySizes pairs this with totalBytesReceived, so measure the
            // request in UTF-8 bytes rather than UTF-16 code units.
            const requestBytes = Buffer.byteLength(bodyStr, "utf8");
            Promise.all([telemetry, clientCapture])
                .then(([data, clientBody]) => {
                    const usage = {
                        inputTokens: data.usage.inputTokens,
                        outputTokens: data.usage.outputTokens,
                        cacheCreationTokens: data.usage.cacheCreationInputTokens,
                        cacheReadTokens: data.usage.cacheReadInputTokens,
                    };
                    tracer.setUsage({ ...usage });
                    tracer.logStreamEvents(data.events);
                    const rateLimit5h = parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
                    const rateLimit7d = parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
                    const usageUpdate = { ...usage };
                    if (!Number.isNaN(rateLimit5h)) {
                        usageUpdate.rateLimitAfter5h = rateLimit5h;
                    }
                    if (!Number.isNaN(rateLimit7d)) {
                        usageUpdate.rateLimitAfter7d = rateLimit7d;
                    }
                    if (!Number.isNaN(rateLimit5h) || !Number.isNaN(rateLimit7d)) {
                        tracer.setUsage(usageUpdate);
                    }
                    tracer.logUpstreamResponseBody(data.rawText ?? "");
                    tracer.recordMetrics();
                    tracer.recordBodySizes(requestBytes, data.totalBytesReceived);
                    upstreamSpan?.end();
                    tracer.end(200, elapsed());
                    const traceCtx = tracer.getTraceContext();
                    logRequest({
                        ...requestLogBase(200),
                        ...usage,
                        traceId: traceCtx.traceId,
                        spanId: traceCtx.spanId,
                    });
                    logStreamBody("upstream_response", data.rawText ?? "", data.totalBytesReceived);
                    logStreamBody("client_response", clientBody.text, clientBody.totalBytes);
                })
                .catch((error) => {
                    const message = error instanceof Error ? error.message : String(error);
                    tracer.setError("stream_error", message);
                    upstreamSpan?.end();
                    tracer.end(500, elapsed());
                    const traceCtx = tracer.getTraceContext();
                    logRequest({
                        ...requestLogBase(500),
                        errorType: "stream_error",
                        errorMessage: message,
                        traceId: traceCtx.traceId,
                        spanId: traceCtx.spanId,
                    });
                });
        }
        catch {
            // Streaming capture is best-effort.
        }
    }
    else {
        // No interceptor in this mode: the client capture is the only copy of
        // the body, so it is logged for both phases.
        clientCapture
            .then((clientBody) => {
                logStreamBody("upstream_response", clientBody.text, clientBody.totalBytes);
                logStreamBody("client_response", clientBody.text, clientBody.totalBytes);
            })
            .catch(() => {
                // Non-fatal
            });
    }
    const clientStream = streamSource.pipeThrough(clientCaptureStream);
    return new Response(clientStream, {
        status: response.status,
        headers: responseHeaders,
    });
}
942
/**
 * Handles a successful non-streaming passthrough response: logs the upstream
 * body for both the upstream and client phases, records usage, rate-limit
 * utilisation and body sizes on the tracer when one is present, and returns
 * the parsed JSON body.
 *
 * Body sizes reported to the tracer are UTF-8 byte counts of the request body
 * and of the response text as received, matching the `bodySize` values logged
 * in this function.
 *
 * @param {object} args - `{ ctx, body, bodyStr, response, tracer, requestStartTime,
 *   toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody }`.
 * @returns {Promise<*>} The parsed upstream response body.
 * @throws {SyntaxError} When the upstream body is not valid JSON.
 */
async function handleClaudePassthroughJsonResponse(args) {
    const { ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody } = args;
    const elapsed = () => Date.now() - requestStartTime;
    const responseText = await response.text();
    const responseBytes = Buffer.byteLength(responseText, "utf8");
    tracer?.logUpstreamResponseBody(responseText);
    // The body is relayed unchanged, so both phases log the same payload.
    for (const phase of ["upstream_response", "client_response"]) {
        logProxyBody({
            phase,
            headers: upstreamResponseHeaders,
            body: responseText,
            bodySize: responseBytes,
            contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
            account: "passthrough",
            accountType: "passthrough",
            attempt: 1,
            responseStatus: response.status,
            durationMs: elapsed(),
        });
    }
    const responseJson = JSON.parse(responseText);
    const requestLogBase = () => ({
        timestamp: new Date().toISOString(),
        requestId: ctx.requestId,
        method: ctx.method,
        path: ctx.path,
        model: body.model,
        stream: false,
        toolCount,
        account: "passthrough",
        accountType: "passthrough",
        responseStatus: response.status,
        responseTimeMs: elapsed(),
    });
    if (!tracer || !responseJson || typeof responseJson !== "object") {
        upstreamSpan?.end();
        tracer?.end(response.status, elapsed());
        logRequest(requestLogBase());
        return responseJson;
    }
    const usage = responseJson.usage;
    if (usage) {
        const usageFields = {
            inputTokens: usage.input_tokens ?? 0,
            outputTokens: usage.output_tokens ?? 0,
            cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
            cacheReadTokens: usage.cache_read_input_tokens ?? 0,
        };
        tracer.setUsage({ ...usageFields });
        const rateLimit5h = parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
        const rateLimit7d = parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
        if (!Number.isNaN(rateLimit5h) || !Number.isNaN(rateLimit7d)) {
            const usageWithRates = { ...usageFields };
            if (!Number.isNaN(rateLimit5h)) {
                usageWithRates.rateLimitAfter5h = rateLimit5h;
            }
            if (!Number.isNaN(rateLimit7d)) {
                usageWithRates.rateLimitAfter7d = rateLimit7d;
            }
            tracer.setUsage(usageWithRates);
        }
    }
    tracer.recordMetrics();
    tracer.recordBodySizes(Buffer.byteLength(bodyStr, "utf8"), responseBytes);
    upstreamSpan?.end();
    tracer.end(response.status, elapsed());
    const traceCtx = tracer.getTraceContext();
    logRequest({
        ...requestLogBase(),
        inputTokens: usage?.input_tokens,
        outputTokens: usage?.output_tokens,
        cacheCreationTokens: usage?.cache_creation_input_tokens,
        cacheReadTokens: usage?.cache_read_input_tokens,
        traceId: traceCtx.traceId,
        spanId: traceCtx.spanId,
    });
    return responseJson;
}
1045
/**
 * Collects the Anthropic accounts usable for this request and prepares the
 * shared upstream request context.
 *
 * Sources, in order: token-store entries under the `anthropic:` prefix
 * (expired ones are refreshed and persisted; disabled ones are skipped unless
 * their stored credentials differ from the last ones seen), then the legacy
 * credentials file, then `ANTHROPIC_API_KEY`. The later sources are only
 * consulted while no account has been found.
 *
 * @param {object} args - `{ ctx, body, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError }`.
 * @returns {Promise<object>} Either `{ response }` holding a 401 error when no
 *   usable account exists, or `{ accounts, enabledAccounts, orderedAccounts,
 *   bodyStr, requestStart, toolCount, url, clientHeaders, isClaudeClientRequest }`.
 */
async function loadClaudeProxyAccounts(args) {
    const { ctx, body, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError } = args;
    const fs = await import("fs");
    const os = await import("os");
    const accounts = [];
    const legacyCredPath = `${os.homedir()}/.neurolink/anthropic-credentials.json`;
    const { tokenStore } = await import("../../auth/tokenStore.js");
    if (!startupPruneDone) {
        await tokenStore.pruneExpired();
        startupPruneDone = true;
    }
    const compoundKeys = await tokenStore.listByPrefix("anthropic:");
    for (const key of compoundKeys) {
        const label = key.split(":")[1] ?? key;
        const disabled = await tokenStore.isDisabled(key);
        // Loaded once and reused by the disabled check and the normal path.
        const tokens = await tokenStore.loadTokens(key);
        if (disabled) {
            const disabledState = getOrCreateRuntimeState(key);
            const hasTrackedTokens = disabledState.lastToken !== undefined && disabledState.lastToken !== "";
            const tokenChanged = tokens &&
                hasTrackedTokens &&
                (disabledState.lastToken !== tokens.accessToken ||
                    disabledState.lastRefreshToken !== tokens.refreshToken);
            if (!tokenChanged) {
                logger.debug(`[proxy] skipping disabled account=${label}`);
                disabledState.permanentlyDisabled = true;
                continue;
            }
            // Stored credentials differ from the ones that were disabled: give
            // the account a fresh start.
            await tokenStore.markEnabled(key);
            logger.always(`[proxy] account=${label} re-enabled (credentials changed)`);
            disabledState.permanentlyDisabled = false;
            disabledState.coolingUntil = undefined;
            disabledState.backoffLevel = 0;
            disabledState.consecutiveRefreshFailures = 0;
        }
        if (!tokens) {
            continue;
        }
        let accessToken = tokens.accessToken;
        let refreshTok = tokens.refreshToken;
        let expiresAt = tokens.expiresAt;
        const isExpired = expiresAt ? expiresAt < Date.now() : false;
        if (isExpired) {
            const existingState = getOrCreateRuntimeState(key);
            if (existingState.permanentlyDisabled) {
                continue;
            }
            if (!refreshTok) {
                logger.always(`[proxy] skipping account=${label} (expired, no refresh token)`);
                await disableAccountUntilReauth({ key, label, token: accessToken, type: "oauth" }, existingState);
                continue;
            }
            // refreshToken() updates this object in place; the refreshed values
            // are read back from it below.
            const tempAccount = {
                token: accessToken,
                refreshToken: refreshTok,
                expiresAt,
                label,
            };
            const refreshed = await refreshToken(tempAccount);
            if (!refreshed.success) {
                logger.always(`[proxy] skipping account=${label} (expired, refresh failed: ${refreshed.error?.slice(0, 200) ?? "unknown"})`);
                await disableAccountUntilReauth({ key, label, token: accessToken, type: "oauth" }, existingState);
                continue;
            }
            accessToken = tempAccount.token;
            refreshTok = tempAccount.refreshToken;
            expiresAt = tempAccount.expiresAt;
            await tokenStore.saveTokens(key, {
                accessToken,
                refreshToken: refreshTok,
                expiresAt: expiresAt ?? Date.now() + 3600_000,
                tokenType: "Bearer",
            });
            logger.always(`[proxy] refreshed expired account=${label} at startup`);
        }
        const accountType = tokens.tokenType === "Bearer" ? "oauth" : "api_key";
        accounts.push({
            key,
            label,
            token: accessToken,
            refreshToken: refreshTok,
            expiresAt,
            type: accountType,
            persistTarget: { providerKey: key },
        });
    }
    if (accounts.length === 0) {
        try {
            // Non-blocking read: this runs on the request path.
            const creds = JSON.parse(await fs.promises.readFile(legacyCredPath, "utf8"));
            const legacyAccount = await tryLoadLegacyAccount(creds, legacyCredPath);
            if (legacyAccount) {
                accounts.push(legacyAccount);
            }
        }
        catch {
            // file absent or invalid
        }
    }
    if (process.env.ANTHROPIC_API_KEY && accounts.length === 0) {
        accounts.push({
            key: "anthropic:env",
            label: "env",
            token: process.env.ANTHROPIC_API_KEY,
            type: "api_key",
        });
    }
    if (accounts.length === 0) {
        tracer?.setError("authentication_error", "No Anthropic credentials found");
        tracer?.end(401, Date.now() - requestStartTime);
        return {
            response: buildLoggedClaudeError(401, "No Anthropic credentials found"),
        };
    }
    // Sync runtime state with the credentials just loaded; new credentials
    // clear any cooldown/backoff/disabled flags left by the previous ones.
    for (const account of accounts) {
        const state = getOrCreateRuntimeState(account.key);
        const tokenChanged = state.lastToken !== account.token ||
            state.lastRefreshToken !== account.refreshToken;
        if (tokenChanged) {
            if (state.permanentlyDisabled) {
                logger.always(`[proxy] account=${account.label} credentials changed, re-enabling`);
            }
            state.coolingUntil = undefined;
            state.backoffLevel = 0;
            state.consecutiveRefreshFailures = 0;
            state.permanentlyDisabled = false;
        }
        state.lastToken = account.token;
        state.lastRefreshToken = account.refreshToken;
    }
    const enabledAccounts = accounts.filter((account) => {
        return !getOrCreateRuntimeState(account.key).permanentlyDisabled;
    });
    if (enabledAccounts.length === 0) {
        const reauthMsg = formatReauthMessage(accounts.map((account) => account.label));
        tracer?.setError("authentication_error", reauthMsg);
        tracer?.end(401, Date.now() - requestStartTime);
        return { response: buildLoggedClaudeError(401, reauthMsg) };
    }
    const orderedAccounts = [...enabledAccounts];
    if (accountStrategy === "round-robin" &&
        orderedAccounts.length !== lastKnownAccountCount) {
        primaryAccountIndex = 0;
        lastKnownAccountCount = orderedAccounts.length;
    }
    if (orderedAccounts.length > 1) {
        // Rotate so the current primary account comes first; round-robin also
        // advances the primary for the next request.
        const idx = primaryAccountIndex % orderedAccounts.length;
        if (accountStrategy === "round-robin") {
            primaryAccountIndex = (primaryAccountIndex + 1) % orderedAccounts.length;
        }
        if (idx > 0) {
            const head = orderedAccounts.splice(0, idx);
            orderedAccounts.push(...head);
        }
    }
    const normalizedAnthropicBody = normalizeClaudeRequestForAnthropic(body);
    const bodyStr = JSON.stringify(normalizedAnthropicBody);
    const requestStart = Date.now();
    const toolCount = Array.isArray(body.tools) ? body.tools.length : 0;
    const url = "https://api.anthropic.com/v1/messages?beta=true";
    const clientHeaders = ctx.headers ?? {};
    const clientSnapshotBody = extractSnapshotBody(body);
    return {
        accounts,
        enabledAccounts,
        orderedAccounts,
        bodyStr,
        requestStart,
        toolCount,
        url,
        clientHeaders,
        isClaudeClientRequest: isLikelyClaudeClient(clientHeaders, clientSnapshotBody),
    };
}
1220
/**
 * Runs one fallback-provider translation with the given `options` and returns
 * it in Claude format: an async generator of SSE frames when `body.stream` is
 * set, otherwise a serialized JSON response.
 *
 * NOTE(review): in streaming mode the tracer is ended and success is recorded
 * before the returned generator has been consumed, so the "no content or tool
 * calls" error raised inside the generator surfaces after success was logged.
 * Confirm this ordering is intended.
 *
 * @param {object} args - `{ ctx, body, tracer, requestStartTime, logProxyBody,
 *   logFinalRequest, options, providerLabel }`.
 * @returns {Promise<AsyncGenerator<string>|object>} SSE frame generator or response object.
 * @throws {Error} In non-streaming mode, when the provider yields neither text nor tool calls.
 */
async function executeClaudeFallbackTranslation(args) {
    const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest, options, providerLabel } = args;
    const streamResult = await ctx.neurolink.stream(options);
    const emptyOutputMessage = `Translated provider ${providerLabel} returned no content or tool calls`;

    if (body.stream) {
        const serializer = new ClaudeStreamSerializer(body.model, 0);
        const sseGenerator = async function* () {
            for (const frame of serializer.start()) {
                yield frame;
            }
            let accumulated = "";
            for await (const chunk of streamResult.stream) {
                const piece = extractText(chunk);
                if (!piece) {
                    continue;
                }
                accumulated += piece;
                for (const frame of serializer.pushDelta(piece)) {
                    yield frame;
                }
            }
            const toolCalls = streamResult.toolCalls ?? [];
            if (!hasTranslatedOutput(accumulated, toolCalls)) {
                throw new Error(emptyOutputMessage);
            }
            for (const toolCall of toolCalls) {
                const toolName = toolCall.toolName ??
                    toolCall.name ??
                    "unknown";
                for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
                    yield frame;
                }
            }
            const reason = streamResult.finishReason ?? "end_turn";
            const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
            for (const frame of serializer.finish(resolvedUsage.output, reason)) {
                yield frame;
            }
        };
        tracer?.end(200, Date.now() - requestStartTime);
        recordFinalSuccess();
        logFinalRequest(200, "", providerLabel);
        return sseGenerator();
    }

    // Non-streaming: drain the provider stream into one string first.
    let accumulated = "";
    for await (const chunk of streamResult.stream) {
        const piece = extractText(chunk);
        if (piece) {
            accumulated += piece;
        }
    }
    if (!hasTranslatedOutput(accumulated, streamResult.toolCalls)) {
        throw new Error(emptyOutputMessage);
    }
    const usage = streamResult.usage
        ? extractUsageFromStreamResult(streamResult.usage)
        : undefined;
    const internal = {
        content: accumulated,
        model: streamResult.model,
        finishReason: streamResult.finishReason ?? "end_turn",
        reasoning: undefined,
        usage,
        toolCalls: streamResult.toolCalls,
    };
    tracer?.end(200, Date.now() - requestStartTime);
    recordFinalSuccess();
    const clientResponse = serializeClaudeResponse(internal, body.model);
    logFinalRequest(200, "", providerLabel, undefined, undefined, {
        inputTokens: internal.usage?.input,
        outputTokens: internal.usage?.output,
    });
    const clientResponseText = JSON.stringify(clientResponse);
    logProxyBody({
        phase: "client_response",
        headers: { "content-type": "application/json" },
        body: clientResponseText,
        bodySize: Buffer.byteLength(clientResponseText, "utf8"),
        contentType: "application/json",
        responseStatus: 200,
        durationMs: Date.now() - requestStartTime,
    });
    return clientResponse;
}
1304
/**
 * Tries each entry of the router's fallback chain in order and returns the
 * first translated reply that succeeds.
 *
 * An entry is skipped when `shouldSkipTranslationTarget` rejects it for this
 * request, or when `ProviderHealthChecker` reports it unavailable. An entry
 * whose translation throws is logged at debug level and the loop moves on.
 *
 * @param {object} args
 * @param {object} args.ctx - request context forwarded to the translation step.
 * @param {object} args.body - incoming Claude request body.
 * @param {object} [args.modelRouter] - provides `getFallbackChain()`; an absent
 *   router is treated as an empty chain.
 * @param {object} [args.tracer]
 * @param {number} args.requestStartTime
 * @param {Function} args.logProxyBody
 * @param {Function} args.logFinalRequest
 * @returns {Promise<object|null>} the translated reply, or `null` when no entry succeeded.
 */
async function tryConfiguredClaudeFallbackChain(args) {
    const { ctx, body, modelRouter, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
    const claudeRequest = parseClaudeRequest(body);
    const candidates = modelRouter?.getFallbackChain() ?? [];

    for (const { provider, model } of candidates) {
        const target = `${provider}/${model}`;

        if (shouldSkipTranslationTarget(provider, model, claudeRequest)) {
            logger.debug(`[proxy] skipping fallback ${target}: incompatible with request shape`);
            continue;
        }

        const health = await ProviderHealthChecker.checkFallbackProviderAvailability(provider, model);
        if (!health.available) {
            logger.debug(`[proxy] skipping fallback ${target}: ${health.reason ?? "provider unavailable"}`);
            continue;
        }

        try {
            logger.always(`[proxy] fallback → ${target}`);
            const options = buildProxyFallbackOptions(claudeRequest, { provider, model });
            return await executeClaudeFallbackTranslation({
                ctx,
                body,
                tracer,
                requestStartTime,
                logProxyBody,
                logFinalRequest,
                options,
                providerLabel: provider,
            });
        }
        catch (err) {
            // A failing candidate is not fatal; fall through to the next one.
            const reason = err instanceof Error ? err.message : String(err);
            logger.debug(`[proxy] fallback ${target} failed: ${reason}`);
        }
    }
    return null;
}
1341
/**
 * Last-resort fallback: builds fallback options without naming a provider or
 * model and runs the translation under the label "auto-provider".
 *
 * @param {object} args
 * @param {object} args.ctx
 * @param {object} args.body - incoming Claude request body.
 * @param {object} [args.tracer]
 * @param {number} args.requestStartTime
 * @param {Function} args.logProxyBody
 * @param {Function} args.logFinalRequest
 * @returns {Promise<object|null>} the translated reply, or `null` when parsing,
 *   option building or the translation itself throws (logged at debug level).
 */
async function tryAutoClaudeFallback(args) {
    const { ctx, body, tracer, requestStartTime, logProxyBody, logFinalRequest } = args;
    try {
        logger.always("[proxy] fallback → auto-provider");
        const options = buildProxyFallbackOptions(parseClaudeRequest(body));
        const reply = await executeClaudeFallbackTranslation({
            ctx,
            body,
            tracer,
            requestStartTime,
            logProxyBody,
            logFinalRequest,
            options,
            providerLabel: "auto-provider",
        });
        return reply;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        logger.debug(`[proxy] fallback auto-provider failed: ${reason}`);
        return null;
    }
}
1363
/**
 * Chooses the terminal error reply once every Anthropic account attempt has
 * failed. The checks run in this priority order:
 *
 *   1. auth failure (and no rate limit seen)        -> 401 via `buildLoggedClaudeError`
 *   2. upstream rejected the request as invalid      -> upstream body replayed
 *   3. transient/network failures (no rate limit)    -> 502
 *   4. any other failure without a rate limit        -> 502
 *   5. rate limited                                  -> 429 `Response` with `retry-after`
 *
 * @param {object} args
 * @param {object} [args.tracer] - optional tracer (`setError`, `end`).
 * @param {number} args.requestStartTime - epoch ms used for durations.
 * @param {string} [args.authFailureMessage]
 * @param {{status:number, body:string, contentType?:string}} [args.invalidRequestFailure]
 * @param {boolean} args.sawNetworkError
 * @param {boolean} args.sawTransientFailure
 * @param {boolean} args.sawRateLimit
 * @param {*} args.lastError - last error seen; an `Error` or anything stringifiable.
 * @param {Array<{key:*}>} args.orderedAccounts - accounts whose runtime state is
 *   inspected for `coolingUntil`.
 * @param {Function} args.buildLoggedClaudeError - `(status, message, type?)` error builder.
 * @param {Function} args.logProxyBody
 * @param {Function} args.logFinalRequest
 * @returns {object|Response} an error payload, or a `Response` for the 429 case.
 */
function buildClaudeAnthropicFailureResponse(args) {
    const { tracer, requestStartTime, authFailureMessage, invalidRequestFailure, sawNetworkError, sawTransientFailure, sawRateLimit, lastError, orderedAccounts, buildLoggedClaudeError, logProxyBody, logFinalRequest } = args;
    const elapsedMs = () => Date.now() - requestStartTime;

    if (!sawRateLimit && authFailureMessage) {
        tracer?.setError("authentication_error", authFailureMessage);
        tracer?.end(401, elapsedMs());
        return buildLoggedClaudeError(401, authFailureMessage);
    }

    if (invalidRequestFailure) {
        const { status, body: upstreamBody } = invalidRequestFailure;
        tracer?.setError("invalid_request_error", summarizeErrorMessage(upstreamBody));
        tracer?.end(status, elapsedMs());
        recordFinalError(status);
        try {
            // Replay the upstream error verbatim when it is valid JSON.
            const upstreamError = JSON.parse(upstreamBody);
            const contentType = invalidRequestFailure.contentType ?? "application/json";
            logFinalRequest(status, "", "final", "invalid_request_error", summarizeErrorMessage(upstreamBody));
            logProxyBody({
                phase: "client_response",
                headers: { "content-type": contentType },
                body: upstreamBody,
                bodySize: Buffer.byteLength(upstreamBody, "utf8"),
                contentType,
                responseStatus: status,
                durationMs: elapsedMs(),
            });
            return upstreamError;
        }
        catch {
            // NOTE(review): recordFinalError already ran above; if
            // buildLoggedClaudeError also records the final error this path
            // counts it twice — confirm against that helper.
            return buildLoggedClaudeError(status, summarizeErrorMessage(upstreamBody), "invalid_request_error");
        }
    }

    if (!sawRateLimit) {
        const lastErrorText = lastError instanceof Error
            ? lastError.message
            : String(lastError ?? "unknown");
        const transient = sawNetworkError || sawTransientFailure;
        const errorCode = transient ? "transient_error" : "all_accounts_failed";
        const msg = transient
            ? `All Anthropic accounts failed due to transient upstream/network errors. Last error: ${lastErrorText}`
            : `All Anthropic accounts failed. Last error: ${lastErrorText}`;
        tracer?.setError(errorCode, msg.slice(0, 500));
        tracer?.end(502, elapsedMs());
        return buildLoggedClaudeError(502, msg);
    }

    // Rate limited: report how long until the first account leaves cooldown.
    let earliestRecovery = Infinity;
    for (const account of orderedAccounts) {
        const { coolingUntil } = getOrCreateRuntimeState(account.key);
        if (coolingUntil) {
            earliestRecovery = Math.min(earliestRecovery, coolingUntil);
        }
    }
    // Without any known cooldown fall back to 60s; never advertise less than 1s.
    const retryAfterSec = Number.isFinite(earliestRecovery)
        ? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
        : 60;
    logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
    const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`, "overloaded_error");
    tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.`);
    tracer?.end(429, elapsedMs());
    recordFinalError(429);
    logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.`);
    const errorBodyText = JSON.stringify(errorBody);
    logProxyBody({
        phase: "client_response",
        headers: {
            "content-type": "application/json",
            "retry-after": String(retryAfterSec),
        },
        body: errorBodyText,
        bodySize: Buffer.byteLength(errorBodyText, "utf8"),
        contentType: "application/json",
        responseStatus: 429,
        durationMs: elapsedMs(),
    });
    return new Response(errorBodyText, {
        status: 429,
        headers: {
            "content-type": "application/json",
            "retry-after": String(retryAfterSec),
        },
    });
}
1444
/**
 * Entry point for a successful upstream Anthropic response.
 *
 * Clears the account's backoff/cooldown/refresh-failure state, persists quota
 * headers on a best-effort basis, snapshots the response headers for the
 * tracer, then delegates to the streaming or JSON handler depending on
 * `body.stream`.
 *
 * @param {object} args
 * @param {object} args.ctx
 * @param {object} args.body - incoming request body (`stream` selects the handler).
 * @param {{label:string, type:string}} args.account
 * @param {object} args.accountState - mutable runtime state; reset here.
 * @param {Response} args.response - upstream response.
 * @param {object} [args.tracer]
 * @param {number} args.requestStartTime
 * @param {number} args.fetchStartMs
 * @param {number} args.attemptNumber
 * @param {string} args.finalBodyStr - serialized upstream request body.
 * @param {object} [args.upstreamSpan]
 * @param {Function} args.logProxyBody
 * @param {Function} args.logFinalRequest
 * @returns {Promise<object>} whatever the delegated handler resolves to.
 */
async function handleAnthropicSuccessfulResponse(args) {
    const { ctx, body, account, accountState, response, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest } = args;

    // A success wipes any penalty state accumulated by earlier failures.
    accountState.backoffLevel = 0;
    accountState.coolingUntil = undefined;
    accountState.consecutiveRefreshFailures = 0;
    logger.always(`[proxy] ← ${response.status} account=${account.label}`);

    const quota = parseQuotaHeaders(response.headers);
    if (quota) {
        // Quota persistence is best-effort; a failure must not affect the reply.
        saveAccountQuota(account.label, quota).catch(() => { });
    }

    const responseHeaders = {};
    response.headers.forEach((headerValue, headerName) => {
        responseHeaders[headerName] = headerValue;
    });
    tracer?.logUpstreamResponseHeaders(responseHeaders);

    // Arguments common to both downstream handlers.
    const shared = {
        account,
        response,
        responseHeaders,
        tracer,
        requestStartTime,
        fetchStartMs,
        attemptNumber,
        finalBodyStr,
        upstreamSpan,
        logProxyBody,
        logFinalRequest,
    };
    if (!body.stream) {
        return handleAnthropicJsonSuccessResponse(shared);
    }
    return handleAnthropicStreamingSuccessResponse({ ctx, body, accountState, ...shared });
}
1493
/**
 * Handles a successful upstream response for a streaming client.
 *
 * Three outcomes are possible:
 *   - the upstream response has no body: a 502 Claude error payload is built,
 *     logged and returned as `{ response }`;
 *   - the first read is empty or already done: the account is put on a 10s
 *     cooldown and `{ retryNextAccount: true }` is returned so the caller can
 *     move to another account;
 *   - otherwise the first chunk is re-queued in front of the remaining stream
 *     and the result of `attachAnthropicSuccessStreamTelemetry` is returned as
 *     `{ response }`.
 *
 * A read failure in the middle of the stream is converted into a final SSE
 * `error` event so the client sees a well-formed termination.
 *
 * Fix over the previous revision: `reader.cancel()` returns a promise that was
 * dropped in two places, so a rejection surfaced as an unhandled rejection.
 * Both call sites now attach a handler that logs at debug level.
 *
 * @param {object} args
 * @param {object} args.ctx - `ctx.requestId` is used for stream-error logs.
 * @param {object} args.body - `body.model` is used for stream-error logs.
 * @param {{label:string, type:string}} args.account
 * @param {object} args.accountState - mutated on the empty-stream path.
 * @param {Response} args.response - upstream response.
 * @param {object} args.responseHeaders - plain-object copy of the upstream headers.
 * @param {object} [args.tracer]
 * @param {number} args.requestStartTime
 * @param {number} args.fetchStartMs
 * @param {number} args.attemptNumber
 * @param {string} args.finalBodyStr
 * @param {object} [args.upstreamSpan]
 * @param {Function} args.logProxyBody
 * @param {Function} args.logFinalRequest
 * @returns {Promise<{response: object}|{retryNextAccount: true}>}
 */
async function handleAnthropicStreamingSuccessResponse(args) {
    const { ctx, body, account, accountState, response, responseHeaders, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest } = args;

    if (!response.body) {
        upstreamSpan?.end();
        tracer?.setError("stream_error", "No response body from upstream");
        tracer?.end(502, Date.now() - requestStartTime);
        recordFinalError(502, account.label, account.type);
        logFinalRequest(502, account.label, account.type, "stream_error", "No response body from upstream");
        const clientError = buildClaudeError(502, "No response body from upstream");
        const clientErrorBody = JSON.stringify(clientError);
        logProxyBody({
            phase: "client_response",
            headers: { "content-type": "application/json" },
            body: clientErrorBody,
            bodySize: Buffer.byteLength(clientErrorBody, "utf8"),
            contentType: "application/json",
            account: account.label,
            accountType: account.type,
            attempt: attemptNumber,
            responseStatus: 502,
            durationMs: Date.now() - requestStartTime,
        });
        return { response: clientError };
    }

    const reader = response.body.getReader();

    // Cancelling is fire-and-forget, but the returned promise must not be left
    // without a rejection handler.
    const cancelUpstream = (context) => {
        Promise.resolve(reader.cancel()).catch((cancelErr) => {
            const reason = cancelErr instanceof Error ? cancelErr.message : String(cancelErr);
            logger.debug(`[proxy] upstream reader cancel failed (${context}) account=${account.label}: ${reason}`);
        });
    };

    // Peek at the first chunk before committing to this account: an empty
    // stream is treated as a soft failure that the caller can retry elsewhere.
    const firstChunk = await reader.read();
    if (firstChunk.done || !firstChunk.value || firstChunk.value.length === 0) {
        cancelUpstream("empty stream");
        accountState.coolingUntil = Date.now() + 10_000;
        recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
        logger.always(`[proxy] ← empty stream from account=${account.label}, trying next`);
        tracer?.recordRetry(account.label, "empty_stream");
        upstreamSpan?.end();
        return { retryNextAccount: true };
    }

    // Guards against touching the controller after close/cancel.
    let mainStreamClosed = false;
    const remainingStream = new ReadableStream({
        start(controller) {
            // Re-emit the chunk consumed by the peek above.
            controller.enqueue(firstChunk.value);
        },
        async pull(controller) {
            if (mainStreamClosed) {
                return;
            }
            try {
                const { done, value } = await reader.read();
                if (mainStreamClosed) {
                    // Cancelled while the read was in flight.
                    return;
                }
                if (done) {
                    mainStreamClosed = true;
                    controller.close();
                    return;
                }
                controller.enqueue(value);
            }
            catch (streamErr) {
                const errMsg = streamErr instanceof Error ? streamErr.message : String(streamErr);
                logger.always(`[proxy] mid-stream error account=${account.label}: ${errMsg}`);
                logStreamError({
                    timestamp: new Date().toISOString(),
                    requestId: ctx.requestId,
                    account: account.label,
                    model: body.model,
                    errorMessage: errMsg,
                    durationMs: Date.now() - fetchStartMs,
                });
                if (!mainStreamClosed) {
                    mainStreamClosed = true;
                    const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
                    controller.enqueue(new TextEncoder().encode(errorEvent));
                    controller.close();
                }
            }
        },
        cancel() {
            mainStreamClosed = true;
            cancelUpstream("client cancel");
        },
    });

    const result = attachAnthropicSuccessStreamTelemetry({
        account,
        response,
        responseHeaders,
        remainingStream,
        tracer,
        requestStartTime,
        attemptNumber,
        finalBodyStr,
        upstreamSpan,
        logProxyBody,
        logFinalRequest,
    });
    return { response: result };
}
1588
/**
 * Wraps the upstream SSE stream with telemetry/capture transforms and returns
 * the `Response` that is sent to the client.
 *
 * The stream is piped through an SSE interceptor (usage + raw text) and then
 * through a raw capture transform (what the client actually received). Once
 * both settle, usage, metrics, the final request log and both body logs are
 * emitted. All of that happens asynchronously; the `Response` is returned
 * immediately.
 *
 * Changes over the previous revision:
 *   - the request size handed to `tracer.recordBodySizes` is now the UTF-8
 *     byte length of `finalBodyStr` (it used `String.length`, i.e. UTF-16 code
 *     units, next to a real byte count for the response);
 *   - the four near-identical `logProxyBody` literals and the repeated token
 *     objects are produced by local helpers.
 *
 * NOTE(review): when a tracer is present and attaching the interceptor throws,
 * nothing ends the tracer/span or records a final outcome (the no-tracer
 * branch does record one). Left unchanged here — confirm whether that is
 * intended.
 *
 * @param {object} args
 * @param {{label:string, type:string}} args.account
 * @param {Response} args.response - upstream response (status + rate-limit headers).
 * @param {object} args.responseHeaders - plain-object copy of the upstream headers.
 * @param {ReadableStream} args.remainingStream - upstream body to forward.
 * @param {object} [args.tracer]
 * @param {number} args.requestStartTime
 * @param {number} args.attemptNumber
 * @param {string} args.finalBodyStr - serialized upstream request body.
 * @param {object} [args.upstreamSpan]
 * @param {Function} args.logProxyBody
 * @param {Function} args.logFinalRequest
 * @returns {Response} streaming response for the client.
 */
function attachAnthropicSuccessStreamTelemetry(args) {
    const { account, response, responseHeaders, remainingStream, tracer, requestStartTime, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest } = args;
    const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture();
    const accountLabel = account.label;

    const elapsedMs = () => Date.now() - requestStartTime;
    const describe = (error) => (error instanceof Error ? error.message : String(error));
    // Maps interceptor usage field names onto the tracer/log field names.
    const toTokenUsage = (usage) => ({
        inputTokens: usage.inputTokens,
        outputTokens: usage.outputTokens,
        cacheCreationTokens: usage.cacheCreationInputTokens,
        cacheReadTokens: usage.cacheReadInputTokens,
    });
    const logBody = (phase, text, size) => logProxyBody({
        phase,
        headers: responseHeaders,
        body: text,
        bodySize: size,
        contentType: responseHeaders["content-type"] ?? "text/event-stream",
        account: accountLabel,
        accountType: account.type,
        attempt: attemptNumber,
        responseStatus: 200,
        durationMs: elapsedMs(),
    });
    const logStreamBodies = (data, clientBody) => {
        logBody("upstream_response", data.rawText ?? "", data.totalBytesReceived);
        logBody("client_response", clientBody.text, clientBody.totalBytes);
    };

    let streamSource = remainingStream;
    if (tracer) {
        try {
            const { stream: interceptor, telemetry } = createSSEInterceptor({
                captureRawText: true,
            });
            streamSource = streamSource.pipeThrough(interceptor);
            // Bytes, to match the unit of data.totalBytesReceived below.
            const requestBytes = Buffer.byteLength(finalBodyStr, "utf8");
            Promise.all([telemetry, clientCapture])
                .then(([data, clientBody]) => {
                    tracer.setUsage(toTokenUsage(data.usage));
                    tracer.logStreamEvents(data.events);
                    const rateLimit5h = Number.parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
                    const rateLimit7d = Number.parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
                    const has5h = !Number.isNaN(rateLimit5h);
                    const has7d = !Number.isNaN(rateLimit7d);
                    if (has5h || has7d) {
                        // Re-send usage enriched with whichever utilization headers were present.
                        const usageUpdate = toTokenUsage(data.usage);
                        if (has5h) {
                            usageUpdate.rateLimitAfter5h = rateLimit5h;
                        }
                        if (has7d) {
                            usageUpdate.rateLimitAfter7d = rateLimit7d;
                        }
                        tracer.setUsage(usageUpdate);
                    }
                    tracer.logUpstreamResponseBody(data.rawText ?? "");
                    tracer.recordMetrics();
                    tracer.recordBodySizes(requestBytes, data.totalBytesReceived);
                    upstreamSpan?.end();
                    tracer.end(200, elapsedMs());
                    recordFinalSuccess(accountLabel, account.type);
                    logFinalRequest(200, accountLabel, account.type, undefined, undefined, toTokenUsage(data.usage));
                    logStreamBodies(data, clientBody);
                })
                .catch((error) => {
                    tracer.setError("stream_error", describe(error));
                    upstreamSpan?.end();
                    tracer.end(500, elapsedMs());
                    recordFinalError(500, accountLabel, account.type);
                    logFinalRequest(500, accountLabel, account.type, "stream_error", describe(error));
                });
        }
        catch {
            // Interceptor attachment failed after stream setup; response handling continues.
        }
    }
    else {
        upstreamSpan?.end();
        try {
            const { stream: interceptor, telemetry } = createSSEInterceptor({
                captureRawText: true,
            });
            streamSource = streamSource.pipeThrough(interceptor);
            Promise.all([telemetry, clientCapture])
                .then(([data, clientBody]) => {
                    recordFinalSuccess(accountLabel, account.type);
                    logFinalRequest(200, accountLabel, account.type, undefined, undefined, toTokenUsage(data.usage));
                    logStreamBodies(data, clientBody);
                })
                .catch(() => {
                    // Telemetry is best-effort without a tracer: still count the request.
                    recordFinalSuccess(account.label, account.type);
                    logFinalRequest(response.status, account.label, account.type);
                });
        }
        catch {
            // No interceptor: only the client-side capture can be logged.
            clientCapture
                .then((clientBody) => {
                    logBody("client_response", clientBody.text, clientBody.totalBytes);
                })
                .catch(() => {
                    // Non-fatal
                });
            recordFinalSuccess(account.label, account.type);
            logFinalRequest(response.status, account.label, account.type);
        }
    }

    const clientStream = streamSource.pipeThrough(clientCaptureStream);
    const clientResponseHeaders = {
        "content-type": "text/event-stream",
        "cache-control": "no-cache",
        connection: "keep-alive",
    };
    // Only this fixed set of upstream headers is forwarded to the client.
    const forwardedHeaderNames = [
        "retry-after",
        "anthropic-ratelimit-requests-remaining",
        "anthropic-ratelimit-requests-limit",
        "anthropic-ratelimit-tokens-remaining",
        "anthropic-ratelimit-tokens-limit",
    ];
    for (const headerName of forwardedHeaderNames) {
        const value = response.headers.get(headerName);
        if (value) {
            clientResponseHeaders[headerName] = value;
        }
    }
    return new Response(clientStream, {
        status: response.status,
        headers: clientResponseHeaders,
    });
}
1771
/**
 * Handles a successful non-streaming upstream response: reads the body, logs
 * it for both the upstream and client phases, parses it as JSON, records
 * usage/metrics and returns the parsed payload.
 *
 * Changes over the previous revision:
 *   - `tracer.recordBodySizes` now receives UTF-8 byte lengths of the request
 *     string and of the body actually received. Before, it received
 *     `String.length` of the request and of a re-serialized copy of the
 *     parsed JSON, which differs from what was on the wire.
 *   - when a tracer is present but the payload parses to a non-object (for
 *     example `null`), the tracer is now ended as well; previously that path
 *     skipped `tracer.end`.
 *   - the tracer / no-tracer tails were duplicated and are merged.
 *
 * @param {object} args
 * @param {{label:string, type:string}} args.account
 * @param {Response} args.response - upstream response; its body is consumed here.
 * @param {object} args.responseHeaders - plain-object copy of the upstream headers.
 * @param {object} [args.tracer]
 * @param {number} args.requestStartTime - epoch ms; basis for client-phase durations.
 * @param {number} args.fetchStartMs - epoch ms; basis for the upstream-phase duration.
 * @param {number} args.attemptNumber
 * @param {string} args.finalBodyStr - serialized upstream request body.
 * @param {object} [args.upstreamSpan]
 * @param {Function} args.logProxyBody
 * @param {Function} args.logFinalRequest
 * @returns {Promise<{response: *}>} the parsed upstream payload.
 * @throws {SyntaxError} when the upstream body is not valid JSON.
 */
async function handleAnthropicJsonSuccessResponse(args) {
    const { account, response, responseHeaders, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest } = args;
    const responseText = await response.text();
    const responseBytes = Buffer.byteLength(responseText, "utf8");
    tracer?.logUpstreamResponseBody(responseText);

    // The same body is logged twice; only the phase and the duration basis differ.
    const logBody = (phase, startedAt) => logProxyBody({
        phase,
        headers: responseHeaders,
        body: responseText,
        bodySize: responseBytes,
        contentType: responseHeaders["content-type"] ?? "application/json",
        account: account.label,
        accountType: account.type,
        attempt: attemptNumber,
        responseStatus: response.status,
        durationMs: Date.now() - startedAt,
    });
    logBody("upstream_response", fetchStartMs);
    logBody("client_response", requestStartTime);

    const responseJson = JSON.parse(responseText);
    const usage = responseJson && typeof responseJson === "object"
        ? responseJson.usage
        : undefined;

    if (tracer) {
        if (usage) {
            const tokenUsage = () => ({
                inputTokens: usage.input_tokens ?? 0,
                outputTokens: usage.output_tokens ?? 0,
                cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
                cacheReadTokens: usage.cache_read_input_tokens ?? 0,
            });
            tracer.setUsage(tokenUsage());
            const rateLimit5h = Number.parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
            const rateLimit7d = Number.parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
            const has5h = !Number.isNaN(rateLimit5h);
            const has7d = !Number.isNaN(rateLimit7d);
            if (has5h || has7d) {
                // Re-send usage enriched with whichever utilization headers were present.
                const usageWithRates = tokenUsage();
                if (has5h) {
                    usageWithRates.rateLimitAfter5h = rateLimit5h;
                }
                if (has7d) {
                    usageWithRates.rateLimitAfter7d = rateLimit7d;
                }
                tracer.setUsage(usageWithRates);
            }
        }
        tracer.recordMetrics();
        tracer.recordBodySizes(Buffer.byteLength(finalBodyStr, "utf8"), responseBytes);
    }

    upstreamSpan?.end();
    tracer?.end(response.status, Date.now() - requestStartTime);
    recordFinalSuccess(account.label, account.type);
    logFinalRequest(response.status, account.label, account.type, undefined, undefined, {
        inputTokens: usage?.input_tokens,
        outputTokens: usage?.output_tokens,
        cacheCreationTokens: usage?.cache_creation_input_tokens,
        cacheReadTokens: usage?.cache_read_input_tokens,
    });
    return { response: responseJson };
}
1857
/**
 * Handles a successful upstream response obtained after an auth refresh and
 * retry. Streaming requests get a `Response` wrapping the upstream SSE body;
 * non-streaming requests get the parsed JSON payload.
 *
 * Changes over the previous revision:
 *   - streaming without a tracer never ended the upstream span nor recorded
 *     the final success / final request log, although the JSON path of this
 *     function does in every case. That bookkeeping is now performed.
 *   - body sizes handed to `tracer.recordBodySizes` are UTF-8 byte lengths
 *     (request string and received body) instead of `String.length` values.
 *   - the promise returned by `retryReader.cancel()` is no longer dropped
 *     without a rejection handler.
 *   - JSON path: the tracer is ended even when the payload parses to a
 *     non-object, and usage tokens are passed to `logFinalRequest` whether or
 *     not a tracer is present.
 *
 * NOTE(review): when a tracer is present and attaching the SSE interceptor
 * throws, nothing finalizes the tracer/span; left unchanged — confirm intent.
 *
 * @param {object} args
 * @param {object} args.ctx - `ctx.requestId` is used for stream-error logs.
 * @param {object} args.body - `stream` selects the path; `model` is used in logs.
 * @param {{label:string, type:string}} args.account
 * @param {Response} args.retryResp - successful upstream retry response.
 * @param {object} [args.tracer]
 * @param {number} args.requestStartTime
 * @param {number} args.fetchStartMs
 * @param {number} args.attemptNumber
 * @param {string} args.finalBodyStr - serialized upstream request body.
 * @param {object} [args.upstreamSpan]
 * @param {Function} args.logProxyBody
 * @param {Function} args.logFinalRequest
 * @returns {Promise<Response|*>} streaming `Response`, or the parsed JSON payload.
 * @throws {SyntaxError} on the JSON path when the upstream body is not valid JSON.
 */
async function handleAnthropicSuccessfulRetryResponse(args) {
    const { ctx, body, account, retryResp, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logProxyBody, logFinalRequest } = args;
    const describe = (error) => (error instanceof Error ? error.message : String(error));

    const retryQuota = parseQuotaHeaders(retryResp.headers);
    if (retryQuota) {
        // Best-effort persistence; failures are only logged.
        saveAccountQuota(account.label, retryQuota).catch((error) => {
            logger.debug("[proxy] Failed to persist account quota after auth retry", {
                account: account.label,
                error: describe(error),
            });
        });
    }

    if (body.stream && retryResp.body) {
        const retryReader = retryResp.body.getReader();
        // Guards against touching the controller after close/cancel.
        let retryStreamClosed = false;
        const retryStream = new ReadableStream({
            async pull(controller) {
                if (retryStreamClosed) {
                    return;
                }
                try {
                    const { done, value } = await retryReader.read();
                    if (retryStreamClosed) {
                        // Cancelled while the read was in flight.
                        return;
                    }
                    if (done) {
                        retryStreamClosed = true;
                        controller.close();
                        return;
                    }
                    controller.enqueue(value);
                }
                catch (streamErr) {
                    const errMsg = describe(streamErr);
                    logger.always(`[proxy] mid-stream error (auth-retry) account=${account.label}: ${errMsg}`);
                    logStreamError({
                        timestamp: new Date().toISOString(),
                        requestId: ctx.requestId,
                        account: account.label,
                        model: body.model,
                        errorMessage: errMsg,
                        durationMs: Date.now() - fetchStartMs,
                    });
                    if (!retryStreamClosed) {
                        retryStreamClosed = true;
                        // Terminate the client stream with a well-formed SSE error event.
                        const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
                        controller.enqueue(new TextEncoder().encode(errorEvent));
                        controller.close();
                    }
                }
            },
            cancel() {
                retryStreamClosed = true;
                Promise.resolve(retryReader.cancel()).catch((cancelErr) => {
                    logger.debug(`[proxy] upstream reader cancel failed (auth-retry) account=${account.label}: ${describe(cancelErr)}`);
                });
            },
        });

        let retryClientStream = retryStream;
        if (tracer) {
            try {
                const { stream: retryInterceptor, telemetry: retryTelemetry } = createSSEInterceptor();
                retryClientStream = retryStream.pipeThrough(retryInterceptor);
                const requestBytes = Buffer.byteLength(finalBodyStr, "utf8");
                const accountLabel = account.label;
                const toTokenUsage = (usage) => ({
                    inputTokens: usage.inputTokens,
                    outputTokens: usage.outputTokens,
                    cacheCreationTokens: usage.cacheCreationInputTokens,
                    cacheReadTokens: usage.cacheReadInputTokens,
                });
                retryTelemetry
                    .then((data) => {
                        tracer.setUsage(toTokenUsage(data.usage));
                        tracer.logStreamEvents(data.events);
                        tracer.logUpstreamResponseHeaders(Object.fromEntries([...retryResp.headers.entries()]));
                        tracer.recordMetrics();
                        tracer.recordBodySizes(requestBytes, data.totalBytesReceived);
                        upstreamSpan?.end();
                        tracer.end(200, Date.now() - requestStartTime);
                        recordFinalSuccess(accountLabel, account.type);
                        logFinalRequest(200, accountLabel, account.type, undefined, undefined, toTokenUsage(data.usage));
                    })
                    .catch((error) => {
                        tracer.setError("stream_error", describe(error));
                        upstreamSpan?.end();
                        tracer.end(500, Date.now() - requestStartTime);
                        recordFinalError(500, accountLabel, account.type);
                        logFinalRequest(500, accountLabel, account.type, "stream_error", describe(error));
                    });
            }
            catch {
                retryClientStream = retryStream;
            }
        }
        else {
            // No tracer means no telemetry promise to hang the bookkeeping on,
            // so close the span and count the request as soon as the stream is
            // handed over.
            upstreamSpan?.end();
            recordFinalSuccess(account.label, account.type);
            logFinalRequest(retryResp.status, account.label, account.type);
        }

        const responseHeaders = {
            "content-type": "text/event-stream",
            "cache-control": "no-cache",
            connection: "keep-alive",
        };
        // Only this fixed set of upstream headers is forwarded to the client.
        const forwardedHeaderNames = [
            "retry-after",
            "anthropic-ratelimit-requests-remaining",
            "anthropic-ratelimit-requests-limit",
            "anthropic-ratelimit-tokens-remaining",
            "anthropic-ratelimit-tokens-limit",
        ];
        for (const headerName of forwardedHeaderNames) {
            const value = retryResp.headers.get(headerName);
            if (value) {
                responseHeaders[headerName] = value;
            }
        }
        return new Response(retryClientStream, {
            status: retryResp.status,
            headers: responseHeaders,
        });
    }

    // Non-streaming (or body-less) response: read, log, parse.
    const retryRespHeaders = Object.fromEntries([...retryResp.headers.entries()]);
    const retryText = await retryResp.text();
    const retryBytes = Buffer.byteLength(retryText, "utf8");
    tracer?.logUpstreamResponseHeaders(retryRespHeaders);
    tracer?.logUpstreamResponseBody(retryText);

    // The same body is logged twice; only the phase and the duration basis differ.
    const logBody = (phase, startedAt) => logProxyBody({
        phase,
        headers: retryRespHeaders,
        body: retryText,
        bodySize: retryBytes,
        contentType: retryRespHeaders["content-type"] ?? "application/json",
        account: account.label,
        accountType: account.type,
        attempt: attemptNumber,
        responseStatus: retryResp.status,
        durationMs: Date.now() - startedAt,
    });
    logBody("upstream_response", fetchStartMs);
    logBody("client_response", requestStartTime);

    const retryJson = JSON.parse(retryText);
    const retryUsage = retryJson && typeof retryJson === "object"
        ? retryJson.usage
        : undefined;

    if (tracer) {
        if (retryUsage) {
            tracer.setUsage({
                inputTokens: retryUsage.input_tokens ?? 0,
                outputTokens: retryUsage.output_tokens ?? 0,
                cacheCreationTokens: retryUsage.cache_creation_input_tokens ?? 0,
                cacheReadTokens: retryUsage.cache_read_input_tokens ?? 0,
            });
        }
        tracer.recordMetrics();
        tracer.recordBodySizes(Buffer.byteLength(finalBodyStr, "utf8"), retryBytes);
    }
    upstreamSpan?.end();
    tracer?.end(retryResp.status, Date.now() - requestStartTime);
    recordFinalSuccess(account.label, account.type);
    logFinalRequest(retryResp.status, account.label, account.type, undefined, undefined, {
        inputTokens: retryUsage?.input_tokens,
        outputTokens: retryUsage?.output_tokens,
        cacheCreationTokens: retryUsage?.cache_creation_input_tokens,
        cacheReadTokens: retryUsage?.cache_read_input_tokens,
    });
    return retryJson;
}
2037
/**
 * Handles an upstream 401 for an account by refreshing its token and
 * replaying the request against the Anthropic messages endpoint, up to
 * MAX_AUTH_RETRIES times.
 *
 * Outcomes:
 *  - replay succeeds: returns `{ response, continueLoop: false, ... }` with the
 *    payload produced by handleAnthropicSuccessfulRetryResponse;
 *  - replay fails with a status that is not 429, not 401/402/403 and not
 *    transient: returns the upstream error (parsed JSON, or a Claude-style
 *    error built from the raw text) with `continueLoop: false`;
 *  - anything else (refresh failures, 429, repeated auth failures, transient
 *    or network errors): puts the account on cooldown unless it is permanently
 *    disabled and returns `continueLoop: true` without a `response` key so the
 *    caller can move on to the next account.
 *
 * @param {object} args Attempt context: account, accountState, headers,
 *   buildUpstreamBody, tracer, loggers and the caller's current loop flags
 *   (lastError, authFailureMessage, sawRateLimit, sawTransientFailure,
 *   sawNetworkError, upstreamSpan).
 * @returns {Promise<object>} Updated loop flags plus `continueLoop`,
 *   `upstreamSpan` and, for terminal outcomes, `response`.
 */
async function handleAnthropicAuthRetry(args) {
    const { ctx, body, account, accountState, headers, buildUpstreamBody, enabledAccounts, orderedAccounts, tracer, requestStartTime, fetchStartMs, attemptNumber, finalBodyStr, upstreamSpan, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawRateLimit, sawTransientFailure, sawNetworkError, } = args;
    recordAttemptError(account.label, account.type, 401);
    let currentLastError = lastError;
    let currentAuthFailureMessage = authFailureMessage;
    let currentSawRateLimit = sawRateLimit;
    let currentSawTransientFailure = sawTransientFailure;
    let currentSawNetworkError = sawNetworkError;
    let currentUpstreamSpan = upstreamSpan;
    let authRetryError = "received 401 from Anthropic";
    // Snapshot of the loop flags handed back to the caller on every exit path.
    const snapshotFlags = () => ({
        lastError: currentLastError,
        authFailureMessage: currentAuthFailureMessage,
        sawRateLimit: currentSawRateLimit,
        sawTransientFailure: currentSawTransientFailure,
        sawNetworkError: currentSawNetworkError,
    });
    for (let authRetry = 0; authRetry < MAX_AUTH_RETRIES; authRetry++) {
        logger.always(`[proxy] ← 401 account=${account.label} refreshing (attempt ${authRetry + 1}/${MAX_AUTH_RETRIES})`);
        const refreshResult = await refreshToken(account);
        if (!refreshResult.success) {
            accountState.consecutiveRefreshFailures += 1;
            authRetryError = `refresh failed for account=${account.label} attempt ${authRetry + 1}/${MAX_AUTH_RETRIES}: ${refreshResult.error?.slice(0, 200) ?? "unknown"}`;
            currentLastError = authRetryError;
            logger.always(`[proxy] ⚠ account=${account.label} refresh failed on attempt ${authRetry + 1}`);
            if (accountState.consecutiveRefreshFailures >=
                MAX_CONSECUTIVE_REFRESH_FAILURES) {
                await disableAccountUntilReauth(account, accountState);
                currentAuthFailureMessage = formatReauthMessage(account.label);
                break;
            }
            // Pause between refresh attempts, but not after the last one.
            if (authRetry < MAX_AUTH_RETRIES - 1) {
                await sleep(2000);
            }
            continue;
        }
        if (account.persistTarget) {
            await persistTokens(account.persistTarget, account);
        }
        headers.authorization = `Bearer ${account.token}`;
        try {
            const retryResp = await fetch("https://api.anthropic.com/v1/messages?beta=true", {
                method: "POST",
                headers,
                // Rebuild the body with the refreshed token.
                body: buildUpstreamBody(account.token).bodyStr,
                signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
            });
            if (retryResp.ok) {
                accountState.consecutiveRefreshFailures = 0;
                accountState.backoffLevel = 0;
                accountState.coolingUntil = undefined;
                logger.always(`[proxy] ← 200 account=${account.label} (after ${authRetry + 1} refresh(es))`);
                const successResponse = await handleAnthropicSuccessfulRetryResponse({
                    ctx,
                    body,
                    account,
                    retryResp,
                    tracer,
                    requestStartTime,
                    fetchStartMs,
                    attemptNumber,
                    finalBodyStr,
                    upstreamSpan: currentUpstreamSpan,
                    logProxyBody,
                    logFinalRequest,
                });
                return {
                    response: successResponse,
                    continueLoop: false,
                    ...snapshotFlags(),
                    upstreamSpan: undefined,
                };
            }
            const retryStatus = retryResp.status;
            const retryBody = await retryResp.text();
            authRetryError = `retry ${authRetry + 1}/${MAX_AUTH_RETRIES} failed with status ${retryStatus}`;
            currentLastError = retryBody;
            logger.debug(`[proxy] retry ${authRetry + 1} failed: ${retryStatus} ${retryBody.substring(0, 120)}`);
            recordAttemptError(account.label, account.type, retryStatus);
            if (retryStatus === 429) {
                currentSawRateLimit = true;
                const retryAfter = retryResp.headers.get("retry-after");
                const parsedRetryAfter = parseInt(retryAfter ?? "", 10);
                // Missing or non-numeric Retry-After falls back to a 60s cooldown.
                const cooldownMs = Number.isNaN(parsedRetryAfter)
                    ? 60_000
                    : Math.max(1, parsedRetryAfter) * 1000;
                accountState.coolingUntil = Date.now() + cooldownMs;
                advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
                recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
                break;
            }
            if (retryStatus === 401 || retryStatus === 402 || retryStatus === 403) {
                if (authRetry < MAX_AUTH_RETRIES - 1) {
                    await sleep(1000);
                }
                continue;
            }
            if (isTransientHttpFailure(retryStatus, retryBody)) {
                currentSawTransientFailure = true;
                break;
            }
            // Terminal upstream error: hand it to the client as-is.
            logAttempt(retryStatus, "api_error", summarizeErrorMessage(retryBody));
            recordFinalError(retryStatus, account.label, account.type);
            // Decide the client payload BEFORE logging so that a non-JSON body
            // yields exactly one final-request log entry (previously the log
            // call sat inside the try and was repeated in the catch).
            let clientResponse;
            try {
                clientResponse = JSON.parse(retryBody);
            }
            catch {
                clientResponse = buildClaudeError(retryStatus, retryBody);
            }
            logFinalRequest(retryStatus, account.label, account.type, "api_error", summarizeErrorMessage(retryBody));
            return {
                response: clientResponse,
                continueLoop: false,
                ...snapshotFlags(),
                upstreamSpan: currentUpstreamSpan,
            };
        }
        catch (retryFetchErr) {
            // Anything thrown while replaying is treated as a network failure
            // for this account; stop retrying it.
            currentSawNetworkError = true;
            recordAttemptError(account.label, account.type, 502);
            const message = retryFetchErr instanceof Error
                ? retryFetchErr.message
                : String(retryFetchErr);
            authRetryError = `network error on retry ${authRetry + 1}: ${message}`;
            currentLastError = authRetryError;
            logger.debug(`[proxy] ${authRetryError}`);
            break;
        }
    }
    // Every successful or terminal outcome returned from inside the loop, so
    // reaching this point always means the auth retry did not succeed.
    if (!accountState.permanentlyDisabled) {
        // Keep a longer cooldown that was already set (e.g. by the 429 branch).
        if (!accountState.coolingUntil ||
            accountState.coolingUntil <= Date.now()) {
            accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
        }
        recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
    }
    currentLastError = authRetryError;
    logger.always(`[proxy] ⚠ account=${account.label} auth retries exhausted, cooldown=5min`);
    logAttempt(401, "authentication_error", authRetryError);
    tracer?.setError("authentication_error", authRetryError);
    tracer?.recordRetry(account.label, "auth_exhausted");
    currentUpstreamSpan?.end();
    currentUpstreamSpan = undefined;
    return {
        continueLoop: true,
        ...snapshotFlags(),
        upstreamSpan: currentUpstreamSpan,
    };
}
2204
/**
 * Produces the payload returned to the client for a terminal upstream error
 * and records it through the final-request log and the body capture.
 *
 * When the upstream body is valid JSON it is passed through unchanged (with
 * the upstream content type, defaulting to JSON); otherwise a Claude-style
 * error object is built from the raw text.
 *
 * @param {object} args responseStatus, account, errBody, errRespHeaders,
 *   requestStartTime, attemptNumber, logProxyBody, logFinalRequest, errorType.
 * @returns {object} The parsed upstream error or the generated error object.
 */
function buildAnthropicTerminalErrorResponse(args) {
    const { responseStatus, account, errBody, errRespHeaders, requestStartTime, attemptNumber, logProxyBody, logFinalRequest, errorType, } = args;
    // Emits the "client_response" capture for whichever body is sent back.
    const captureClientResponse = (payloadText, payloadType) => {
        logProxyBody({
            phase: "client_response",
            headers: { "content-type": payloadType },
            body: payloadText,
            bodySize: Buffer.byteLength(payloadText, "utf8"),
            contentType: payloadType,
            account: account.label,
            accountType: account.type,
            attempt: attemptNumber,
            responseStatus,
            durationMs: Date.now() - requestStartTime,
        });
    };
    try {
        const upstreamJson = JSON.parse(errBody);
        logFinalRequest(responseStatus, account.label, account.type, errorType, summarizeErrorMessage(errBody));
        captureClientResponse(errBody, errRespHeaders["content-type"] ?? "application/json");
        return upstreamJson;
    }
    catch {
        // Body was not JSON (or logging failed): fall back to a generated error.
        logFinalRequest(responseStatus, account.label, account.type, errorType, summarizeErrorMessage(errBody));
        const generated = buildClaudeError(responseStatus, errBody);
        captureClientResponse(JSON.stringify(generated), "application/json");
        return generated;
    }
}
2244
/**
 * Classifies a non-OK upstream response and decides whether the caller should
 * rotate to another account or return an error to the client.
 *
 * Checked in order:
 *  1. invalid_request_error -> stop (`continueLoop: false`) and remember the
 *     failure in `invalidRequestFailure`;
 *  2. 401/402/403 on an OAuth account without a refresh token -> cooldown,
 *     possibly disable the account, rotate;
 *  3. 401/402/403 on an API-key account -> cooldown, rotate;
 *  4. 404 -> terminal error response;
 *  5. transient HTTP failure -> rotate;
 *  6. anything else -> terminal error response.
 *
 * @param {object} args response, account, accountState, tracer, timing values,
 *   loggers and the caller's current loop flags.
 * @returns {Promise<object>} Updated loop flags, `continueLoop`,
 *   `upstreamSpan: undefined` and, for terminal outcomes only, `response`.
 */
async function handleAnthropicNonOkResponse(args) {
    const { response, account, accountState, tracer, requestStartTime, fetchStartMs, attemptNumber, logAttempt, logProxyBody, logFinalRequest, lastError, authFailureMessage, sawTransientFailure, invalidRequestFailure, maxConsecutiveRefreshFailures, } = args;
    const status = response.status;
    // Mutable copy of the loop flags; key order matches the returned object.
    const flags = {
        lastError,
        authFailureMessage,
        sawTransientFailure,
        invalidRequestFailure,
    };
    const errBody = await response.text();
    const errRespHeaders = {};
    response.headers.forEach((headerValue, headerName) => {
        errRespHeaders[headerName] = headerValue;
    });
    tracer?.logUpstreamResponseHeaders(errRespHeaders);
    tracer?.logUpstreamResponseBody(errBody);
    logProxyBody({
        phase: "upstream_response",
        headers: errRespHeaders,
        body: errBody,
        bodySize: Buffer.byteLength(errBody, "utf8"),
        contentType: errRespHeaders["content-type"] ?? "application/json",
        account: account.label,
        accountType: account.type,
        attempt: attemptNumber,
        responseStatus: status,
        durationMs: Date.now() - fetchStartMs,
    });
    // Result without a `response` key (callers may test for its presence).
    const loopResult = (continueLoop) => ({
        continueLoop,
        ...flags,
        upstreamSpan: undefined,
    });
    // Result carrying the final client-facing error payload.
    const terminalResult = (responseStatus, errorType) => ({
        response: buildAnthropicTerminalErrorResponse({
            responseStatus,
            account,
            errBody,
            errRespHeaders,
            requestStartTime,
            attemptNumber,
            logProxyBody,
            logFinalRequest,
            errorType,
        }),
        ...loopResult(false),
    });
    if (isInvalidRequestError(status, errBody)) {
        logger.always(`[proxy] ← ${status} upstream invalid_request_error`);
        logAttempt(status, "invalid_request_error", summarizeErrorMessage(errBody));
        tracer?.setError("invalid_request_error", summarizeErrorMessage(errBody));
        flags.invalidRequestFailure = {
            status,
            body: errBody,
            contentType: errRespHeaders["content-type"],
        };
        flags.lastError = summarizeErrorMessage(errBody);
        return loopResult(false);
    }
    const isAuthStatus = status === 401 || status === 402 || status === 403;
    if (isAuthStatus && account.type === "oauth" && !account.refreshToken) {
        recordAttemptError(account.label, account.type, status);
        accountState.consecutiveRefreshFailures += 1;
        accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
        recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
        if (accountState.consecutiveRefreshFailures >= maxConsecutiveRefreshFailures) {
            await disableAccountUntilReauth(account, accountState);
        }
        flags.authFailureMessage = formatReauthMessage(account.label);
        logger.always(`[proxy] ← ${status} account=${account.label} cooldown=5min`);
        flags.lastError = errBody;
        logAttempt(status, "authentication_error", summarizeErrorMessage(errBody));
        tracer?.setError("authentication_error", summarizeErrorMessage(errBody));
        tracer?.recordRetry(account.label, "auth_no_refresh");
        return loopResult(true);
    }
    if (isAuthStatus && account.type === "api_key") {
        recordAttemptError(account.label, account.type, status);
        flags.authFailureMessage =
            "Authentication failed for Anthropic API key credentials. Update ANTHROPIC_API_KEY or re-login with OAuth.";
        accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
        recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
        logger.always(`[proxy] ← ${status} account=${account.label} cooldown=5min`);
        flags.lastError = errBody;
        logAttempt(status, "authentication_error", summarizeErrorMessage(errBody));
        tracer?.setError("authentication_error", summarizeErrorMessage(errBody));
        tracer?.recordRetry(account.label, "auth_api_key");
        return loopResult(true);
    }
    if (status === 404) {
        recordFinalError(status, account.label, account.type);
        logger.always(`[proxy] ← 404 account=${account.label}`);
        logAttempt(404, "not_found_error", summarizeErrorMessage(errBody));
        tracer?.setError("not_found_error", summarizeErrorMessage(errBody));
        tracer?.end(404, Date.now() - requestStartTime);
        return terminalResult(404, "not_found_error");
    }
    if (isTransientHttpFailure(status, errBody)) {
        recordAttemptError(account.label, account.type, status);
        flags.sawTransientFailure = true;
        logger.always(`[proxy] ← ${status} account=${account.label} (transient, rotating)`);
        flags.lastError = errBody;
        logAttempt(status, "api_error", summarizeErrorMessage(errBody));
        tracer?.setError("transient_error", summarizeErrorMessage(errBody));
        tracer?.recordRetry(account.label, "transient");
        return loopResult(true);
    }
    // No special handling applies: surface the upstream error to the client.
    recordFinalError(status, account.label, account.type);
    logger.always(`[proxy] ← ${status} account=${account.label}`);
    logger.debug(`[claude-proxy] error body: ${errBody.substring(0, 200)}`);
    logAttempt(status, "api_error", summarizeErrorMessage(errBody));
    tracer?.setError("api_error", summarizeErrorMessage(errBody));
    tracer?.end(status, Date.now() - requestStartTime);
    return terminalResult(status, "api_error");
}
2407
/**
 * Sets up the per-request runtime helpers for a Claude proxy request: an
 * optional tracer plus logging closures bound to this request.
 *
 * Tracing is best-effort: if starting the tracer or recording the incoming
 * request throws, the request proceeds with `tracer` set to `undefined`.
 * The incoming client request is captured once before returning.
 *
 * @param {object} args `ctx` (requestId, method, path, headers), the parsed
 *   request `body`, and the raw `clientRequestBody` text.
 * @returns {{tracer: object|undefined, requestStartTime: number,
 *   logProxyBody: Function, logFinalRequest: Function,
 *   buildLoggedClaudeError: Function}}
 */
function createClaudeRequestRuntimeContext(args) {
    const { ctx, body, clientRequestBody } = args;
    const tracer = (() => {
        try {
            const started = ProxyTracer.startRequest({
                requestId: ctx.requestId,
                method: ctx.method,
                path: ctx.path,
                model: body.model,
                stream: body.stream ?? false,
                toolCount: Array.isArray(body.tools) ? body.tools.length : 0,
                sessionId: ctx.headers["x-neurolink-session-id"] ??
                    ctx.headers["x-claude-code-session-id"] ??
                    undefined,
                userAgent: ctx.headers["user-agent"] ?? undefined,
            }, ctx.headers);
            const receiveSpan = started.startReceive();
            started.logRequestHeaders(ctx.headers);
            started.logRequestBody(clientRequestBody);
            receiveSpan.end();
            return started;
        }
        catch {
            // Tracing must never block the request itself.
            return undefined;
        }
    })();
    const requestStartTime = Date.now();
    // Trace correlation fields, or nothing when no trace context is available.
    const traceFields = () => {
        const traceCtx = tracer?.getTraceContext();
        return traceCtx
            ? { traceId: traceCtx.traceId, spanId: traceCtx.spanId }
            : {};
    };
    // Copies only the token counters that were actually supplied.
    const tokenFields = (extra) => {
        const picked = {};
        for (const counter of ["inputTokens", "outputTokens", "cacheCreationTokens", "cacheReadTokens"]) {
            if (extra?.[counter] !== undefined) {
                picked[counter] = extra[counter];
            }
        }
        return picked;
    };
    const logProxyBody = (capture) => {
        const trace = traceFields();
        // Fire-and-forget: the capture result is intentionally not awaited.
        void logBodyCapture({
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            model: body.model,
            stream: body.stream ?? false,
            ...capture,
            ...trace,
        });
    };
    const logFinalRequest = (status, accountLabel, accountType, errorType, errorMessage, extra) => {
        const trace = traceFields();
        logRequest({
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            method: ctx.method,
            path: ctx.path,
            model: body.model,
            stream: !!body.stream,
            toolCount: Array.isArray(body.tools) ? body.tools.length : 0,
            account: accountLabel,
            accountType,
            responseStatus: status,
            responseTimeMs: Date.now() - requestStartTime,
            ...(errorType ? { errorType } : {}),
            ...(errorMessage ? { errorMessage } : {}),
            ...tokenFields(extra),
            ...trace,
        });
    };
    const buildLoggedClaudeError = (status, message, errorType, extra) => {
        const errorBody = buildClaudeError(status, message, errorType);
        const serialized = JSON.stringify(errorBody);
        recordFinalError(status, extra?.account, extra?.accountType);
        logFinalRequest(status, extra?.account ?? "", extra?.accountType ?? "final", errorType, message);
        logProxyBody({
            phase: "client_response",
            headers: { "content-type": "application/json" },
            body: serialized,
            bodySize: Buffer.byteLength(serialized, "utf8"),
            contentType: "application/json",
            responseStatus: status,
            durationMs: Date.now() - requestStartTime,
            ...extra,
        });
        return errorBody;
    };
    logProxyBody({
        phase: "client_request",
        headers: ctx.headers,
        body: clientRequestBody,
        bodySize: Buffer.byteLength(clientRequestBody, "utf8"),
        contentType: ctx.headers["content-type"] ?? "application/json",
    });
    return {
        tracer,
        requestStartTime,
        logProxyBody,
        logFinalRequest,
        buildLoggedClaudeError,
    };
}
2510
/**
 * Builds the per-attempt logger for one upstream account attempt.
 *
 * The returned function writes a single attempt record via logRequestAttempt,
 * combining the fixed request/account context captured here with the status,
 * optional error details, optional token counters and, when the tracer
 * exposes one, the trace context.
 *
 * @param {object} args ctx, body, toolCount, requestStart, tracer, account,
 *   attemptNumber.
 * @returns {(status: number, errorType?: string, errorMessage?: string,
 *   extra?: object) => void}
 */
function createAnthropicAttemptLogger(args) {
    const { ctx, body, toolCount, requestStart, tracer, account, attemptNumber } = args;
    const tokenCounters = ["inputTokens", "outputTokens", "cacheCreationTokens", "cacheReadTokens"];
    return function logAttemptRecord(status, errorType, errorMessage, extra) {
        const traceCtx = tracer?.getTraceContext();
        const record = {
            timestamp: new Date().toISOString(),
            requestId: ctx.requestId,
            attempt: attemptNumber,
            method: ctx.method,
            path: ctx.path,
            model: body.model,
            stream: !!body.stream,
            toolCount,
            account: account.label,
            accountType: account.type,
            responseStatus: status,
            responseTimeMs: Date.now() - requestStart,
        };
        if (errorType) {
            record.errorType = errorType;
        }
        if (errorMessage) {
            record.errorMessage = errorMessage;
        }
        // Only counters that were explicitly provided are recorded.
        for (const counter of tokenCounters) {
            if (extra?.[counter] !== undefined) {
                record[counter] = extra[counter];
            }
        }
        if (traceCtx) {
            record.traceId = traceCtx.traceId;
            record.spanId = traceCtx.spanId;
        }
        logRequestAttempt(record);
    };
}
2547
/**
 * Prepares one upstream attempt for an account: refreshes the token when
 * needed, builds the upstream headers and body, starts the upstream trace
 * span and captures the outgoing request.
 *
 * If the preflight refresh fails and the account reaches
 * MAX_CONSECUTIVE_REFRESH_FAILURES, the account is disabled and the function
 * returns `{ continueLoop: true, lastError, authFailureMessage }` so the
 * caller skips it. A refresh failure below that threshold still proceeds with
 * the account's current token.
 *
 * @param {object} args account, accountState, bodyStr, clientHeaders,
 *   isClaudeClientRequest, url, tracer, attemptNumber, currentLastError,
 *   currentAuthFailureMessage, logAttempt, logProxyBody.
 * @returns {Promise<object>} On success: `continueLoop: false` plus `headers`,
 *   `buildUpstreamBody(token)`, `finalBodyStr`, `fetchStartMs`,
 *   `upstreamSpan`, and the (possibly updated) `lastError` /
 *   `authFailureMessage`.
 */
async function prepareAnthropicAccountAttempt(args) {
    const { account, accountState, bodyStr, clientHeaders, isClaudeClientRequest, url, tracer, attemptNumber, currentLastError, currentAuthFailureMessage, logAttempt, logProxyBody, } = args;
    let lastError = currentLastError;
    let authFailureMessage = currentAuthFailureMessage;
    if (needsRefresh(account)) {
        const refreshed = await refreshToken(account);
        if (refreshed.success) {
            if (account.persistTarget) {
                await persistTokens(account.persistTarget, account);
            }
            accountState.consecutiveRefreshFailures = 0;
        }
        else {
            accountState.consecutiveRefreshFailures += 1;
            lastError = `token refresh failed for account=${account.label}: ${refreshed.error?.slice(0, 200) ?? "unknown"}`;
            logger.debug(`[proxy] preflight refresh failed account=${account.label} failures=${accountState.consecutiveRefreshFailures}`);
            if (accountState.consecutiveRefreshFailures >=
                MAX_CONSECUTIVE_REFRESH_FAILURES) {
                await disableAccountUntilReauth(account, accountState);
                authFailureMessage = formatReauthMessage(account.label);
                logAttempt(401, "authentication_error", String(lastError));
                return {
                    continueLoop: true,
                    lastError,
                    authFailureMessage,
                };
            }
        }
    }
    const isOAuth = account.type === "oauth";
    // String-valued client headers with their original key casing; this is
    // what the snapshot helper receives.
    const filteredHeaders = {};
    for (const [k, v] of Object.entries(clientHeaders)) {
        if (typeof v === "string") {
            filteredHeaders[k] = v;
        }
    }
    const snapshot = isOAuth
        ? await maybeRefreshClaudeSnapshot(account.label, account.key, filteredHeaders, bodyStr)
        : null;
    // Upstream headers: lower-cased keys, string values only, blocked names dropped.
    const headers = {};
    for (const [headerKey, headerValue] of Object.entries(clientHeaders)) {
        const lower = headerKey.toLowerCase();
        if (typeof headerValue === "string" &&
            !BLOCKED_UPSTREAM_HEADERS.has(lower)) {
            headers[lower] = headerValue;
        }
    }
    headers["content-type"] = "application/json";
    // Exactly one credential header is sent, matching the account type.
    if (isOAuth) {
        headers.authorization = `Bearer ${account.token}`;
        delete headers["x-api-key"];
        applySnapshotHeaders(headers, snapshot);
    }
    else {
        headers["x-api-key"] = account.token;
        delete headers.authorization;
    }
    // Fill in defaults only where the client (or snapshot) supplied nothing.
    if (!headers["user-agent"]) {
        headers["user-agent"] = CLAUDE_CLI_USER_AGENT;
    }
    if (!headers["anthropic-version"]) {
        headers["anthropic-version"] = "2023-06-01";
    }
    if (!headers["anthropic-dangerous-direct-browser-access"]) {
        headers["anthropic-dangerous-direct-browser-access"] = "true";
    }
    if (!headers["x-app"]) {
        headers["x-app"] = "cli";
    }
    if (!headers.accept) {
        headers.accept = "application/json";
    }
    if (isOAuth) {
        // Claude clients seed from the prepared headers; other clients seed
        // from the raw client header value.
        const rawBetaSeed = isClaudeClientRequest
            ? headers["anthropic-beta"]
            : clientHeaders["anthropic-beta"];
        // The raw client value is not guaranteed to be a string. As with every
        // other header here, only string values are honored; previously a
        // non-string value crashed on `.split`.
        const betaSeed = typeof rawBetaSeed === "string" ? rawBetaSeed : "";
        const existing = new Set(betaSeed
            .split(",")
            .map((value) => value.trim())
            .filter(Boolean));
        for (const beta of isClaudeClientRequest
            ? CLAUDE_CODE_OAUTH_BETAS
            : NON_CLAUDE_OAUTH_BETAS) {
            existing.add(beta);
        }
        headers["anthropic-beta"] = [...existing].join(",");
    }
    else {
        // API-key accounts: strip the Claude Code OAuth beta flags.
        const cleaned = (headers["anthropic-beta"] ?? "")
            .split(",")
            .map((value) => value.trim())
            .filter((value) => value && !CLAUDE_CODE_OAUTH_BETAS.includes(value))
            .join(",");
        if (cleaned) {
            headers["anthropic-beta"] = cleaned;
        }
        else {
            delete headers["anthropic-beta"];
        }
    }
    // Exposed to the caller so the body can be rebuilt after a token refresh.
    const buildUpstreamBody = (token) => isOAuth
        ? polyfillOAuthBody(bodyStr, token, snapshot, headers["x-claude-code-session-id"])
        : { bodyStr };
    const polyfilledBody = buildUpstreamBody(account.token);
    if (isOAuth &&
        polyfilledBody.sessionId &&
        !headers["x-claude-code-session-id"]) {
        headers["x-claude-code-session-id"] = polyfilledBody.sessionId;
    }
    const finalBodyStr = polyfilledBody.bodyStr;
    logger.always(`[proxy] → account=${account.label} (${account.type})`);
    recordAttempt(account.label, account.type);
    const fetchStartMs = Date.now();
    let upstreamSpan;
    if (tracer) {
        upstreamSpan = tracer.startUpstreamAttempt({
            attempt: attemptNumber,
            account: account.label,
            polyfillHeaders: isOAuth,
            polyfillBody: isOAuth,
            upstreamUrl: url,
        });
        tracer.logUpstreamRequestHeaders(headers);
        tracer.logUpstreamRequestBody(finalBodyStr);
        // Trace headers are added after the tracer logged the header set.
        Object.assign(headers, tracer.getTraceHeaders());
    }
    logProxyBody({
        phase: "upstream_request",
        headers,
        body: finalBodyStr,
        bodySize: Buffer.byteLength(finalBodyStr, "utf8"),
        contentType: headers["content-type"] ?? "application/json",
        account: account.label,
        accountType: account.type,
        attempt: attemptNumber,
    });
    return {
        continueLoop: false,
        lastError,
        authFailureMessage,
        headers,
        buildUpstreamBody,
        finalBodyStr,
        fetchStartMs,
        upstreamSpan,
    };
}
2694
/**
 * Sends the prepared request upstream for one account and handles the two
 * outcomes that always rotate to the next account: retryable network errors
 * and HTTP 429.
 *
 * - Non-retryable fetch errors are rethrown.
 * - Retryable network errors end the upstream span and return
 *   `continueLoop: true` with `sawNetworkError` set.
 * - 429 puts the account on an exponentially growing cooldown (seeded from
 *   Retry-After when usable, capped at RATE_LIMIT_BACKOFF_CAP_MS), ends the
 *   span and returns `continueLoop: true` with `sawRateLimit` set.
 * - Any other response is returned untouched for the caller to classify.
 *
 * @param {object} args url, headers, finalBodyStr, account, accountState,
 *   enabledAccounts, orderedAccounts, tracer, logAttempt, the caller's current
 *   flags and the upstream span.
 * @returns {Promise<object>} `continueLoop`, updated flags, `upstreamSpan`
 *   and, when the attempt should be processed further, `response`.
 */
async function fetchAnthropicAccountResponse(args) {
    const { url, headers, finalBodyStr, account, accountState, enabledAccounts, orderedAccounts, tracer, logAttempt, currentLastError, currentSawRateLimit, currentSawNetworkError, upstreamSpan, } = args;
    // Retry-After as milliseconds; 0 when absent or unparseable.
    const retryAfterToMs = (headerValue) => {
        if (!headerValue) {
            return 0;
        }
        const seconds = parseInt(headerValue, 10);
        if (!Number.isNaN(seconds)) {
            return seconds * 1000;
        }
        const retryAt = new Date(headerValue);
        return Number.isNaN(retryAt.getTime())
            ? 0
            : Math.max(retryAt.getTime() - Date.now(), 1000);
    };
    let response;
    try {
        response = await fetch(url, {
            method: "POST",
            headers,
            body: finalBodyStr,
            signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
        });
    }
    catch (fetchErr) {
        if (!isRetryableNetworkError(fetchErr)) {
            throw fetchErr;
        }
        recordAttemptError(account.label, account.type, 502);
        const errorCode = getErrorCode(fetchErr) ?? "unknown";
        const errorMessage = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
        logger.always(`[proxy] fetch error account=${account.label} code=${errorCode} (rotating): ${errorMessage}`);
        logAttempt(502, "network_error", errorMessage);
        tracer?.setError("network_error", errorMessage);
        tracer?.recordRetry(account.label, "network_error");
        upstreamSpan?.end();
        return {
            continueLoop: true,
            lastError: errorMessage,
            sawRateLimit: currentSawRateLimit,
            sawNetworkError: true,
            upstreamSpan: undefined,
        };
    }
    if (response.status !== 429) {
        return {
            continueLoop: false,
            response,
            lastError: currentLastError,
            sawRateLimit: currentSawRateLimit,
            sawNetworkError: currentSawNetworkError,
            upstreamSpan,
        };
    }
    const hintedMs = retryAfterToMs(response.headers.get("retry-after"));
    const priorLevel = accountState.backoffLevel;
    const seedMs = hintedMs > 0 ? hintedMs : RATE_LIMIT_BACKOFF_BASE_MS;
    // Double the cooldown for each consecutive rate limit, up to the cap.
    const backoffMs = Math.min(seedMs * 2 ** priorLevel, RATE_LIMIT_BACKOFF_CAP_MS);
    accountState.coolingUntil = Date.now() + backoffMs;
    accountState.backoffLevel += 1;
    advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
    recordAttemptError(account.label, account.type, 429);
    recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
    const rateLimitBody = await response.text();
    logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(backoffMs / 1000)}s`);
    logAttempt(429, "rate_limit_error", String(rateLimitBody));
    tracer?.setError("rate_limit_error", String(rateLimitBody).slice(0, 500));
    tracer?.recordRetry(account.label, "rate_limit");
    upstreamSpan?.end();
    return {
        continueLoop: true,
        lastError: rateLimitBody,
        sawRateLimit: true,
        sawNetworkError: currentSawNetworkError,
        upstreamSpan: undefined,
    };
}
2778
+ async function handleAnthropicRoutedClaudeRequest(args) {
2779
+ const { ctx, body, modelRouter, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, logProxyBody, logFinalRequest, } = args;
2780
+ const loadedAccounts = await loadClaudeProxyAccounts({
2781
+ ctx,
2782
+ body,
2783
+ tracer,
2784
+ requestStartTime,
2785
+ accountStrategy,
2786
+ buildLoggedClaudeError,
2787
+ });
2788
+ if ("response" in loadedAccounts) {
2789
+ return loadedAccounts.response;
2790
+ }
2791
+ const { accounts, enabledAccounts, orderedAccounts, bodyStr, requestStart, toolCount, url, clientHeaders, isClaudeClientRequest, } = loadedAccounts;
2792
+ const loopState = {
2793
+ lastError: undefined,
2794
+ sawRateLimit: false,
2795
+ sawNetworkError: false,
2796
+ sawTransientFailure: false,
2797
+ invalidRequestFailure: null,
2798
+ authFailureMessage: null,
2799
+ attemptNumber: 0,
2800
+ };
2801
+ const acctSelectionSpan = tracer?.startAccountSelection();
2802
+ for (const account of orderedAccounts) {
2803
+ const accountState = getOrCreateRuntimeState(account.key);
2804
+ if (accountState.coolingUntil && accountState.coolingUntil > Date.now()) {
2805
+ continue;
2806
+ }
2807
+ loopState.attemptNumber += 1;
2808
+ if (tracer && loopState.attemptNumber === 1 && acctSelectionSpan) {
2809
+ tracer.setAccountSelection({
2810
+ strategy: accountStrategy,
2811
+ accountsTotal: accounts.length,
2812
+ accountsHealthy: enabledAccounts.length,
2813
+ selectedAccount: account.label,
2814
+ accountType: account.type,
2815
+ });
2816
+ acctSelectionSpan.end();
2817
+ }
2818
+ const logAttempt = createAnthropicAttemptLogger({
2819
+ ctx,
2820
+ body,
2821
+ toolCount,
2822
+ requestStart,
2823
+ tracer,
2824
+ account,
2825
+ attemptNumber: loopState.attemptNumber,
2826
+ });
2827
+ const preparedAttempt = await prepareAnthropicAccountAttempt({
2828
+ account,
2829
+ accountState,
2830
+ bodyStr,
2831
+ clientHeaders,
2832
+ isClaudeClientRequest,
2833
+ url,
2834
+ tracer,
2835
+ attemptNumber: loopState.attemptNumber,
2836
+ currentLastError: loopState.lastError,
2837
+ currentAuthFailureMessage: loopState.authFailureMessage,
2838
+ logAttempt,
2839
+ logProxyBody,
2840
+ });
2841
+ loopState.lastError = preparedAttempt.lastError;
2842
+ loopState.authFailureMessage = preparedAttempt.authFailureMessage;
2843
+ if (preparedAttempt.continueLoop ||
2844
+ !preparedAttempt.headers ||
2845
+ !preparedAttempt.buildUpstreamBody ||
2846
+ !preparedAttempt.finalBodyStr ||
2847
+ preparedAttempt.fetchStartMs === undefined) {
2848
+ continue;
2849
+ }
2850
+ const fetchResult = await fetchAnthropicAccountResponse({
2851
+ url,
2852
+ headers: preparedAttempt.headers,
2853
+ finalBodyStr: preparedAttempt.finalBodyStr,
2854
+ account,
2855
+ accountState,
2856
+ enabledAccounts,
2857
+ orderedAccounts,
2858
+ tracer,
2859
+ logAttempt,
2860
+ currentLastError: loopState.lastError,
2861
+ currentSawRateLimit: loopState.sawRateLimit,
2862
+ currentSawNetworkError: loopState.sawNetworkError,
2863
+ upstreamSpan: preparedAttempt.upstreamSpan,
2864
+ });
2865
+ loopState.lastError = fetchResult.lastError;
2866
+ loopState.sawRateLimit = fetchResult.sawRateLimit;
2867
+ loopState.sawNetworkError = fetchResult.sawNetworkError;
2868
+ if (fetchResult.continueLoop || !fetchResult.response) {
2869
+ continue;
2870
+ }
2871
+ let upstreamSpan = fetchResult.upstreamSpan;
2872
+ const response = fetchResult.response;
2873
+ if (response.status === 401 &&
2874
+ account.type === "oauth" &&
2875
+ account.refreshToken) {
2876
+ const authRetryResult = await handleAnthropicAuthRetry({
2877
+ ctx,
2878
+ body,
2879
+ account,
2880
+ accountState,
2881
+ headers: preparedAttempt.headers,
2882
+ buildUpstreamBody: preparedAttempt.buildUpstreamBody,
2883
+ enabledAccounts,
2884
+ orderedAccounts,
2885
+ response,
2886
+ tracer,
2887
+ requestStartTime,
2888
+ fetchStartMs: preparedAttempt.fetchStartMs,
2889
+ attemptNumber: loopState.attemptNumber,
2890
+ finalBodyStr: preparedAttempt.finalBodyStr,
2891
+ upstreamSpan,
2892
+ logAttempt,
2893
+ logProxyBody,
2894
+ logFinalRequest,
2895
+ lastError: loopState.lastError,
2896
+ authFailureMessage: loopState.authFailureMessage,
2897
+ sawRateLimit: loopState.sawRateLimit,
2898
+ sawTransientFailure: loopState.sawTransientFailure,
2899
+ sawNetworkError: loopState.sawNetworkError,
2900
+ });
2901
+ loopState.lastError = authRetryResult.lastError;
2902
+ loopState.authFailureMessage = authRetryResult.authFailureMessage;
2903
+ loopState.sawRateLimit = authRetryResult.sawRateLimit;
2904
+ loopState.sawTransientFailure = authRetryResult.sawTransientFailure;
2905
+ loopState.sawNetworkError = authRetryResult.sawNetworkError;
2906
+ upstreamSpan = authRetryResult.upstreamSpan;
2907
+ if (authRetryResult.response !== undefined) {
2908
+ return authRetryResult.response;
2909
+ }
2910
+ if (authRetryResult.continueLoop) {
2911
+ continue;
2912
+ }
2913
+ }
2914
+ if (!response.ok) {
2915
+ const nonOkResult = await handleAnthropicNonOkResponse({
2916
+ response,
2917
+ account,
2918
+ accountState,
2919
+ tracer,
2920
+ requestStartTime,
2921
+ fetchStartMs: preparedAttempt.fetchStartMs,
2922
+ attemptNumber: loopState.attemptNumber,
2923
+ logAttempt,
2924
+ logProxyBody,
2925
+ logFinalRequest,
2926
+ lastError: loopState.lastError,
2927
+ authFailureMessage: loopState.authFailureMessage,
2928
+ sawTransientFailure: loopState.sawTransientFailure,
2929
+ invalidRequestFailure: loopState.invalidRequestFailure,
2930
+ maxConsecutiveRefreshFailures: MAX_CONSECUTIVE_REFRESH_FAILURES,
2931
+ });
2932
+ loopState.lastError = nonOkResult.lastError;
2933
+ loopState.authFailureMessage = nonOkResult.authFailureMessage;
2934
+ loopState.sawTransientFailure = nonOkResult.sawTransientFailure;
2935
+ loopState.invalidRequestFailure = nonOkResult.invalidRequestFailure;
2936
+ if (nonOkResult.response !== undefined) {
2937
+ return nonOkResult.response;
2938
+ }
2939
+ if (nonOkResult.continueLoop) {
2940
+ continue;
2941
+ }
2942
+ break;
2943
+ }
2944
+ const successResult = await handleAnthropicSuccessfulResponse({
2945
+ ctx,
2946
+ body,
2947
+ account,
2948
+ accountState,
2949
+ response,
2950
+ tracer,
2951
+ requestStartTime,
2952
+ fetchStartMs: preparedAttempt.fetchStartMs,
2953
+ attemptNumber: loopState.attemptNumber,
2954
+ finalBodyStr: preparedAttempt.finalBodyStr,
2955
+ upstreamSpan,
2956
+ logProxyBody,
2957
+ logFinalRequest,
2958
+ });
2959
+ if ("retryNextAccount" in successResult) {
2960
+ continue;
2961
+ }
2962
+ return successResult.response;
2963
+ }
2964
+ if (loopState.attemptNumber === 0) {
2965
+ acctSelectionSpan?.end();
2966
+ }
2967
+ const configuredFallbackResponse = await tryConfiguredClaudeFallbackChain({
2968
+ ctx,
2969
+ body,
2970
+ modelRouter,
2971
+ tracer,
2972
+ requestStartTime,
2973
+ logProxyBody,
2974
+ logFinalRequest,
2975
+ });
2976
+ if (configuredFallbackResponse) {
2977
+ return configuredFallbackResponse;
2978
+ }
2979
+ const configuredChain = modelRouter?.getFallbackChain() ?? [];
2980
+ if (configuredChain.length === 0 && !loopState.sawRateLimit) {
2981
+ const autoFallbackResponse = await tryAutoClaudeFallback({
2982
+ ctx,
2983
+ body,
2984
+ tracer,
2985
+ requestStartTime,
2986
+ logProxyBody,
2987
+ logFinalRequest,
2988
+ });
2989
+ if (autoFallbackResponse) {
2990
+ return autoFallbackResponse;
2991
+ }
2992
+ }
2993
+ return buildClaudeAnthropicFailureResponse({
2994
+ tracer,
2995
+ requestStartTime,
2996
+ authFailureMessage: loopState.authFailureMessage,
2997
+ invalidRequestFailure: loopState.invalidRequestFailure,
2998
+ sawNetworkError: loopState.sawNetworkError,
2999
+ sawTransientFailure: loopState.sawTransientFailure,
3000
+ sawRateLimit: loopState.sawRateLimit,
3001
+ lastError: loopState.lastError,
3002
+ orderedAccounts,
3003
+ buildLoggedClaudeError,
3004
+ logProxyBody,
3005
+ logFinalRequest,
3006
+ });
3007
+ }
259
3008
  // ---------------------------------------------------------------------------
260
3009
  // Route factory
261
3010
  // ---------------------------------------------------------------------------
@@ -269,7 +3018,7 @@ async function tryLoadLegacyAccount(creds, legacyCredPath) {
269
3018
  * @param basePath - Base path prefix (default: "" since Claude API uses /v1/...).
270
3019
  * @returns RouteGroup with Claude-compatible endpoints.
271
3020
  */
272
- export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrategy = "fill-first") {
3021
+ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrategy = "fill-first", passthroughMode = false) {
273
3022
  return {
274
3023
  prefix: `${basePath}/v1`,
275
3024
  routes: [
@@ -298,1269 +3047,65 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
298
3047
  provider: "anthropic",
299
3048
  model: body.model,
300
3049
  };
3050
+ const clientRequestBody = JSON.stringify(body);
3051
+ // 3. Create request runtime context (tracer, loggers, error builder)
3052
+ const { tracer, requestStartTime, logProxyBody, logFinalRequest, buildLoggedClaudeError, } = createClaudeRequestRuntimeContext({
3053
+ ctx,
3054
+ body,
3055
+ clientRequestBody,
3056
+ });
301
3057
  try {
302
- // 3. Route based on target provider
3058
+ // 4. Route based on target provider
303
3059
  if (route.provider === null) {
304
- return buildClaudeError(404, `Model '${body.model}' is not a Claude model. ` +
305
- `Use a model router to route it to another provider.`);
3060
+ tracer?.setError("not_found_error", `Model '${body.model}' is not a Claude model.`);
3061
+ tracer?.end(404, Date.now() - requestStartTime);
3062
+ return buildLoggedClaudeError(404, `Model '${body.model}' is not a Claude model. Use a model router to route it to another provider.`);
306
3063
  }
307
- const isClaudeTarget = route.provider === "anthropic";
308
- if (isClaudeTarget) {
309
- // ─── PASSTHROUGH MODE (Claude → Claude) ───────────────
310
- const fs = await import("fs");
311
- const os = await import("os");
312
- const accounts = [];
313
- const legacyCredPath = `${os.homedir()}/.neurolink/anthropic-credentials.json`;
314
- // 1. Compound keys from TokenStore
315
- // Skip accounts with expired tokens and no refresh token.
316
- // For expired tokens WITH a refresh token, attempt ONE refresh
317
- // before adding — if it fails, skip the account entirely.
318
- const { tokenStore } = await import("../../auth/tokenStore.js");
319
- // Decision 10D: Auto-prune dead entries once on first request (startup)
320
- if (!startupPruneDone) {
321
- await tokenStore.pruneExpired();
322
- startupPruneDone = true;
323
- }
324
- const compoundKeys = await tokenStore.listByPrefix("anthropic:");
325
- for (const key of compoundKeys) {
326
- // Decision 10D + Hot-reload: Skip disabled accounts UNLESS credentials changed
327
- if (await tokenStore.isDisabled(key)) {
328
- const existingState = getOrCreateRuntimeState(key);
329
- // Check if credentials were refreshed/re-authed since disable.
330
- // On cold start, lastToken is empty — don't treat that as a
331
- // credential change; only compare on subsequent reloads.
332
- const tokens = await tokenStore.loadTokens(key);
333
- const hasTrackedTokens = existingState.lastToken !== undefined &&
334
- existingState.lastToken !== "";
335
- const tokenChanged = tokens &&
336
- hasTrackedTokens &&
337
- (existingState.lastToken !== tokens.accessToken ||
338
- existingState.lastRefreshToken !== tokens.refreshToken);
339
- if (tokenChanged) {
340
- // Credentials changed — auto-enable and use this account
341
- await tokenStore.markEnabled(key);
342
- logger.always(`[proxy] account=${key.split(":")[1] ?? key} re-enabled (credentials changed)`);
343
- existingState.permanentlyDisabled = false;
344
- existingState.coolingUntil = undefined;
345
- existingState.backoffLevel = 0;
346
- existingState.consecutiveRefreshFailures = 0;
347
- }
348
- else {
349
- logger.debug(`[proxy] skipping disabled account=${key.split(":")[1] ?? key}`);
350
- existingState.permanentlyDisabled = true;
351
- continue;
352
- }
353
- }
354
- const tokens = await tokenStore.loadTokens(key);
355
- if (!tokens) {
356
- continue;
357
- }
358
- let accessToken = tokens.accessToken;
359
- let refreshTok = tokens.refreshToken;
360
- let expiresAt = tokens.expiresAt;
361
- // Check if token is expired
362
- const isExpired = expiresAt ? expiresAt < Date.now() : false;
363
- if (isExpired) {
364
- const label = key.split(":")[1] ?? key;
365
- // Check if already marked dead from a previous request
366
- const existingState = getOrCreateRuntimeState(key);
367
- if (existingState.permanentlyDisabled) {
368
- // Already known dead — skip silently (no log spam)
369
- continue;
370
- }
371
- if (!refreshTok) {
372
- logger.always(`[proxy] skipping account=${label} (expired, no refresh token)`);
373
- await disableAccountUntilReauth({ key, label, token: accessToken, type: "oauth" }, existingState);
374
- continue;
375
- }
376
- // Try ONE refresh before adding
377
- const tempAccount = {
378
- token: accessToken,
379
- refreshToken: refreshTok,
380
- expiresAt,
381
- label,
382
- };
383
- const refreshed = await refreshToken(tempAccount);
384
- if (!refreshed.success) {
385
- logger.always(`[proxy] skipping account=${label} (expired, refresh failed: ${refreshed.error?.slice(0, 200) ?? "unknown"})`);
386
- await disableAccountUntilReauth({ key, label, token: accessToken, type: "oauth" }, existingState);
387
- continue;
388
- }
389
- // Refresh succeeded — use new token and persist
390
- accessToken = tempAccount.token;
391
- refreshTok = tempAccount.refreshToken;
392
- expiresAt = tempAccount.expiresAt;
393
- await tokenStore.saveTokens(key, {
394
- accessToken,
395
- refreshToken: refreshTok,
396
- expiresAt: expiresAt ?? Date.now() + 3600_000,
397
- tokenType: "Bearer",
398
- });
399
- logger.always(`[proxy] refreshed expired account=${key.split(":")[1] ?? key} at startup`);
400
- }
401
- // Detect whether this is an API key or an OAuth token.
402
- // Use the stored tokenType (set at auth time) rather than a
403
- // prefix heuristic — both API keys (sk-ant-api03-…) and OAuth
404
- // access tokens (sk-ant-oat01-…) share the "sk-ant-" prefix.
405
- const accountType = tokens.tokenType === "Bearer" ? "oauth" : "api_key";
406
- accounts.push({
407
- key,
408
- label: key.split(":")[1] ?? key,
409
- token: accessToken,
410
- refreshToken: refreshTok,
411
- expiresAt,
412
- type: accountType,
413
- persistTarget: { providerKey: key },
414
- });
415
- }
416
- // 2. Legacy credentials file (only if no usable compound account was loaded)
417
- if (accounts.length === 0) {
418
- try {
419
- const creds = JSON.parse(fs.readFileSync(legacyCredPath, "utf8"));
420
- const legacyAccount = await tryLoadLegacyAccount(creds, legacyCredPath);
421
- if (legacyAccount) {
422
- accounts.push(legacyAccount);
423
- }
424
- }
425
- catch {
426
- // no-op: file absent or invalid
427
- }
428
- }
429
- // 3. Env var — only use as fallback when no OAuth accounts are available.
430
- if (process.env.ANTHROPIC_API_KEY && accounts.length === 0) {
431
- accounts.push({
432
- key: "anthropic:env",
433
- label: "env",
434
- token: process.env.ANTHROPIC_API_KEY,
435
- type: "api_key",
3064
+ if (route.provider === "anthropic") {
3065
+ tracer?.setMode("passthrough");
3066
+ if (passthroughMode) {
3067
+ return handleClaudePassthroughRequest({
3068
+ ctx,
3069
+ body,
3070
+ clientRequestBody,
3071
+ tracer,
3072
+ requestStartTime,
3073
+ logProxyBody,
436
3074
  });
437
3075
  }
438
- if (accounts.length === 0) {
439
- return buildClaudeError(401, "No Anthropic credentials found");
440
- }
441
- // Sync in-memory runtime state with current token material.
442
- for (const account of accounts) {
443
- const state = getOrCreateRuntimeState(account.key);
444
- const tokenChanged = state.lastToken !== account.token ||
445
- state.lastRefreshToken !== account.refreshToken;
446
- if (tokenChanged) {
447
- if (state.permanentlyDisabled) {
448
- logger.always(`[proxy] account=${account.label} credentials changed, re-enabling`);
449
- }
450
- state.coolingUntil = undefined;
451
- state.backoffLevel = 0;
452
- state.consecutiveRefreshFailures = 0;
453
- state.permanentlyDisabled = false;
454
- }
455
- state.lastToken = account.token;
456
- state.lastRefreshToken = account.refreshToken;
457
- }
458
- const enabledAccounts = accounts.filter((account) => {
459
- return !getOrCreateRuntimeState(account.key)
460
- .permanentlyDisabled;
461
- });
462
- if (enabledAccounts.length === 0) {
463
- return buildClaudeError(401, formatReauthMessage(accounts.map((account) => account.label)));
464
- }
465
- // Order accounts based on the configured strategy.
466
- // - fill-first: always start with the primary account;
467
- // only fall over when the primary is cooling down (429/401).
468
- // - round-robin: rotate the starting index on every request
469
- // so traffic is spread evenly across accounts.
470
- const orderedAccounts = [...enabledAccounts];
471
- // Reset round-robin index when account list size changes
472
- // (e.g. a new account was authenticated while the proxy was running).
473
- // Only applies to round-robin; fill-first uses primaryAccountIndex
474
- // as a sticky primary and should not be disrupted.
475
- if (accountStrategy === "round-robin" &&
476
- orderedAccounts.length !== lastKnownAccountCount) {
477
- primaryAccountIndex = 0;
478
- lastKnownAccountCount = orderedAccounts.length;
479
- }
480
- if (orderedAccounts.length > 1) {
481
- if (accountStrategy === "round-robin") {
482
- // Advance the index on every request for even distribution
483
- const idx = primaryAccountIndex % orderedAccounts.length;
484
- primaryAccountIndex =
485
- (primaryAccountIndex + 1) % orderedAccounts.length;
486
- if (idx > 0) {
487
- const head = orderedAccounts.splice(0, idx);
488
- orderedAccounts.push(...head);
489
- }
490
- }
491
- else {
492
- // fill-first (default): clamp primaryAccountIndex
493
- const idx = primaryAccountIndex % orderedAccounts.length;
494
- if (idx > 0) {
495
- const head = orderedAccounts.splice(0, idx);
496
- orderedAccounts.push(...head);
497
- }
498
- }
499
- }
500
- let lastError;
501
- let sawRateLimit = false;
502
- let sawNetworkError = false;
503
- let sawTransientFailure = false;
504
- let authFailureMessage = null;
505
- const bodyStr = JSON.stringify(body);
506
- const requestStart = Date.now();
507
- const toolCount = Array.isArray(body.tools)
508
- ? body.tools.length
509
- : 0;
510
- const url = "https://api.anthropic.com/v1/messages?beta=true";
511
- const clientHeaders = ctx.headers ?? {};
512
- for (const account of orderedAccounts) {
513
- const accountState = getOrCreateRuntimeState(account.key);
514
- if (accountState.coolingUntil &&
515
- accountState.coolingUntil > Date.now()) {
516
- continue;
517
- }
518
- const logAttempt = (status, errorType, errorMessage) => {
519
- logRequest({
520
- timestamp: new Date().toISOString(),
521
- requestId: ctx.requestId,
522
- method: ctx.method,
523
- path: ctx.path,
524
- model: body.model,
525
- stream: !!body.stream,
526
- toolCount,
527
- account: account.label,
528
- accountType: account.type,
529
- responseStatus: status,
530
- responseTimeMs: Date.now() - requestStart,
531
- ...(errorType ? { errorType } : {}),
532
- ...(errorMessage ? { errorMessage } : {}),
533
- });
534
- };
535
- // Auto-refresh expiring access tokens once before making the request.
536
- if (needsRefresh(account)) {
537
- const refreshed = await refreshToken(account);
538
- if (refreshed.success) {
539
- if (account.persistTarget) {
540
- await persistTokens(account.persistTarget, account);
541
- }
542
- accountState.consecutiveRefreshFailures = 0;
543
- }
544
- else {
545
- accountState.consecutiveRefreshFailures += 1;
546
- lastError = `token refresh failed for account=${account.label}: ${refreshed.error?.slice(0, 200) ?? "unknown"}`;
547
- logger.debug(`[proxy] preflight refresh failed account=${account.label} failures=${accountState.consecutiveRefreshFailures}`);
548
- if (accountState.consecutiveRefreshFailures >=
549
- MAX_CONSECUTIVE_REFRESH_FAILURES) {
550
- await disableAccountUntilReauth(account, accountState);
551
- authFailureMessage = formatReauthMessage(account.label);
552
- logAttempt(401, "authentication_error", String(lastError));
553
- continue;
554
- }
555
- }
556
- }
557
- const isOAuth = account.type === "oauth";
558
- // Decision 6: Passthrough client headers, fill gaps only.
559
- // Start with a copy of incoming client headers, then set
560
- // defaults for anything the client didn't send. Always
561
- // override auth + content-type.
562
- const headers = {};
563
- for (const [hk, hv] of Object.entries(clientHeaders)) {
564
- const lower = hk.toLowerCase();
565
- if (typeof hv === "string" &&
566
- !BLOCKED_UPSTREAM_HEADERS.has(lower)) {
567
- headers[lower] = hv;
568
- }
569
- }
570
- // Always set (override) — auth and content-type are proxy-controlled
571
- headers["content-type"] = "application/json";
572
- if (isOAuth) {
573
- headers["authorization"] = `Bearer ${account.token}`;
574
- delete headers["x-api-key"];
575
- }
576
- else {
577
- headers["x-api-key"] = account.token;
578
- delete headers["authorization"];
579
- }
580
- // Apply header snapshot defaults for OAuth accounts
581
- if (isOAuth) {
582
- await applyHeaderSnapshot(headers, account.label);
583
- }
584
- // Hard defaults for anything still missing
585
- if (!headers["user-agent"]) {
586
- headers["user-agent"] = "claude-cli/2.1.86 (external, cli)";
587
- }
588
- if (!headers["anthropic-version"]) {
589
- headers["anthropic-version"] = "2023-06-01";
590
- }
591
- if (!headers["anthropic-dangerous-direct-browser-access"]) {
592
- headers["anthropic-dangerous-direct-browser-access"] = "true";
593
- }
594
- // Manage anthropic-beta header based on auth type.
595
- // OAuth requires specific betas; API-key must NOT carry them.
596
- if (isOAuth) {
597
- const existing = new Set((headers["anthropic-beta"] ?? "")
598
- .split(",")
599
- .map((s) => s.trim())
600
- .filter(Boolean));
601
- existing.add("oauth-2025-04-20");
602
- existing.add("claude-code-20250219");
603
- headers["anthropic-beta"] = [...existing].join(",");
604
- }
605
- else {
606
- // Strip OAuth-specific betas that may have leaked from client
607
- const cleaned = (headers["anthropic-beta"] ?? "")
608
- .split(",")
609
- .map((s) => s.trim())
610
- .filter((s) => s && s !== "oauth-2025-04-20")
611
- .join(",");
612
- if (cleaned) {
613
- headers["anthropic-beta"] = cleaned;
614
- }
615
- else {
616
- delete headers["anthropic-beta"];
617
- }
618
- }
619
- // Polyfill request body for OAuth accounts
620
- const buildUpstreamBody = () => isOAuth ? polyfillOAuthBody(bodyStr, account.token) : bodyStr;
621
- const finalBodyStr = buildUpstreamBody();
622
- logger.always(`[proxy] → account=${account.label} (${account.type})`);
623
- recordRequest(account.label, account.type);
624
- // Log full request for debugging (written to ~/.neurolink/logs/proxy-debug-*.jsonl)
625
- const fetchStartMs = Date.now();
626
- let response;
627
- try {
628
- response = await fetch(url, {
629
- method: "POST",
630
- headers,
631
- body: finalBodyStr,
632
- signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
633
- });
634
- }
635
- catch (fetchErr) {
636
- if (!isRetryableNetworkError(fetchErr)) {
637
- throw fetchErr;
638
- }
639
- // Decision 8: Network errors — immediate rotation, no cooldown
640
- sawNetworkError = true;
641
- recordError(account.label, account.type, 502);
642
- const errorCode = getErrorCode(fetchErr) ?? "unknown";
643
- const errorMessage = fetchErr instanceof Error
644
- ? fetchErr.message
645
- : String(fetchErr);
646
- lastError = errorMessage;
647
- logger.always(`[proxy] fetch error account=${account.label} code=${errorCode} (rotating): ${errorMessage}`);
648
- logAttempt(502, "network_error", errorMessage);
649
- continue;
650
- }
651
- // Check 429 (with Retry-After + exponential backoff) → continue.
652
- if (response.status === 429) {
653
- sawRateLimit = true;
654
- const retryAfter = response.headers.get("retry-after");
655
- let cooldownMs = 0;
656
- if (retryAfter) {
657
- const seconds = parseInt(retryAfter, 10);
658
- if (!Number.isNaN(seconds)) {
659
- cooldownMs = seconds * 1000;
660
- }
661
- else {
662
- const date = new Date(retryAfter);
663
- // eslint-disable-next-line max-depth
664
- if (!Number.isNaN(date.getTime())) {
665
- cooldownMs = Math.max(date.getTime() - Date.now(), 1000);
666
- }
667
- }
668
- }
669
- const level = accountState.backoffLevel;
670
- const baseCooldown = cooldownMs > 0 ? cooldownMs : RATE_LIMIT_BACKOFF_BASE_MS;
671
- const backoffMs = Math.min(baseCooldown * Math.pow(2, level), RATE_LIMIT_BACKOFF_CAP_MS);
672
- accountState.coolingUntil = Date.now() + backoffMs;
673
- accountState.backoffLevel += 1;
674
- advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
675
- recordError(account.label, account.type, 429);
676
- recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
677
- lastError = await response.text();
678
- logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(backoffMs / 1000)}s`);
679
- logAttempt(429, "rate_limit_error", String(lastError));
680
- continue;
681
- }
682
- // On 401 for refreshable OAuth: refresh token and retry before failing over.
683
- if (response.status === 401 &&
684
- account.type === "oauth" &&
685
- account.refreshToken) {
686
- recordError(account.label, account.type, 401);
687
- let authRetrySucceeded = false;
688
- let authRetryError = "received 401 from Anthropic";
689
- for (let authRetry = 0; authRetry < MAX_AUTH_RETRIES; authRetry++) {
690
- logger.always(`[proxy] ← 401 account=${account.label} refreshing (attempt ${authRetry + 1}/${MAX_AUTH_RETRIES})`);
691
- const refreshSucceeded = await refreshToken(account);
692
- if (!refreshSucceeded.success) {
693
- accountState.consecutiveRefreshFailures += 1;
694
- authRetryError = `refresh failed for account=${account.label} attempt ${authRetry + 1}/${MAX_AUTH_RETRIES}: ${refreshSucceeded.error?.slice(0, 200) ?? "unknown"}`;
695
- lastError = authRetryError;
696
- logger.always(`[proxy] ⚠ account=${account.label} refresh failed on attempt ${authRetry + 1}`);
697
- // eslint-disable-next-line max-depth
698
- if (accountState.consecutiveRefreshFailures >=
699
- MAX_CONSECUTIVE_REFRESH_FAILURES) {
700
- await disableAccountUntilReauth(account, accountState);
701
- authFailureMessage = formatReauthMessage(account.label);
702
- break;
703
- }
704
- // eslint-disable-next-line max-depth
705
- if (authRetry < MAX_AUTH_RETRIES - 1) {
706
- await sleep(2000);
707
- }
708
- continue;
709
- }
710
- if (account.persistTarget) {
711
- await persistTokens(account.persistTarget, account);
712
- }
713
- headers.authorization = `Bearer ${account.token}`;
714
- try {
715
- const retryResp = await fetch(url, {
716
- method: "POST",
717
- headers,
718
- body: buildUpstreamBody(),
719
- signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
720
- });
721
- // eslint-disable-next-line max-depth
722
- if (retryResp.ok) {
723
- authRetrySucceeded = true;
724
- accountState.consecutiveRefreshFailures = 0;
725
- accountState.backoffLevel = 0;
726
- accountState.coolingUntil = undefined;
727
- logger.always(`[proxy] ← 200 account=${account.label} (after ${authRetry + 1} refresh(es))`);
728
- recordSuccess(account.label, account.type);
729
- logAttempt(retryResp.status);
730
- // Capture quota headers after successful auth-retry
731
- {
732
- const retryQuota = parseQuotaHeaders(retryResp.headers);
733
- // eslint-disable-next-line max-depth
734
- if (retryQuota) {
735
- saveAccountQuota(account.label, retryQuota).catch(() => { });
736
- }
737
- }
738
- // eslint-disable-next-line max-depth
739
- if (body.stream && retryResp.body) {
740
- const retryReader = retryResp.body.getReader();
741
- let retryStreamClosed = false;
742
- const retryStream = new ReadableStream({
743
- async pull(controller) {
744
- if (retryStreamClosed) {
745
- return;
746
- }
747
- try {
748
- const { done, value } = await retryReader.read();
749
- if (retryStreamClosed) {
750
- return;
751
- }
752
- if (done) {
753
- retryStreamClosed = true;
754
- controller.close();
755
- return;
756
- }
757
- controller.enqueue(value);
758
- }
759
- catch (streamErr) {
760
- const errMsg = streamErr instanceof Error
761
- ? streamErr.message
762
- : String(streamErr);
763
- logger.always(`[proxy] mid-stream error (auth-retry) account=${account.label}: ${errMsg}`);
764
- logStreamError({
765
- timestamp: new Date().toISOString(),
766
- requestId: ctx.requestId,
767
- account: account.label,
768
- model: body.model,
769
- errorMessage: errMsg,
770
- durationMs: Date.now() - fetchStartMs,
771
- });
772
- if (!retryStreamClosed) {
773
- retryStreamClosed = true;
774
- const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
775
- controller.enqueue(new TextEncoder().encode(errorEvent));
776
- controller.close();
777
- }
778
- }
779
- },
780
- cancel() {
781
- retryStreamClosed = true;
782
- retryReader.cancel();
783
- },
784
- });
785
- const responseHeaders = {
786
- "content-type": "text/event-stream",
787
- "cache-control": "no-cache",
788
- connection: "keep-alive",
789
- };
790
- // eslint-disable-next-line max-depth
791
- for (const h of [
792
- "retry-after",
793
- "anthropic-ratelimit-requests-remaining",
794
- "anthropic-ratelimit-requests-limit",
795
- "anthropic-ratelimit-tokens-remaining",
796
- "anthropic-ratelimit-tokens-limit",
797
- ]) {
798
- const val = retryResp.headers.get(h);
799
- // eslint-disable-next-line max-depth
800
- if (val) {
801
- responseHeaders[h] = val;
802
- }
803
- }
804
- return new Response(retryStream, {
805
- status: retryResp.status,
806
- headers: responseHeaders,
807
- });
808
- }
809
- return retryResp.json();
810
- }
811
- const retryStatus = retryResp.status;
812
- const retryBody = await retryResp.text();
813
- authRetryError = `retry ${authRetry + 1}/${MAX_AUTH_RETRIES} failed with status ${retryStatus}`;
814
- lastError = retryBody;
815
- logger.debug(`[proxy] retry ${authRetry + 1} failed: ${retryStatus} ${retryBody.substring(0, 120)}`);
816
- recordError(account.label, account.type, retryStatus);
817
- // eslint-disable-next-line max-depth
818
- if (retryStatus === 429) {
819
- sawRateLimit = true;
820
- const retryAfter = retryResp.headers.get("retry-after");
821
- const parsedRetryAfter = parseInt(retryAfter ?? "", 10);
822
- const cooldownMs = Number.isNaN(parsedRetryAfter)
823
- ? 60_000
824
- : Math.max(1, parsedRetryAfter) * 1000;
825
- accountState.coolingUntil = Date.now() + cooldownMs;
826
- advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
827
- recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
828
- break;
829
- }
830
- // eslint-disable-next-line max-depth
831
- if (retryStatus === 401 ||
832
- retryStatus === 402 ||
833
- retryStatus === 403) {
834
- // eslint-disable-next-line max-depth
835
- if (authRetry < MAX_AUTH_RETRIES - 1) {
836
- await sleep(1000);
837
- }
838
- continue;
839
- }
840
- // eslint-disable-next-line max-depth
841
- if (isTransientHttpFailure(retryStatus, retryBody)) {
842
- // Decision 8: No cooldown for transient errors — rotate immediately
843
- sawTransientFailure = true;
844
- break;
845
- }
846
- logAttempt(retryStatus, "api_error", summarizeErrorMessage(retryBody));
847
- // eslint-disable-next-line max-depth
848
- try {
849
- return JSON.parse(retryBody);
850
- }
851
- catch {
852
- return buildClaudeError(retryStatus, retryBody);
853
- }
854
- }
855
- catch (retryFetchErr) {
856
- // Decision 8: No cooldown for network errors — rotate immediately
857
- sawNetworkError = true;
858
- recordError(account.label, account.type, 502);
859
- const message = retryFetchErr instanceof Error
860
- ? retryFetchErr.message
861
- : String(retryFetchErr);
862
- authRetryError = `network error on retry ${authRetry + 1}: ${message}`;
863
- lastError = authRetryError;
864
- logger.debug(`[proxy] ${authRetryError}`);
865
- break;
866
- }
867
- }
868
- if (!authRetrySucceeded) {
869
- // eslint-disable-next-line max-depth
870
- if (!accountState.permanentlyDisabled) {
871
- // eslint-disable-next-line max-depth
872
- if (!accountState.coolingUntil ||
873
- accountState.coolingUntil <= Date.now()) {
874
- accountState.coolingUntil =
875
- Date.now() + AUTH_COOLDOWN_MS;
876
- }
877
- recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
878
- }
879
- lastError = authRetryError;
880
- logger.always(`[proxy] ⚠ account=${account.label} auth retries exhausted, cooldown=5min`);
881
- logAttempt(401, "authentication_error", authRetryError);
882
- continue;
883
- }
884
- }
885
- if (!response.ok) {
886
- const errBody = await response.text();
887
- // Log full error for debugging
888
- const errRespHeaders = {};
889
- response.headers.forEach((v, k) => {
890
- errRespHeaders[k] = v;
891
- });
892
- logFullRequestResponse({
893
- timestamp: new Date().toISOString(),
894
- requestId: ctx.requestId,
895
- account: account.label,
896
- model: body.model,
897
- stream: !!body.stream,
898
- requestHeaders: redactSensitiveHeaders(headers),
899
- requestBody: {
900
- model: body.model,
901
- max_tokens: body.max_tokens,
902
- stream: body.stream,
903
- system: Array.isArray(body.system)
904
- ? `[${body.system.length} blocks]`
905
- : typeof body.system,
906
- messages: Array.isArray(body.messages)
907
- ? `[${body.messages.length} messages]`
908
- : "?",
909
- tools: Array.isArray(body.tools)
910
- ? `[${body.tools.length} tools]`
911
- : "none",
912
- tool_choice: body.tool_choice,
913
- thinking: body.thinking,
914
- },
915
- requestBodySize: bodyStr.length,
916
- responseStatus: response.status,
917
- responseHeaders: errRespHeaders,
918
- responseBody: errBody.substring(0, 2000),
919
- responseBodySize: errBody.length,
920
- durationMs: Date.now() - fetchStartMs,
921
- });
922
- // Request-shape errors (do not retry).
923
- if (isInvalidRequestError(response.status, errBody)) {
924
- logger.always(`[proxy] ← ${response.status} request-shape error (no retry)`);
925
- logAttempt(response.status, "invalid_request_error", summarizeErrorMessage(errBody));
926
- try {
927
- return JSON.parse(errBody);
928
- }
929
- catch {
930
- return buildClaudeError(response.status, errBody);
931
- }
932
- }
933
- // Auth failures for OAuth accounts without refresh token.
934
- if ((response.status === 401 ||
935
- response.status === 402 ||
936
- response.status === 403) &&
937
- account.type === "oauth" &&
938
- !account.refreshToken) {
939
- recordError(account.label, account.type, response.status);
940
- accountState.consecutiveRefreshFailures += 1;
941
- accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
942
- recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
943
- if (accountState.consecutiveRefreshFailures >=
944
- MAX_CONSECUTIVE_REFRESH_FAILURES) {
945
- await disableAccountUntilReauth(account, accountState);
946
- }
947
- authFailureMessage = formatReauthMessage(account.label);
948
- logger.always(`[proxy] ← ${response.status} account=${account.label} cooldown=5min`);
949
- lastError = errBody;
950
- logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
951
- continue;
952
- }
953
- // Auth failures for API-key accounts.
954
- if ((response.status === 401 ||
955
- response.status === 402 ||
956
- response.status === 403) &&
957
- account.type === "api_key") {
958
- recordError(account.label, account.type, response.status);
959
- authFailureMessage =
960
- "Authentication failed for Anthropic API key credentials. Update ANTHROPIC_API_KEY or re-login with OAuth.";
961
- accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
962
- recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
963
- logger.always(`[proxy] ← ${response.status} account=${account.label} cooldown=5min`);
964
- lastError = errBody;
965
- logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
966
- continue;
967
- }
968
- // 404 is generally model/account specific; return immediately (no cooldown per Decision 8).
969
- if (response.status === 404) {
970
- recordError(account.label, account.type, response.status);
971
- logger.always(`[proxy] ← 404 account=${account.label}`);
972
- logAttempt(404, "not_found_error", summarizeErrorMessage(errBody));
973
- try {
974
- return JSON.parse(errBody);
975
- }
976
- catch {
977
- return buildClaudeError(404, errBody);
978
- }
979
- }
980
- // Decision 8: Transient upstream failures — immediate rotation, NO cooldown.
981
- if (isTransientHttpFailure(response.status, errBody)) {
982
- recordError(account.label, account.type, response.status);
983
- sawTransientFailure = true;
984
- // No cooldown for transient errors (502, 503, etc.) — rotate immediately
985
- logger.always(`[proxy] ← ${response.status} account=${account.label} (transient, rotating)`);
986
- lastError = errBody;
987
- logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
988
- continue;
989
- }
990
- // Other non-ok errors → return as-is.
991
- recordError(account.label, account.type, response.status);
992
- logger.always(`[proxy] ← ${response.status} account=${account.label}`);
993
- logger.debug(`[claude-proxy] error body: ${errBody.substring(0, 200)}`);
994
- logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
995
- try {
996
- return JSON.parse(errBody);
997
- }
998
- catch {
999
- return buildClaudeError(response.status, errBody);
1000
- }
1001
- }
1002
- // Success path.
1003
- accountState.backoffLevel = 0;
1004
- accountState.coolingUntil = undefined;
1005
- accountState.consecutiveRefreshFailures = 0;
1006
- recordSuccess(account.label, account.type);
1007
- logger.always(`[proxy] ← ${response.status} account=${account.label}`);
1008
- logAttempt(response.status);
1009
- // Capture quota/utilisation headers (fire-and-forget).
1010
- const quota = parseQuotaHeaders(response.headers);
1011
- if (quota) {
1012
- saveAccountQuota(account.label, quota).catch(() => {
1013
- // Non-fatal: quota persistence is best-effort
1014
- });
1015
- }
1016
- // Log full request + response headers for debugging
1017
- const respHeaders = {};
1018
- response.headers.forEach((v, k) => {
1019
- respHeaders[k] = v;
1020
- });
1021
- logFullRequestResponse({
1022
- timestamp: new Date().toISOString(),
1023
- requestId: ctx.requestId,
1024
- account: account.label,
1025
- model: body.model,
1026
- stream: !!body.stream,
1027
- requestHeaders: redactSensitiveHeaders(headers),
1028
- requestBody: {
1029
- model: body.model,
1030
- max_tokens: body.max_tokens,
1031
- stream: body.stream,
1032
- system: Array.isArray(body.system)
1033
- ? `[${body.system.length} blocks]`
1034
- : typeof body.system,
1035
- messages: Array.isArray(body.messages)
1036
- ? `[${body.messages.length} messages]`
1037
- : "?",
1038
- tools: Array.isArray(body.tools)
1039
- ? `[${body.tools.length} tools]`
1040
- : "none",
1041
- tool_choice: body.tool_choice,
1042
- thinking: body.thinking,
1043
- metadata: body.metadata ? "present" : "absent",
1044
- },
1045
- requestBodySize: bodyStr.length,
1046
- responseStatus: response.status,
1047
- responseHeaders: respHeaders,
1048
- durationMs: Date.now() - fetchStartMs,
1049
- });
1050
- if (body.stream) {
1051
- // Bootstrap retry: read first chunk to verify stream is valid.
1052
- if (response.body) {
1053
- const reader = response.body.getReader();
1054
- const firstChunk = await reader.read();
1055
- if (firstChunk.done ||
1056
- !firstChunk.value ||
1057
- firstChunk.value.length === 0) {
1058
- // Empty stream — retry with next account.
1059
- reader.cancel();
1060
- accountState.coolingUntil = Date.now() + 10_000;
1061
- recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
1062
- logger.always(`[proxy] ← empty stream from account=${account.label}, trying next`);
1063
- continue;
1064
- }
1065
- // Stream is valid — create a new ReadableStream with first chunk prepended.
1066
- let mainStreamClosed = false;
1067
- const remainingStream = new ReadableStream({
1068
- start(controller) {
1069
- controller.enqueue(firstChunk.value);
1070
- },
1071
- async pull(controller) {
1072
- if (mainStreamClosed) {
1073
- return;
1074
- }
1075
- try {
1076
- const { done, value } = await reader.read();
1077
- if (mainStreamClosed) {
1078
- return;
1079
- }
1080
- if (done) {
1081
- mainStreamClosed = true;
1082
- controller.close();
1083
- return;
1084
- }
1085
- controller.enqueue(value);
1086
- }
1087
- catch (streamErr) {
1088
- const errMsg = streamErr instanceof Error
1089
- ? streamErr.message
1090
- : String(streamErr);
1091
- logger.always(`[proxy] mid-stream error account=${account.label}: ${errMsg}`);
1092
- logStreamError({
1093
- timestamp: new Date().toISOString(),
1094
- requestId: ctx.requestId,
1095
- account: account.label,
1096
- model: body.model,
1097
- errorMessage: errMsg,
1098
- durationMs: Date.now() - fetchStartMs,
1099
- });
1100
- // Send SSE error event so the client gets a meaningful error
1101
- // instead of a raw connection drop
1102
- if (!mainStreamClosed) {
1103
- mainStreamClosed = true;
1104
- const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
1105
- controller.enqueue(new TextEncoder().encode(errorEvent));
1106
- controller.close();
1107
- }
1108
- }
1109
- },
1110
- cancel() {
1111
- mainStreamClosed = true;
1112
- reader.cancel();
1113
- },
1114
- });
1115
- // Forward rate limit headers from Anthropic.
1116
- const responseHeaders = {
1117
- "content-type": "text/event-stream",
1118
- "cache-control": "no-cache",
1119
- connection: "keep-alive",
1120
- };
1121
- for (const h of [
1122
- "retry-after",
1123
- "anthropic-ratelimit-requests-remaining",
1124
- "anthropic-ratelimit-requests-limit",
1125
- "anthropic-ratelimit-tokens-remaining",
1126
- "anthropic-ratelimit-tokens-limit",
1127
- ]) {
1128
- const val = response.headers.get(h);
1129
- // eslint-disable-next-line max-depth
1130
- if (val) {
1131
- responseHeaders[h] = val;
1132
- }
1133
- }
1134
- return new Response(remainingStream, {
1135
- status: response.status,
1136
- headers: responseHeaders,
1137
- });
1138
- }
1139
- return buildClaudeError(502, "No response body from upstream");
1140
- }
1141
- // Non-streaming: return JSON directly.
1142
- return response.json();
1143
- }
1144
- // All accounts exhausted — compute earliest recovery time.
1145
- const earliestRecovery = orderedAccounts.reduce((min, account) => {
1146
- const coolingUntil = getOrCreateRuntimeState(account.key).coolingUntil;
1147
- return coolingUntil ? Math.min(min, coolingUntil) : min;
1148
- }, Infinity);
1149
- const retryAfterSec = Number.isFinite(earliestRecovery)
1150
- ? Math.max(1, Math.ceil((earliestRecovery - Date.now()) / 1000))
1151
- : 60;
1152
- // Try fallback chain (alternative providers)
1153
- const chain = modelRouter?.getFallbackChain() ?? [];
1154
- for (const fallback of chain) {
1155
- try {
1156
- logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
1157
- const parsed = parseClaudeRequest(body);
1158
- const opts = {
1159
- input: {
1160
- text: parsed.prompt,
1161
- ...(parsed.images.length > 0
1162
- ? { images: parsed.images }
1163
- : {}),
1164
- },
1165
- provider: fallback.provider,
1166
- model: fallback.model,
1167
- systemPrompt: parsed.systemPrompt,
1168
- maxTokens: parsed.maxTokens,
1169
- ...(parsed.temperature !== undefined
1170
- ? { temperature: parsed.temperature }
1171
- : {}),
1172
- ...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
1173
- ...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
1174
- ...(parsed.stopSequences?.length
1175
- ? { stopSequences: parsed.stopSequences }
1176
- : {}),
1177
- tools: parsed.tools,
1178
- ...(parsed.toolChoice
1179
- ? { toolChoice: parsed.toolChoice }
1180
- : {}),
1181
- ...(parsed.thinkingConfig
1182
- ? { thinkingConfig: parsed.thinkingConfig }
1183
- : {}),
1184
- ...(parsed.conversationMessages?.length
1185
- ? {
1186
- conversationMessages: parsed.conversationMessages.slice(0, -1),
1187
- }
1188
- : {}),
1189
- maxSteps: 1,
1190
- };
1191
- if (body.stream) {
1192
- const streamResult = await ctx.neurolink.stream(opts);
1193
- const serializer = new ClaudeStreamSerializer(body.model, 0);
1194
- async function* sseGenerator() {
1195
- for (const frame of serializer.start()) {
1196
- yield frame;
1197
- }
1198
- for await (const chunk of streamResult.stream) {
1199
- const text = extractText(chunk);
1200
- if (text) {
1201
- for (const frame of serializer.pushDelta(text)) {
1202
- yield frame;
1203
- }
1204
- }
1205
- }
1206
- // Emit tool_use blocks if model wants to call tools
1207
- if (streamResult.toolCalls?.length) {
1208
- for (const tc of streamResult.toolCalls) {
1209
- const toolName = tc.toolName ??
1210
- tc.name ??
1211
- "unknown";
1212
- const toolArgs = tc.args ??
1213
- tc.parameters ??
1214
- {};
1215
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
1216
- yield frame;
1217
- }
1218
- }
1219
- }
1220
- const reason = streamResult.finishReason ?? "end_turn";
1221
- for (const frame of serializer.finish(0, reason)) {
1222
- yield frame;
1223
- }
1224
- }
1225
- return sseGenerator();
1226
- }
1227
- const streamResult = await ctx.neurolink.stream(opts);
1228
- let collectedText = "";
1229
- for await (const chunk of streamResult.stream) {
1230
- const text = extractText(chunk);
1231
- if (text) {
1232
- collectedText += text;
1233
- }
1234
- }
1235
- const internal = {
1236
- content: collectedText,
1237
- model: streamResult.model,
1238
- finishReason: streamResult.finishReason ?? "end_turn",
1239
- reasoning: undefined,
1240
- usage: streamResult.usage
1241
- ? {
1242
- input: streamResult.usage.input ??
1243
- 0,
1244
- output: streamResult.usage
1245
- .output ?? 0,
1246
- total: streamResult.usage.total ??
1247
- 0,
1248
- }
1249
- : undefined,
1250
- toolCalls: streamResult.toolCalls,
1251
- };
1252
- return serializeClaudeResponse(internal, body.model);
1253
- }
1254
- catch (fallbackErr) {
1255
- logger.debug(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
1256
- continue;
1257
- }
1258
- }
1259
- // If no explicit fallback chain is configured, try SDK auto-provider fallback.
1260
- if (chain.length === 0) {
1261
- try {
1262
- logger.always("[proxy] fallback → auto-provider");
1263
- const parsed = parseClaudeRequest(body);
1264
- const opts = {
1265
- input: {
1266
- text: parsed.prompt,
1267
- ...(parsed.images.length > 0
1268
- ? { images: parsed.images }
1269
- : {}),
1270
- },
1271
- systemPrompt: parsed.systemPrompt,
1272
- maxTokens: parsed.maxTokens,
1273
- ...(parsed.temperature !== undefined
1274
- ? { temperature: parsed.temperature }
1275
- : {}),
1276
- ...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
1277
- ...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
1278
- ...(parsed.stopSequences?.length
1279
- ? { stopSequences: parsed.stopSequences }
1280
- : {}),
1281
- tools: parsed.tools,
1282
- ...(parsed.toolChoice
1283
- ? { toolChoice: parsed.toolChoice }
1284
- : {}),
1285
- ...(parsed.thinkingConfig
1286
- ? { thinkingConfig: parsed.thinkingConfig }
1287
- : {}),
1288
- ...(parsed.conversationMessages?.length
1289
- ? {
1290
- conversationMessages: parsed.conversationMessages.slice(0, -1),
1291
- }
1292
- : {}),
1293
- maxSteps: 1,
1294
- };
1295
- if (body.stream) {
1296
- const streamResult = await ctx.neurolink.stream(opts);
1297
- const serializer = new ClaudeStreamSerializer(body.model, 0);
1298
- async function* sseGenerator() {
1299
- for (const frame of serializer.start()) {
1300
- yield frame;
1301
- }
1302
- for await (const chunk of streamResult.stream) {
1303
- const text = extractText(chunk);
1304
- if (text) {
1305
- for (const frame of serializer.pushDelta(text)) {
1306
- yield frame;
1307
- }
1308
- }
1309
- }
1310
- // Emit tool_use blocks if model wants to call tools
1311
- if (streamResult.toolCalls?.length) {
1312
- for (const tc of streamResult.toolCalls) {
1313
- const toolName = tc.toolName ??
1314
- tc.name ??
1315
- "unknown";
1316
- const toolArgs = tc.args ??
1317
- tc.parameters ??
1318
- {};
1319
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
1320
- yield frame;
1321
- }
1322
- }
1323
- }
1324
- const reason = streamResult.finishReason ?? "end_turn";
1325
- for (const frame of serializer.finish(0, reason)) {
1326
- yield frame;
1327
- }
1328
- }
1329
- return sseGenerator();
1330
- }
1331
- const streamResult = await ctx.neurolink.stream(opts);
1332
- let collectedText = "";
1333
- for await (const chunk of streamResult.stream) {
1334
- const text = extractText(chunk);
1335
- if (text) {
1336
- collectedText += text;
1337
- }
1338
- }
1339
- const internal = {
1340
- content: collectedText,
1341
- model: streamResult.model,
1342
- finishReason: streamResult.finishReason ?? "end_turn",
1343
- reasoning: undefined,
1344
- usage: streamResult.usage
1345
- ? {
1346
- input: streamResult.usage.input ??
1347
- 0,
1348
- output: streamResult.usage
1349
- .output ?? 0,
1350
- total: streamResult.usage.total ??
1351
- 0,
1352
- }
1353
- : undefined,
1354
- toolCalls: streamResult.toolCalls,
1355
- };
1356
- return serializeClaudeResponse(internal, body.model);
1357
- }
1358
- catch (fallbackErr) {
1359
- logger.debug(`[proxy] fallback auto-provider failed: ${fallbackErr instanceof Error
1360
- ? fallbackErr.message
1361
- : String(fallbackErr)}`);
1362
- }
1363
- }
1364
- if (authFailureMessage && !sawRateLimit) {
1365
- return buildClaudeError(401, authFailureMessage);
1366
- }
1367
- if ((sawNetworkError || sawTransientFailure) && !sawRateLimit) {
1368
- return buildClaudeError(502, `All Anthropic accounts failed due to transient upstream/network errors. Last error: ${lastError instanceof Error
1369
- ? lastError.message
1370
- : String(lastError ?? "unknown")}`);
1371
- }
1372
- if (!sawRateLimit) {
1373
- return buildClaudeError(502, `All Anthropic accounts failed. Last error: ${lastError instanceof Error
1374
- ? lastError.message
1375
- : String(lastError ?? "unknown")}`);
1376
- }
1377
- // All accounts AND all fallbacks exhausted — return 429 with Retry-After
1378
- logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
1379
- const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`, "overloaded_error");
1380
- return new Response(JSON.stringify(errorBody), {
1381
- status: 429,
1382
- headers: {
1383
- "content-type": "application/json",
1384
- "retry-after": String(retryAfterSec),
1385
- },
3076
+ return handleAnthropicRoutedClaudeRequest({
3077
+ ctx,
3078
+ body,
3079
+ modelRouter,
3080
+ tracer,
3081
+ requestStartTime,
3082
+ accountStrategy,
3083
+ buildLoggedClaudeError,
3084
+ logProxyBody,
3085
+ logFinalRequest,
1386
3086
  });
1387
3087
  }
1388
3088
  else {
1389
- // ─── TRANSLATION MODE (Claude → Other Provider) ───────
1390
- // Parse into NeuroLink format, call generate/stream, serialize back
1391
- const parsed = parseClaudeRequest(body);
1392
- const historyMessages = parsed.conversationMessages.slice(0, -1);
1393
- const options = {
1394
- input: {
1395
- text: parsed.prompt,
1396
- ...(parsed.images.length > 0
1397
- ? { images: parsed.images }
1398
- : {}),
3089
+ return handleTranslatedClaudeRequest({
3090
+ ctx,
3091
+ body,
3092
+ route: {
3093
+ provider: route.provider,
3094
+ model: route.model,
1399
3095
  },
1400
- provider: route.provider,
1401
- model: route.model,
1402
- systemPrompt: parsed.systemPrompt,
1403
- maxTokens: parsed.maxTokens,
1404
- ...(parsed.temperature !== undefined
1405
- ? { temperature: parsed.temperature }
1406
- : {}),
1407
- ...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
1408
- ...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
1409
- ...(parsed.stopSequences?.length
1410
- ? { stopSequences: parsed.stopSequences }
1411
- : {}),
1412
- ...(parsed.thinkingConfig
1413
- ? { thinkingConfig: parsed.thinkingConfig }
1414
- : {}),
1415
- tools: parsed.tools,
1416
- ...(parsed.toolChoice ? { toolChoice: parsed.toolChoice } : {}),
1417
- maxSteps: 1,
1418
- ...(historyMessages.length > 0
1419
- ? { conversationMessages: historyMessages }
1420
- : {}),
1421
- };
1422
- if (body.stream) {
1423
- const streamResult = await ctx.neurolink.stream(options);
1424
- const serializer = new ClaudeStreamSerializer(body.model, 0);
1425
- const KEEPALIVE_INTERVAL_MS = 15_000; // 15 seconds
1426
- // Return a ReadableStream that emits SSE keep-alive comments
1427
- // every ~15s independently of upstream chunk arrival, so
1428
- // intermediaries don't drop the connection during stalls.
1429
- const encoder = new TextEncoder();
1430
- let translationKeepAliveTimer;
1431
- let translationCancelled = false;
1432
- // Hold a reference to the upstream async iterator so
1433
- // we can abort it when the client disconnects.
1434
- let upstreamIterator;
1435
- const translationStream = new ReadableStream({
1436
- async start(controller) {
1437
- // Emit start frames
1438
- for (const frame of serializer.start()) {
1439
- controller.enqueue(encoder.encode(frame));
1440
- }
1441
- // Keep-alive interval — fires even when upstream is stalled
1442
- translationKeepAliveTimer = setInterval(() => {
1443
- try {
1444
- controller.enqueue(encoder.encode(": keep-alive\n\n"));
1445
- }
1446
- catch {
1447
- // Controller already closed — ignore
1448
- }
1449
- }, KEEPALIVE_INTERVAL_MS);
1450
- try {
1451
- const iterable = streamResult.stream;
1452
- upstreamIterator = iterable[Symbol.asyncIterator]();
1453
- // Manually drive the async iterator so we can cancel it
1454
- while (true) {
1455
- if (translationCancelled) {
1456
- break;
1457
- }
1458
- const { value: chunk, done } = await upstreamIterator.next();
1459
- if (done) {
1460
- break;
1461
- }
1462
- if (translationCancelled) {
1463
- break;
1464
- }
1465
- const text = extractText(chunk);
1466
- if (text) {
1467
- for (const frame of serializer.pushDelta(text)) {
1468
- controller.enqueue(encoder.encode(frame));
1469
- }
1470
- }
1471
- }
1472
- // Emit tool_use blocks if model wants to call tools
1473
- if (!translationCancelled &&
1474
- streamResult.toolCalls?.length) {
1475
- for (const tc of streamResult.toolCalls) {
1476
- const toolName = tc.toolName ??
1477
- tc.name ??
1478
- "unknown";
1479
- const toolArgs = tc.args ??
1480
- tc.parameters ??
1481
- {};
1482
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
1483
- controller.enqueue(encoder.encode(frame));
1484
- }
1485
- }
1486
- }
1487
- if (!translationCancelled) {
1488
- const reason = streamResult.finishReason ?? "end_turn";
1489
- for (const frame of serializer.finish(0, reason)) {
1490
- controller.enqueue(encoder.encode(frame));
1491
- }
1492
- }
1493
- }
1494
- catch (streamErr) {
1495
- if (translationCancelled) {
1496
- return;
1497
- }
1498
- const errMsg = streamErr instanceof Error
1499
- ? streamErr.message
1500
- : String(streamErr);
1501
- logger.always(`[proxy] mid-stream error (translation mode): ${errMsg}`);
1502
- const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
1503
- controller.enqueue(encoder.encode(errorEvent));
1504
- }
1505
- finally {
1506
- if (translationKeepAliveTimer) {
1507
- clearInterval(translationKeepAliveTimer);
1508
- }
1509
- if (!translationCancelled) {
1510
- controller.close();
1511
- }
1512
- }
1513
- },
1514
- cancel() {
1515
- translationCancelled = true;
1516
- if (translationKeepAliveTimer) {
1517
- clearInterval(translationKeepAliveTimer);
1518
- translationKeepAliveTimer = undefined;
1519
- }
1520
- // Propagate cancellation to the upstream provider stream
1521
- if (upstreamIterator?.return) {
1522
- upstreamIterator.return(undefined).catch((cancelErr) => {
1523
- logger.debug(`[proxy] upstream cancel error: ${cancelErr instanceof Error ? cancelErr.message : String(cancelErr)}`);
1524
- });
1525
- }
1526
- },
1527
- });
1528
- return new Response(translationStream, {
1529
- headers: {
1530
- "content-type": "text/event-stream",
1531
- "cache-control": "no-cache",
1532
- connection: "keep-alive",
1533
- },
1534
- });
1535
- }
1536
- const streamResult = await ctx.neurolink.stream(options);
1537
- let collectedText = "";
1538
- for await (const chunk of streamResult.stream) {
1539
- const text = extractText(chunk);
1540
- if (text) {
1541
- collectedText += text;
1542
- }
1543
- }
1544
- const internal = {
1545
- content: collectedText,
1546
- model: streamResult.model,
1547
- finishReason: streamResult.finishReason ?? "end_turn",
1548
- reasoning: undefined,
1549
- usage: streamResult.usage
1550
- ? {
1551
- input: streamResult.usage.input ?? 0,
1552
- output: streamResult.usage.output ?? 0,
1553
- total: streamResult.usage.total ?? 0,
1554
- }
1555
- : undefined,
1556
- toolCalls: streamResult.toolCalls,
1557
- };
1558
- return serializeClaudeResponse(internal, body.model);
3096
+ modelRouter,
3097
+ tracer,
3098
+ requestStartTime,
3099
+ logProxyBody,
3100
+ });
1559
3101
  }
1560
3102
  }
1561
3103
  catch (error) {
1562
- logger.error(`[claude-proxy] Generation error for ${body.model}: ${error instanceof Error ? error.message : String(error)}`);
1563
- return buildClaudeError(502, `Generation failed: ${error instanceof Error ? error.message : "unknown error"}`);
3104
+ const errMsg = error instanceof Error ? error.message : String(error);
3105
+ logger.error(`[claude-proxy] Generation error for ${body.model}: ${errMsg}`);
3106
+ tracer?.setError("generation_error", errMsg.slice(0, 500));
3107
+ tracer?.end(502, Date.now() - requestStartTime);
3108
+ return buildLoggedClaudeError(502, `Generation failed: ${error instanceof Error ? error.message : "unknown error"}`);
1564
3109
  }
1565
3110
  },
1566
3111
  description: "Claude-compatible messages endpoint routed through NeuroLink",
@@ -1621,6 +3166,26 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1621
3166
  // ---------------------------------------------------------------------------
1622
3167
  // Helpers
1623
3168
  // ---------------------------------------------------------------------------
3169
/**
 * Extract token usage from a StreamResult.usage object, handling multiple
 * naming conventions across AI SDK versions and providers:
 * - AI SDK v6: inputTokens / outputTokens
 * - AI SDK v4: promptTokens / completionTokens
 * - NeuroLink internal: input / output
 *
 * For each direction the first finite, non-zero count wins, checked in the
 * order listed above. Non-numeric, NaN and infinite values are ignored so a
 * malformed usage payload cannot surface as NaN in the reported counts.
 *
 * @param {unknown} usage - Raw usage value; may be missing or not an object.
 * @returns {{ input: number, output: number, total: number }} Normalized
 *   counts. `total` is always computed here as `input + output`.
 */
function extractUsageFromStreamResult(usage) {
    if (!usage || typeof usage !== "object") {
        return { input: 0, output: 0, total: 0 };
    }
    // Return the first usable count among the candidate keys, or 0 when none
    // of them holds a finite, non-zero number.
    const pickCount = (...keys) => {
        for (const key of keys) {
            const value = usage[key];
            if (typeof value === "number" &&
                Number.isFinite(value) &&
                value !== 0) {
                return value;
            }
        }
        return 0;
    };
    const input = pickCount("inputTokens", "promptTokens", "input");
    const output = pickCount("outputTokens", "completionTokens", "output");
    return { input, output, total: input + output };
}
1624
3189
  /**
1625
3190
  * Extract text content from a stream chunk (handles various chunk formats).
1626
3191
  */
@@ -1781,6 +3346,124 @@ export function isInvalidRequestError(status, errBody) {
1781
3346
  return (parsed.errorType === "invalid_request_error" ||
1782
3347
  errBody.includes("invalid_request_error"));
1783
3348
  }
3349
/**
 * Return a shallow copy of a Claude-style request body in which every message
 * whose `content` is a plain string is rewritten to the content-block form
 * `[{ type: "text", text }]`.
 *
 * Messages whose content is not a string (including null/undefined entries)
 * are passed through as the same reference. The input body and its messages
 * are never mutated.
 *
 * When `body.messages` is not an array the body is copied unchanged instead
 * of raising a TypeError from `.map`, so a malformed request is not turned
 * into an internal exception by this normalization step.
 *
 * @param {object} body - Claude messages-API request body.
 * @returns {object} New body object with normalized `messages`.
 */
function normalizeClaudeRequestForAnthropic(body) {
    if (!Array.isArray(body.messages)) {
        return { ...body };
    }
    return {
        ...body,
        messages: body.messages.map((msg) => {
            // `?.` tolerates null/undefined entries; they are forwarded as-is.
            if (typeof msg?.content !== "string") {
                return msg;
            }
            return {
                ...msg,
                content: [{ type: "text", text: msg.content }],
            };
        }),
    };
}
3363
/**
 * Assemble the NeuroLink generate/stream options used for a translated or
 * fallback attempt of a parsed Claude-compatible request.
 *
 * @param parsed - Parsed Claude request; this function reads `prompt`,
 *   `images`, `tools`, `systemPrompt`, `maxTokens`, the sampling settings,
 *   `stopSequences`, `thinkingConfig`, `toolChoice` / `toolChoiceName` and
 *   `conversationMessages`.
 * @param overrides - Optional `{ provider, model }` target. Falsy values are
 *   left out of the result entirely.
 * @returns Options object with a fixed `maxSteps: 1`,
 *   `disableInternalFallback: true` and `skipToolPromptInjection: true`.
 */
export function buildProxyFallbackOptions(parsed, overrides = {}) {
    // Everything except the final conversation entry is forwarded as history.
    const historyMessages = parsed.conversationMessages.slice(0, -1);
    const toolNames = Object.keys(parsed.tools);
    let images = [];
    if (!shouldOmitImagesForTarget(overrides.provider, overrides.model)) {
        images = parsed.images;
    }
    let thinkingConfig;
    if (!shouldOmitThinkingConfigForTarget(overrides.provider, overrides.model)) {
        thinkingConfig = parsed.thinkingConfig;
    }
    // An explicitly named tool takes precedence over the generic tool choice.
    let toolChoice = parsed.toolChoice;
    if (parsed.toolChoiceName) {
        toolChoice = { type: "tool", toolName: parsed.toolChoiceName };
    }
    // Keys are added in a fixed order so the resulting object has the same
    // shape and key order on every call.
    const options = { input: { text: parsed.prompt } };
    if (images.length > 0) {
        options.input.images = images;
    }
    if (overrides.provider) {
        options.provider = overrides.provider;
    }
    if (overrides.model) {
        options.model = overrides.model;
    }
    options.systemPrompt = parsed.systemPrompt;
    options.maxTokens = parsed.maxTokens;
    if (parsed.temperature !== undefined) {
        options.temperature = parsed.temperature;
    }
    if (parsed.topP !== undefined) {
        options.topP = parsed.topP;
    }
    if (parsed.topK !== undefined) {
        options.topK = parsed.topK;
    }
    if (parsed.stopSequences?.length) {
        options.stopSequences = parsed.stopSequences;
    }
    if (thinkingConfig) {
        options.thinkingConfig = thinkingConfig;
    }
    if (toolNames.length === 0) {
        options.disableTools = true;
    }
    else {
        // The Claude-compatible request already declares its exact tool
        // contract, so restrict the exposed tools to the ones the client
        // named and keep NeuroLink's built-in agent tools out of the
        // translated request.
        options.tools = parsed.tools;
        options.toolFilter = toolNames;
    }
    if (toolChoice) {
        options.toolChoice = toolChoice;
    }
    if (historyMessages.length > 0) {
        options.conversationMessages = historyMessages;
    }
    options.disableInternalFallback = true;
    options.skipToolPromptInjection = true;
    options.maxSteps = 1;
    return options;
}
3412
/**
 * Produce the ordered list of provider/model targets to try for a translated
 * request: the primary route first, then each usable entry of the router's
 * fallback chain.
 *
 * Chain entries that repeat the primary target, or that
 * `shouldSkipTranslationTarget` rejects for this request, are left out. When
 * the router supplies no chain at all, a final `{ label: "auto-provider" }`
 * attempt (no provider/model) is appended.
 *
 * @param primary - `{ provider, model }` of the primary route.
 * @param modelRouter - Optional router exposing `getFallbackChain()`.
 * @param parsed - Parsed Claude request, forwarded to the skip check.
 * @returns Array of `{ provider?, model?, label }` attempts.
 */
export function buildProxyTranslationAttempts(primary, modelRouter, parsed) {
    const primaryAttempt = {
        provider: primary.provider,
        model: primary.model,
        label: `${primary.provider}/${primary.model ?? "unknown"}`,
    };
    const attempts = [primaryAttempt];
    const chain = modelRouter?.getFallbackChain() ?? [];
    for (const { provider, model } of chain) {
        const duplicatesPrimary = provider === primary.provider && model === primary.model;
        if (duplicatesPrimary) {
            continue;
        }
        if (!shouldSkipTranslationTarget(provider, model, parsed)) {
            attempts.push({ provider, model, label: `${provider}/${model}` });
        }
    }
    const hasConfiguredChain = chain.length !== 0;
    if (!hasConfiguredChain) {
        attempts.push({ label: "auto-provider" });
    }
    return attempts;
}
3440
/**
 * Tell whether a translated attempt produced anything: either text that is
 * not purely whitespace, or at least one tool call.
 *
 * @param collectedText - Text gathered from the response (must be a string).
 * @param toolCalls - Optional list of tool calls.
 * @returns {boolean} True when there is non-blank text or a tool call.
 */
function hasTranslatedOutput(collectedText, toolCalls) {
    const hasText = collectedText.trim().length > 0;
    if (hasText) {
        return true;
    }
    const toolCallCount = toolCalls?.length ?? 0;
    return toolCallCount > 0;
}
3443
/**
 * Decide whether binary image payloads must be dropped for a target.
 *
 * Only the `litellm` / `open-large` pairing qualifies: in this LiteLLM setup
 * that model handles text and tools but returns an empty completion when
 * binary images are forwarded. Claude Code already places textual image
 * markers in the prompt, so dropping just the binary payload keeps the
 * request usable instead of breaking the fallback.
 *
 * @param provider - Target provider name.
 * @param model - Target model name.
 * @returns {boolean} True when images should be omitted.
 */
function shouldOmitImagesForTarget(provider, model) {
    if (provider !== "litellm") {
        return false;
    }
    return model === "open-large";
}
3450
/**
 * Decide whether the thinking configuration must be left out for a target.
 * Only the `vertex` / `gemini-2.5-flash` pairing qualifies.
 *
 * @param provider - Target provider name.
 * @param model - Target model name.
 * @returns {boolean} True when the thinking config should be omitted.
 */
function shouldOmitThinkingConfigForTarget(provider, model) {
    if (provider !== "vertex") {
        return false;
    }
    return model === "gemini-2.5-flash";
}
3453
/**
 * Decide whether a fallback target should be skipped for this request.
 * The only skipped case is `ollama` / `qwen2.5:0.5b` when the parsed request
 * carries at least one image.
 *
 * @param provider - Target provider name.
 * @param model - Target model name.
 * @param parsed - Optional parsed request; `images` is only read when the
 *   provider/model pair matches.
 * @returns {boolean} True when the target should not be attempted.
 */
function shouldSkipTranslationTarget(provider, model, parsed) {
    const isTargetedPair = provider === "ollama" && model === "qwen2.5:0.5b";
    if (!isTargetedPair) {
        return false;
    }
    const imageCount = parsed?.images.length ?? 0;
    return imageCount > 0;
}
3461
/**
 * Read the arguments of a tool call regardless of which field carries them.
 * Fields are checked in the order `args`, `parameters`, `input`; the first
 * one that is neither null nor undefined is returned.
 *
 * @param toolCall - Tool call object (must not be null/undefined).
 * @returns The arguments value, or a fresh empty object when no field is set.
 */
function extractToolArgs(toolCall) {
    for (const field of ["args", "parameters", "input"]) {
        const candidate = toolCall[field];
        if (candidate !== undefined && candidate !== null) {
            return candidate;
        }
    }
    return {};
}
1784
3467
  /**
1785
3468
  * Detect transient upstream failures that should trigger account/provider failover.
1786
3469
  *