@juspay/neurolink 9.40.0 → 9.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +7 -1
  3. package/dist/auth/anthropicOAuth.d.ts +18 -3
  4. package/dist/auth/anthropicOAuth.js +137 -4
  5. package/dist/auth/providers/firebase.js +5 -1
  6. package/dist/auth/providers/jwt.js +5 -1
  7. package/dist/auth/providers/workos.js +5 -1
  8. package/dist/auth/sessionManager.d.ts +1 -1
  9. package/dist/auth/sessionManager.js +58 -27
  10. package/dist/browser/neurolink.min.js +471 -445
  11. package/dist/cli/commands/mcp.js +3 -0
  12. package/dist/cli/commands/proxy.d.ts +2 -1
  13. package/dist/cli/commands/proxy.js +279 -16
  14. package/dist/cli/commands/task.d.ts +56 -0
  15. package/dist/cli/commands/task.js +838 -0
  16. package/dist/cli/factories/commandFactory.d.ts +2 -0
  17. package/dist/cli/factories/commandFactory.js +38 -0
  18. package/dist/cli/parser.js +8 -4
  19. package/dist/client/aiSdkAdapter.js +3 -0
  20. package/dist/client/streamingClient.js +30 -10
  21. package/dist/core/modules/GenerationHandler.js +3 -2
  22. package/dist/core/redisConversationMemoryManager.js +7 -3
  23. package/dist/evaluation/BatchEvaluator.js +4 -1
  24. package/dist/evaluation/hooks/observabilityHooks.js +5 -3
  25. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  26. package/dist/evaluation/pipeline/evaluationPipeline.js +20 -8
  27. package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  28. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  29. package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
  30. package/dist/lib/auth/anthropicOAuth.js +137 -4
  31. package/dist/lib/auth/providers/firebase.js +5 -1
  32. package/dist/lib/auth/providers/jwt.js +5 -1
  33. package/dist/lib/auth/providers/workos.js +5 -1
  34. package/dist/lib/auth/sessionManager.d.ts +1 -1
  35. package/dist/lib/auth/sessionManager.js +58 -27
  36. package/dist/lib/client/aiSdkAdapter.js +3 -0
  37. package/dist/lib/client/streamingClient.js +30 -10
  38. package/dist/lib/core/modules/GenerationHandler.js +3 -2
  39. package/dist/lib/core/redisConversationMemoryManager.js +7 -3
  40. package/dist/lib/evaluation/BatchEvaluator.js +4 -1
  41. package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
  42. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  43. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +20 -8
  44. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  45. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  46. package/dist/lib/neurolink.d.ts +18 -1
  47. package/dist/lib/neurolink.js +367 -484
  48. package/dist/lib/observability/otelBridge.d.ts +2 -2
  49. package/dist/lib/observability/otelBridge.js +12 -3
  50. package/dist/lib/providers/amazonBedrock.js +2 -4
  51. package/dist/lib/providers/anthropic.d.ts +9 -5
  52. package/dist/lib/providers/anthropic.js +19 -14
  53. package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
  54. package/dist/lib/providers/anthropicBaseProvider.js +5 -4
  55. package/dist/lib/providers/azureOpenai.d.ts +1 -1
  56. package/dist/lib/providers/azureOpenai.js +5 -4
  57. package/dist/lib/providers/googleAiStudio.js +30 -1
  58. package/dist/lib/providers/googleVertex.js +28 -6
  59. package/dist/lib/providers/huggingFace.d.ts +3 -3
  60. package/dist/lib/providers/huggingFace.js +6 -8
  61. package/dist/lib/providers/litellm.js +41 -29
  62. package/dist/lib/providers/mistral.js +2 -1
  63. package/dist/lib/providers/ollama.js +80 -23
  64. package/dist/lib/providers/openAI.js +3 -2
  65. package/dist/lib/providers/openRouter.js +2 -1
  66. package/dist/lib/providers/openaiCompatible.d.ts +4 -4
  67. package/dist/lib/providers/openaiCompatible.js +4 -4
  68. package/dist/lib/proxy/claudeFormat.d.ts +3 -2
  69. package/dist/lib/proxy/claudeFormat.js +25 -20
  70. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  71. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  72. package/dist/lib/proxy/modelRouter.js +3 -0
  73. package/dist/lib/proxy/oauthFetch.d.ts +1 -1
  74. package/dist/lib/proxy/oauthFetch.js +65 -72
  75. package/dist/lib/proxy/proxyConfig.js +44 -24
  76. package/dist/lib/proxy/proxyEnv.d.ts +19 -0
  77. package/dist/lib/proxy/proxyEnv.js +73 -0
  78. package/dist/lib/proxy/proxyFetch.js +50 -4
  79. package/dist/lib/proxy/proxyTracer.d.ts +133 -0
  80. package/dist/lib/proxy/proxyTracer.js +645 -0
  81. package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
  82. package/dist/lib/proxy/rawStreamCapture.js +83 -0
  83. package/dist/lib/proxy/requestLogger.d.ts +32 -5
  84. package/dist/lib/proxy/requestLogger.js +406 -37
  85. package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
  86. package/dist/lib/proxy/sseInterceptor.js +402 -0
  87. package/dist/lib/proxy/usageStats.d.ts +4 -3
  88. package/dist/lib/proxy/usageStats.js +25 -12
  89. package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
  90. package/dist/lib/rag/chunking/markdownChunker.js +15 -6
  91. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +7 -2
  92. package/dist/lib/server/routes/claudeProxyRoutes.js +1737 -508
  93. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
  94. package/dist/lib/services/server/ai/observability/instrumentation.js +240 -40
  95. package/dist/lib/tasks/backends/bullmqBackend.d.ts +33 -0
  96. package/dist/lib/tasks/backends/bullmqBackend.js +196 -0
  97. package/dist/lib/tasks/backends/nodeTimeoutBackend.d.ts +27 -0
  98. package/dist/lib/tasks/backends/nodeTimeoutBackend.js +141 -0
  99. package/dist/lib/tasks/backends/taskBackendRegistry.d.ts +31 -0
  100. package/dist/lib/tasks/backends/taskBackendRegistry.js +66 -0
  101. package/dist/lib/tasks/errors.d.ts +31 -0
  102. package/dist/lib/tasks/errors.js +18 -0
  103. package/dist/lib/tasks/store/fileTaskStore.d.ts +43 -0
  104. package/dist/lib/tasks/store/fileTaskStore.js +179 -0
  105. package/dist/lib/tasks/store/redisTaskStore.d.ts +43 -0
  106. package/dist/lib/tasks/store/redisTaskStore.js +197 -0
  107. package/dist/lib/tasks/taskExecutor.d.ts +21 -0
  108. package/dist/lib/tasks/taskExecutor.js +166 -0
  109. package/dist/lib/tasks/taskManager.d.ts +63 -0
  110. package/dist/lib/tasks/taskManager.js +426 -0
  111. package/dist/lib/tasks/tools/taskTools.d.ts +135 -0
  112. package/dist/lib/tasks/tools/taskTools.js +274 -0
  113. package/dist/lib/telemetry/index.d.ts +2 -1
  114. package/dist/lib/telemetry/index.js +2 -1
  115. package/dist/lib/telemetry/telemetryService.d.ts +3 -0
  116. package/dist/lib/telemetry/telemetryService.js +65 -5
  117. package/dist/lib/types/cli.d.ts +10 -0
  118. package/dist/lib/types/configTypes.d.ts +3 -0
  119. package/dist/lib/types/generateTypes.d.ts +13 -0
  120. package/dist/lib/types/index.d.ts +1 -0
  121. package/dist/lib/types/proxyTypes.d.ts +37 -5
  122. package/dist/lib/types/streamTypes.d.ts +25 -3
  123. package/dist/lib/types/taskTypes.d.ts +275 -0
  124. package/dist/lib/types/taskTypes.js +37 -0
  125. package/dist/lib/utils/messageBuilder.js +3 -2
  126. package/dist/lib/utils/providerHealth.d.ts +18 -0
  127. package/dist/lib/utils/providerHealth.js +240 -9
  128. package/dist/lib/utils/providerUtils.js +14 -8
  129. package/dist/lib/utils/toolChoice.d.ts +4 -0
  130. package/dist/lib/utils/toolChoice.js +7 -0
  131. package/dist/neurolink.d.ts +18 -1
  132. package/dist/neurolink.js +367 -484
  133. package/dist/observability/otelBridge.d.ts +2 -2
  134. package/dist/observability/otelBridge.js +12 -3
  135. package/dist/providers/amazonBedrock.js +2 -4
  136. package/dist/providers/anthropic.d.ts +9 -5
  137. package/dist/providers/anthropic.js +19 -14
  138. package/dist/providers/anthropicBaseProvider.d.ts +3 -3
  139. package/dist/providers/anthropicBaseProvider.js +5 -4
  140. package/dist/providers/azureOpenai.d.ts +1 -1
  141. package/dist/providers/azureOpenai.js +5 -4
  142. package/dist/providers/googleAiStudio.js +30 -1
  143. package/dist/providers/googleVertex.js +28 -6
  144. package/dist/providers/huggingFace.d.ts +3 -3
  145. package/dist/providers/huggingFace.js +6 -7
  146. package/dist/providers/litellm.js +41 -29
  147. package/dist/providers/mistral.js +2 -1
  148. package/dist/providers/ollama.js +80 -23
  149. package/dist/providers/openAI.js +3 -2
  150. package/dist/providers/openRouter.js +2 -1
  151. package/dist/providers/openaiCompatible.d.ts +4 -4
  152. package/dist/providers/openaiCompatible.js +4 -3
  153. package/dist/proxy/claudeFormat.d.ts +3 -2
  154. package/dist/proxy/claudeFormat.js +25 -20
  155. package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  156. package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  157. package/dist/proxy/modelRouter.js +3 -0
  158. package/dist/proxy/oauthFetch.d.ts +1 -1
  159. package/dist/proxy/oauthFetch.js +65 -72
  160. package/dist/proxy/proxyConfig.js +44 -24
  161. package/dist/proxy/proxyEnv.d.ts +19 -0
  162. package/dist/proxy/proxyEnv.js +72 -0
  163. package/dist/proxy/proxyFetch.js +50 -4
  164. package/dist/proxy/proxyTracer.d.ts +133 -0
  165. package/dist/proxy/proxyTracer.js +644 -0
  166. package/dist/proxy/rawStreamCapture.d.ts +10 -0
  167. package/dist/proxy/rawStreamCapture.js +82 -0
  168. package/dist/proxy/requestLogger.d.ts +32 -5
  169. package/dist/proxy/requestLogger.js +406 -37
  170. package/dist/proxy/sseInterceptor.d.ts +97 -0
  171. package/dist/proxy/sseInterceptor.js +401 -0
  172. package/dist/proxy/usageStats.d.ts +4 -3
  173. package/dist/proxy/usageStats.js +25 -12
  174. package/dist/rag/chunkers/MarkdownChunker.js +13 -5
  175. package/dist/rag/chunking/markdownChunker.js +15 -6
  176. package/dist/server/routes/claudeProxyRoutes.d.ts +7 -2
  177. package/dist/server/routes/claudeProxyRoutes.js +1737 -508
  178. package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
  179. package/dist/services/server/ai/observability/instrumentation.js +240 -40
  180. package/dist/tasks/backends/bullmqBackend.d.ts +33 -0
  181. package/dist/tasks/backends/bullmqBackend.js +195 -0
  182. package/dist/tasks/backends/nodeTimeoutBackend.d.ts +27 -0
  183. package/dist/tasks/backends/nodeTimeoutBackend.js +140 -0
  184. package/dist/tasks/backends/taskBackendRegistry.d.ts +31 -0
  185. package/dist/tasks/backends/taskBackendRegistry.js +65 -0
  186. package/dist/tasks/errors.d.ts +31 -0
  187. package/dist/tasks/errors.js +17 -0
  188. package/dist/tasks/store/fileTaskStore.d.ts +43 -0
  189. package/dist/tasks/store/fileTaskStore.js +178 -0
  190. package/dist/tasks/store/redisTaskStore.d.ts +43 -0
  191. package/dist/tasks/store/redisTaskStore.js +196 -0
  192. package/dist/tasks/taskExecutor.d.ts +21 -0
  193. package/dist/tasks/taskExecutor.js +165 -0
  194. package/dist/tasks/taskManager.d.ts +63 -0
  195. package/dist/tasks/taskManager.js +425 -0
  196. package/dist/tasks/tools/taskTools.d.ts +135 -0
  197. package/dist/tasks/tools/taskTools.js +273 -0
  198. package/dist/telemetry/index.d.ts +2 -1
  199. package/dist/telemetry/index.js +2 -1
  200. package/dist/telemetry/telemetryService.d.ts +3 -0
  201. package/dist/telemetry/telemetryService.js +65 -5
  202. package/dist/types/cli.d.ts +10 -0
  203. package/dist/types/configTypes.d.ts +3 -0
  204. package/dist/types/generateTypes.d.ts +13 -0
  205. package/dist/types/index.d.ts +1 -0
  206. package/dist/types/proxyTypes.d.ts +37 -5
  207. package/dist/types/streamTypes.d.ts +25 -3
  208. package/dist/types/taskTypes.d.ts +275 -0
  209. package/dist/types/taskTypes.js +36 -0
  210. package/dist/utils/messageBuilder.js +3 -2
  211. package/dist/utils/providerHealth.d.ts +18 -0
  212. package/dist/utils/providerHealth.js +240 -9
  213. package/dist/utils/providerUtils.js +14 -8
  214. package/dist/utils/toolChoice.d.ts +4 -0
  215. package/dist/utils/toolChoice.js +6 -0
  216. package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
  217. package/docs/changelog.md +252 -0
  218. package/package.json +19 -1
  219. package/scripts/observability/check-proxy-telemetry.mjs +235 -0
  220. package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
  221. package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
  222. package/scripts/observability/manage-local-openobserve.sh +184 -0
  223. package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
  224. package/scripts/observability/proxy-observability.env.example +23 -0
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-depth */
1
2
  /**
2
3
  * Claude-Compatible Proxy Routes
3
4
  *
@@ -9,20 +10,23 @@
9
10
  * provider/model pairs (e.g. "claude-sonnet-4-20250514" -> vertex/gemini-2.5-pro).
10
11
  * Without a router, models are passed through to the Anthropic provider.
11
12
  */
12
- import { readFile, access } from "node:fs/promises";
13
- import { join } from "node:path";
13
+ import { access, mkdir, readFile, rename, writeFile } from "node:fs/promises";
14
14
  import { homedir } from "node:os";
15
- import { parseClaudeRequest, serializeClaudeResponse, ClaudeStreamSerializer, buildClaudeError, generateToolUseId, } from "../../proxy/claudeFormat.js";
15
+ import { join } from "node:path";
16
+ import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
17
+ import { parseQuotaHeaders, saveAccountQuota } from "../../proxy/accountQuota.js";
18
+ import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js";
19
+ import { ProxyTracer } from "../../proxy/proxyTracer.js";
20
+ import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
21
+ import { logBodyCapture, logRequest, logRequestAttempt, logStreamError } from "../../proxy/requestLogger.js";
22
+ import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
23
+ import { needsRefresh, persistTokens, refreshToken } from "../../proxy/tokenRefresh.js";
24
+ import { recordAttempt, recordAttemptError, recordCooldown, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
16
25
  import { logger } from "../../utils/logger.js";
17
- import { recordRequest, recordSuccess, recordError, recordCooldown, } from "../../proxy/usageStats.js";
18
- import { logRequest, logFullRequestResponse, logStreamError, } from "../../proxy/requestLogger.js";
19
- import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js";
20
- import { needsRefresh, refreshToken, persistTokens, } from "../../proxy/tokenRefresh.js";
26
+ import { ProviderHealthChecker } from "../../utils/providerHealth.js";
21
27
  // ---------------------------------------------------------------------------
22
28
  // Helpers
23
29
  // ---------------------------------------------------------------------------
24
- /** Header names whose values must be masked in debug logs. */
25
- const SENSITIVE_HEADERS = new Set(["authorization", "x-api-key"]);
26
30
  /** Headers that must never be forwarded upstream to Anthropic. */
27
31
  const BLOCKED_UPSTREAM_HEADERS = new Set([
28
32
  "cookie",
@@ -32,22 +36,6 @@ const BLOCKED_UPSTREAM_HEADERS = new Set([
32
36
  "content-length",
33
37
  "transfer-encoding",
34
38
  ]);
35
- /** Return a shallow copy of `headers` with sensitive values redacted. */
36
- function redactSensitiveHeaders(headers) {
37
- const redacted = {};
38
- for (const [key, value] of Object.entries(headers)) {
39
- if (SENSITIVE_HEADERS.has(key.toLowerCase()) && value.length > 8) {
40
- redacted[key] = value.substring(0, 8) + "...";
41
- }
42
- else if (SENSITIVE_HEADERS.has(key.toLowerCase())) {
43
- redacted[key] = "***";
44
- }
45
- else {
46
- redacted[key] = value;
47
- }
48
- }
49
- return redacted;
50
- }
51
39
  // ---------------------------------------------------------------------------
52
40
  // Module-level state
53
41
  // ---------------------------------------------------------------------------
@@ -83,65 +71,192 @@ function advancePrimaryIfCurrent(accountKey, enabledCount, primaryAccountKey) {
83
71
  }
84
72
  primaryAccountIndex = (primaryAccountIndex + 1) % enabledCount;
85
73
  }
86
- // ---------------------------------------------------------------------------
87
- // OAuth polyfill helpers (extracted to reduce block nesting)
88
- // ---------------------------------------------------------------------------
89
74
  const snapshotCache = new Map();
90
75
  const SNAPSHOT_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
91
- /**
92
- * Load a header snapshot captured from a real Claude Code session and apply
93
- * any headers the client didn't send. This makes non-Claude-Code requests
94
- * (e.g. from Curator, custom apps) appear identical to Claude Code.
95
- */
96
- async function applyHeaderSnapshot(headers, accountLabel) {
76
+ const SNAPSHOT_STABLE_HEADERS = new Set([
77
+ "accept",
78
+ "accept-encoding",
79
+ "accept-language",
80
+ "anthropic-beta",
81
+ "anthropic-dangerous-direct-browser-access",
82
+ "anthropic-version",
83
+ "sec-fetch-mode",
84
+ "user-agent",
85
+ "x-app",
86
+ "x-stainless-arch",
87
+ "x-stainless-lang",
88
+ "x-stainless-os",
89
+ "x-stainless-package-version",
90
+ "x-stainless-retry-count",
91
+ "x-stainless-runtime",
92
+ "x-stainless-runtime-version",
93
+ "x-stainless-timeout",
94
+ "x-subscription-tier",
95
+ ]);
96
+ const NON_CLAUDE_OAUTH_BETAS = [
97
+ "oauth-2025-04-20",
98
+ "claude-code-20250219",
99
+ "fine-grained-tool-streaming-2025-05-14",
100
+ ];
101
+ function getSnapshotSafeLabel(accountLabel) {
102
+ return accountLabel.replace(/[^a-zA-Z0-9._@-]/g, "_");
103
+ }
104
+ function getSnapshotPath(accountLabel) {
105
+ return join(homedir(), ".neurolink", "header-snapshots", `anthropic_${getSnapshotSafeLabel(accountLabel)}.json`);
106
+ }
107
+ function applySnapshotHeaders(headers, snapshot) {
108
+ if (!snapshot?.headers) {
109
+ return;
110
+ }
111
+ for (const [sk, sv] of Object.entries(snapshot.headers)) {
112
+ const lower = sk.toLowerCase();
113
+ if (typeof sv === "string" &&
114
+ !headers[lower] &&
115
+ !BLOCKED_UPSTREAM_HEADERS.has(lower) &&
116
+ lower !== "authorization" &&
117
+ lower !== "x-api-key" &&
118
+ lower !== "x-claude-code-session-id") {
119
+ headers[lower] = sv;
120
+ }
121
+ }
122
+ }
123
+ async function loadClaudeSnapshot(accountLabel) {
97
124
  try {
98
- // Sanitize accountLabel to prevent directory traversal
99
- const safeLabel = accountLabel.replace(/[^a-zA-Z0-9._@-]/g, "_");
100
- // Check cache first
125
+ const safeLabel = getSnapshotSafeLabel(accountLabel);
101
126
  const cached = snapshotCache.get(safeLabel);
102
127
  if (cached && Date.now() - cached.loadedAt < SNAPSHOT_CACHE_TTL_MS) {
103
- for (const [sk, sv] of Object.entries(cached.headers)) {
104
- const lower = sk.toLowerCase();
105
- if (typeof sv === "string" &&
106
- !headers[lower] &&
107
- !BLOCKED_UPSTREAM_HEADERS.has(lower) &&
108
- lower !== "authorization" &&
109
- lower !== "x-api-key") {
110
- headers[lower] = sv;
111
- }
112
- }
113
- return;
128
+ return cached.snapshot;
114
129
  }
115
- const snapshotPath = join(homedir(), ".neurolink", "header-snapshots", `anthropic_${safeLabel}.json`);
130
+ const snapshotPath = getSnapshotPath(accountLabel);
116
131
  try {
117
132
  await access(snapshotPath);
118
133
  }
119
134
  catch {
120
- return;
135
+ return null;
121
136
  }
122
137
  const snapshot = JSON.parse(await readFile(snapshotPath, "utf8"));
123
- if (!snapshot.headers) {
124
- return;
138
+ if (!snapshot || typeof snapshot !== "object") {
139
+ return null;
140
+ }
141
+ const normalized = {
142
+ accountKey: "accountKey" in snapshot && typeof snapshot.accountKey === "string"
143
+ ? snapshot.accountKey
144
+ : `anthropic:${accountLabel}`,
145
+ capturedAt: "capturedAt" in snapshot && typeof snapshot.capturedAt === "string"
146
+ ? snapshot.capturedAt
147
+ : new Date(0).toISOString(),
148
+ source: "claude-code",
149
+ headers: "headers" in snapshot && snapshot.headers ? snapshot.headers : {},
150
+ ...(snapshot.body ? { body: snapshot.body } : {}),
151
+ };
152
+ if (Object.keys(normalized.headers).length === 0 && Object.keys(normalized.body ?? {}).length === 0) {
153
+ return null;
125
154
  }
126
- // Store in cache
127
155
  snapshotCache.set(safeLabel, {
128
- headers: snapshot.headers,
156
+ snapshot: normalized,
129
157
  loadedAt: Date.now(),
130
158
  });
131
- for (const [sk, sv] of Object.entries(snapshot.headers)) {
132
- const lower = sk.toLowerCase();
133
- if (typeof sv === "string" &&
134
- !headers[lower] &&
135
- !BLOCKED_UPSTREAM_HEADERS.has(lower) &&
136
- lower !== "authorization" &&
137
- lower !== "x-api-key") {
138
- headers[lower] = sv;
139
- }
159
+ return normalized;
160
+ }
161
+ catch {
162
+ return null;
163
+ }
164
+ }
165
+ function buildSnapshotHeaders(headers, existingHeaders) {
166
+ const merged = { ...(existingHeaders ?? {}) };
167
+ for (const [key, value] of Object.entries(headers)) {
168
+ const lower = key.toLowerCase();
169
+ if (typeof value === "string" &&
170
+ SNAPSHOT_STABLE_HEADERS.has(lower) &&
171
+ !BLOCKED_UPSTREAM_HEADERS.has(lower) &&
172
+ lower !== "authorization" &&
173
+ lower !== "x-api-key" &&
174
+ lower !== "x-claude-code-session-id") {
175
+ merged[lower] = value;
140
176
  }
141
177
  }
178
+ return merged;
179
+ }
180
+ function extractSnapshotBody(body) {
181
+ if (!body || typeof body !== "object") {
182
+ return undefined;
183
+ }
184
+ const parsed = body;
185
+ const identity = parseClaudeCodeUserId(parsed.metadata?.user_id);
186
+ const systemBlocks = Array.isArray(parsed.system)
187
+ ? parsed.system
188
+ : typeof parsed.system === "string"
189
+ ? [{ type: "text", text: parsed.system }]
190
+ : [];
191
+ const billingHeader = systemBlocks.find((block) => typeof block?.text === "string" && block.text.includes("x-anthropic-billing-header"))?.text;
192
+ const agentBlock = systemBlocks.find((block) => typeof block?.text === "string" && block.text.includes("Claude Agent SDK"))?.text;
193
+ if (!identity && !billingHeader && !agentBlock) {
194
+ return undefined;
195
+ }
196
+ return {
197
+ ...(identity ? { metadataUserId: identity.metadataUserId } : {}),
198
+ ...(identity ? { sessionId: identity.sessionId } : {}),
199
+ ...(billingHeader ? { billingHeader } : {}),
200
+ ...(agentBlock ? { agentBlock } : {}),
201
+ };
202
+ }
203
+ function isLikelyClaudeClient(headers, snapshotBody) {
204
+ return (typeof headers["x-claude-code-session-id"] === "string" ||
205
+ headers["user-agent"]?.startsWith("claude-cli/") ||
206
+ !!snapshotBody?.metadataUserId ||
207
+ !!snapshotBody?.billingHeader ||
208
+ !!snapshotBody?.agentBlock);
209
+ }
210
+ function snapshotsMatch(existing, next) {
211
+ if (!existing) {
212
+ return false;
213
+ }
214
+ return (JSON.stringify(existing.headers ?? {}) === JSON.stringify(next.headers ?? {}) &&
215
+ JSON.stringify(existing.body ?? {}) === JSON.stringify(next.body ?? {}));
216
+ }
217
+ async function persistClaudeSnapshot(accountLabel, snapshot) {
218
+ const snapshotPath = getSnapshotPath(accountLabel);
219
+ const dirPath = join(homedir(), ".neurolink", "header-snapshots");
220
+ await mkdir(dirPath, { recursive: true });
221
+ const tmpPath = `${snapshotPath}.tmp`;
222
+ await writeFile(tmpPath, JSON.stringify(snapshot, null, 2), { mode: 0o600 });
223
+ await rename(tmpPath, snapshotPath);
224
+ snapshotCache.set(getSnapshotSafeLabel(accountLabel), {
225
+ snapshot,
226
+ loadedAt: Date.now(),
227
+ });
228
+ }
229
+ async function maybeRefreshClaudeSnapshot(accountLabel, accountKey, headers, bodyStr) {
230
+ const existing = await loadClaudeSnapshot(accountLabel);
231
+ let parsedBody;
232
+ try {
233
+ parsedBody = JSON.parse(bodyStr);
234
+ }
142
235
  catch {
143
- // Snapshot missing or corrupt — continue without it
236
+ return existing;
237
+ }
238
+ const body = extractSnapshotBody(parsedBody);
239
+ if (!isLikelyClaudeClient(headers, body)) {
240
+ return existing;
241
+ }
242
+ const next = {
243
+ accountKey,
244
+ capturedAt: new Date().toISOString(),
245
+ source: "claude-code",
246
+ headers: buildSnapshotHeaders(headers, existing?.headers),
247
+ body: {
248
+ ...(existing?.body ?? {}),
249
+ ...(body ?? {}),
250
+ ...(typeof headers["x-claude-code-session-id"] === "string"
251
+ ? { sessionId: headers["x-claude-code-session-id"] }
252
+ : {}),
253
+ },
254
+ };
255
+ if (snapshotsMatch(existing, next)) {
256
+ return existing;
144
257
  }
258
+ await persistClaudeSnapshot(accountLabel, next);
259
+ return next;
145
260
  }
146
261
  /**
147
262
  * Polyfill the request body for OAuth accounts.
@@ -149,59 +264,74 @@ async function applyHeaderSnapshot(headers, accountLabel) {
149
264
  * into the body. Non-CC clients (Curator, custom apps) don't send these —
150
265
  * Anthropic rejects without them.
151
266
  */
152
- function polyfillOAuthBody(bodyStr, accountToken) {
267
+ function polyfillOAuthBody(bodyStr, accountToken, snapshot, preferredSessionId) {
153
268
  try {
154
269
  const parsed = JSON.parse(bodyStr);
155
270
  // Billing header block (required by Anthropic for OAuth)
156
- const randomHex = Math.random().toString(16).substring(2, 5);
157
- const billingBlock = {
158
- type: "text",
159
- text: `x-anthropic-billing-header: cc_version=2.1.86.${randomHex}; cc_entrypoint=cli; cch=proxy;`,
160
- };
271
+ // NOTE: This block MUST be deterministic (no random values) to preserve
272
+ // Anthropic's prompt caching prefix chain. We keep the real Claude Code
273
+ // version/entrypoint shape when present, but stabilize the volatile cch.
161
274
  const agentBlock = {
162
275
  type: "text",
163
- text: "You are a Claude agent, built on Anthropic's Claude Agent SDK.",
276
+ text: snapshot?.body?.agentBlock || "You are a Claude agent, built on Anthropic's Claude Agent SDK.",
164
277
  };
165
- // Normalise system to array and prepend billing + agent
278
+ // Normalise system to array and APPEND billing + agent blocks.
279
+ // IMPORTANT: We append (not prepend) to preserve the client's cache
280
+ // prefix chain. Anthropic's prompt caching uses prefix matching — if we
281
+ // insert anything before the client's system blocks, we invalidate all
282
+ // cached content (tools, system prompt, message history).
283
+ //
284
+ // Claude Code sends a billing block with a `cch=<hash>` value that changes
285
+ // on every request. We fix this by:
286
+ // 1. Removing the client's billing block from its current position
287
+ // 2. Stabilizing it while keeping the official Claude Code shape
288
+ // 3. Appending it at the END so the cacheable system blocks stay
289
+ // at the front of the prefix chain
166
290
  if (parsed.system) {
167
291
  if (typeof parsed.system === "string") {
168
292
  parsed.system = [{ type: "text", text: parsed.system }];
169
293
  }
170
294
  if (Array.isArray(parsed.system)) {
171
- const hasBilling = parsed.system.some((b) => typeof b.text === "string" &&
172
- b.text.includes("x-anthropic-billing-header"));
173
- const hasAgent = parsed.system.some((b) => typeof b.text === "string" && b.text.includes("Claude Agent SDK"));
174
- const toInsert = [];
175
- if (!hasBilling) {
176
- toInsert.push(billingBlock);
177
- }
178
- if (!hasAgent) {
179
- toInsert.push(agentBlock);
180
- }
181
- if (toInsert.length > 0) {
182
- parsed.system = [...toInsert, ...parsed.system];
295
+ // Find and remove existing billing/agent blocks from wherever
296
+ // the client placed them (typically at system[0])
297
+ const billingIdx = parsed.system.findIndex((b) => typeof b.text === "string" && b.text.includes("x-anthropic-billing-header"));
298
+ const agentIdx = parsed.system.findIndex((b) => typeof b.text === "string" && b.text.includes("Claude Agent SDK"));
299
+ const billingBlock = {
300
+ type: "text",
301
+ text: buildStableClaudeCodeBillingHeader(parsed.system[billingIdx]?.text ?? snapshot?.body?.billingHeader),
302
+ };
303
+ // Remove in reverse index order so indices stay valid
304
+ const indicesToRemove = [billingIdx, agentIdx].filter((i) => i >= 0).sort((a, b) => b - a);
305
+ for (const idx of indicesToRemove) {
306
+ parsed.system.splice(idx, 1);
183
307
  }
308
+ // Always append a deterministic billing block at the end.
309
+ // If the client sent one, we stripped its dynamic cch= and use
310
+ // our stable version instead. If not, we add ours.
311
+ parsed.system = [...parsed.system, billingBlock, agentBlock];
184
312
  }
185
313
  }
186
314
  else {
187
- parsed.system = [billingBlock, agentBlock];
188
- }
189
- // Inject metadata.user_id (required for OAuth)
190
- if (!parsed.metadata?.user_id) {
191
- const tokenPrefix = accountToken.substring(0, Math.min(20, accountToken.length));
192
- const hash = Array.from(new TextEncoder().encode(tokenPrefix))
193
- .reduce((a, b) => ((a << 5) - a + b) | 0, 0)
194
- .toString(16)
195
- .replace("-", "");
196
- parsed.metadata = {
197
- ...parsed.metadata,
198
- user_id: `proxy-${hash}`,
315
+ const billingBlock = {
316
+ type: "text",
317
+ text: buildStableClaudeCodeBillingHeader(snapshot?.body?.billingHeader),
199
318
  };
319
+ parsed.system = [billingBlock, agentBlock];
200
320
  }
201
- return JSON.stringify(parsed);
321
+ // Inject Claude-Code-shaped metadata.user_id (required for OAuth).
322
+ const tokenPrefix = accountToken.substring(0, Math.min(20, accountToken.length));
323
+ const identity = getOrCreateClaudeCodeIdentity(tokenPrefix, {
324
+ existingUserId: parsed.metadata?.user_id ?? snapshot?.body?.metadataUserId,
325
+ preferredSessionId: preferredSessionId ?? snapshot?.body?.sessionId,
326
+ });
327
+ parsed.metadata = {
328
+ ...parsed.metadata,
329
+ user_id: identity.metadataUserId,
330
+ };
331
+ return { bodyStr: JSON.stringify(parsed), sessionId: identity.sessionId };
202
332
  }
203
333
  catch {
204
- return bodyStr; // JSON parse failed — use original body
334
+ return { bodyStr }; // JSON parse failed — use original body
205
335
  }
206
336
  }
207
337
  // ---------------------------------------------------------------------------
@@ -269,7 +399,8 @@ async function tryLoadLegacyAccount(creds, legacyCredPath) {
269
399
  * @param basePath - Base path prefix (default: "" since Claude API uses /v1/...).
270
400
  * @returns RouteGroup with Claude-compatible endpoints.
271
401
  */
272
- export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrategy = "fill-first") {
402
+ // eslint-disable-next-line max-lines-per-function
403
+ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrategy = "fill-first", passthroughMode = false) {
273
404
  return {
274
405
  prefix: `${basePath}/v1`,
275
406
  routes: [
@@ -282,8 +413,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
282
413
  handler: async (ctx) => {
283
414
  const body = ctx.body;
284
415
  // 1. Validate
285
- if (typeof body?.model !== "string" ||
286
- !Array.isArray(body?.messages)) {
416
+ if (typeof body?.model !== "string" || !Array.isArray(body?.messages)) {
287
417
  return buildClaudeError(400, "Missing required fields: model, messages");
288
418
  }
289
419
  // 2. Resolve model via router (or pass through to anthropic)
@@ -298,15 +428,476 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
298
428
  provider: "anthropic",
299
429
  model: body.model,
300
430
  };
431
+ const clientRequestBody = JSON.stringify(body);
432
+ // ── OTel tracing ──────────────────────────────────────
433
+ let tracer;
434
+ try {
435
+ tracer = ProxyTracer.startRequest({
436
+ requestId: ctx.requestId,
437
+ method: ctx.method,
438
+ path: ctx.path,
439
+ model: body.model,
440
+ stream: body.stream ?? false,
441
+ toolCount: Array.isArray(body.tools) ? body.tools.length : 0,
442
+ sessionId: ctx.headers["x-neurolink-session-id"] ?? ctx.headers["x-claude-code-session-id"] ?? undefined,
443
+ userAgent: ctx.headers["user-agent"] ?? undefined,
444
+ }, ctx.headers);
445
+ const receiveSpan = tracer.startReceive();
446
+ tracer.logRequestHeaders(ctx.headers);
447
+ tracer.logRequestBody(clientRequestBody);
448
+ receiveSpan.end();
449
+ }
450
+ catch {
451
+ // Graceful degradation — continue without tracing
452
+ tracer = undefined;
453
+ }
454
+ const requestStartTime = Date.now();
455
+ const logProxyBody = (capture) => {
456
+ const traceCtx = tracer?.getTraceContext();
457
+ void logBodyCapture({
458
+ timestamp: new Date().toISOString(),
459
+ requestId: ctx.requestId,
460
+ model: body.model,
461
+ stream: body.stream ?? false,
462
+ ...capture,
463
+ ...(traceCtx ? { traceId: traceCtx.traceId, spanId: traceCtx.spanId } : {}),
464
+ });
465
+ };
466
+ const logFinalRequest = (status, accountLabel, accountType, errorType, errorMessage, extra) => {
467
+ const traceCtx = tracer?.getTraceContext();
468
+ logRequest({
469
+ timestamp: new Date().toISOString(),
470
+ requestId: ctx.requestId,
471
+ method: ctx.method,
472
+ path: ctx.path,
473
+ model: body.model,
474
+ stream: !!body.stream,
475
+ toolCount: Array.isArray(body.tools) ? body.tools.length : 0,
476
+ account: accountLabel,
477
+ accountType,
478
+ responseStatus: status,
479
+ responseTimeMs: Date.now() - requestStartTime,
480
+ ...(errorType ? { errorType } : {}),
481
+ ...(errorMessage ? { errorMessage } : {}),
482
+ ...(extra?.inputTokens !== undefined ? { inputTokens: extra.inputTokens } : {}),
483
+ ...(extra?.outputTokens !== undefined ? { outputTokens: extra.outputTokens } : {}),
484
+ ...(extra?.cacheCreationTokens !== undefined ? { cacheCreationTokens: extra.cacheCreationTokens } : {}),
485
+ ...(extra?.cacheReadTokens !== undefined ? { cacheReadTokens: extra.cacheReadTokens } : {}),
486
+ ...(traceCtx ? { traceId: traceCtx.traceId, spanId: traceCtx.spanId } : {}),
487
+ });
488
+ };
489
+ logProxyBody({
490
+ phase: "client_request",
491
+ headers: ctx.headers,
492
+ body: clientRequestBody,
493
+ bodySize: Buffer.byteLength(clientRequestBody, "utf8"),
494
+ contentType: ctx.headers["content-type"] ?? "application/json",
495
+ });
496
+ const buildLoggedClaudeError = (status, message, errorType, extra) => {
497
+ const errorBody = buildClaudeError(status, message, errorType);
498
+ const errorBodyText = JSON.stringify(errorBody);
499
+ recordFinalError(status, extra?.account, extra?.accountType);
500
+ logFinalRequest(status, extra?.account ?? "", extra?.accountType ?? "final", errorType, message);
501
+ logProxyBody({
502
+ phase: "client_response",
503
+ headers: { "content-type": "application/json" },
504
+ body: errorBodyText,
505
+ bodySize: Buffer.byteLength(errorBodyText, "utf8"),
506
+ contentType: "application/json",
507
+ responseStatus: status,
508
+ durationMs: Date.now() - requestStartTime,
509
+ ...extra,
510
+ });
511
+ return errorBody;
512
+ };
301
513
  try {
302
514
  // 3. Route based on target provider
303
515
  if (route.provider === null) {
304
- return buildClaudeError(404, `Model '${body.model}' is not a Claude model. ` +
305
- `Use a model router to route it to another provider.`);
516
+ tracer?.setError("not_found_error", `Model '${body.model}' is not a Claude model.`);
517
+ tracer?.end(404, Date.now() - requestStartTime);
518
+ return buildLoggedClaudeError(404, `Model '${body.model}' is not a Claude model. ` + `Use a model router to route it to another provider.`);
306
519
  }
307
520
  const isClaudeTarget = route.provider === "anthropic";
308
521
  if (isClaudeTarget) {
309
- // ─── PASSTHROUGH MODE (Claude → Claude) ───────────────
522
+ // --- PASSTHROUGH MODE (Claude -> Claude) -------------------
523
+ tracer?.setMode("passthrough");
524
+ // ── CLI --passthrough: raw transparent forwarding ──────
525
+ if (passthroughMode) {
526
+ tracer?.setMode("passthrough-cli");
527
+ const bodyStr = clientRequestBody;
528
+ const toolCount = Array.isArray(body.tools) ? body.tools.length : 0;
529
+ // Forward client headers as-is, filtering blocked ones
530
+ const upstreamHeaders = {};
531
+ for (const [key, value] of Object.entries(ctx.headers)) {
532
+ if (!BLOCKED_UPSTREAM_HEADERS.has(key.toLowerCase()) && value) {
533
+ upstreamHeaders[key] = value;
534
+ }
535
+ }
536
+ // Ensure content-type is set
537
+ if (!upstreamHeaders["content-type"]) {
538
+ upstreamHeaders["content-type"] = "application/json";
539
+ }
540
+ const upstreamSpan = tracer?.startUpstreamAttempt({
541
+ account: "passthrough",
542
+ attempt: 1,
543
+ polyfillHeaders: false,
544
+ polyfillBody: false,
545
+ upstreamUrl: "https://api.anthropic.com/v1/messages?beta=true",
546
+ });
547
+ tracer?.logUpstreamRequestHeaders(upstreamHeaders);
548
+ tracer?.logUpstreamRequestBody(bodyStr);
549
+ logProxyBody({
550
+ phase: "upstream_request",
551
+ headers: upstreamHeaders,
552
+ body: bodyStr,
553
+ bodySize: Buffer.byteLength(bodyStr, "utf8"),
554
+ contentType: upstreamHeaders["content-type"] ?? "application/json",
555
+ account: "passthrough",
556
+ accountType: "passthrough",
557
+ attempt: 1,
558
+ });
559
+ let response;
560
+ try {
561
+ response = await fetch("https://api.anthropic.com/v1/messages?beta=true", {
562
+ method: "POST",
563
+ headers: upstreamHeaders,
564
+ body: bodyStr,
565
+ signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
566
+ });
567
+ }
568
+ catch (fetchErr) {
569
+ const errMsg = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
570
+ tracer?.setError("network_error", errMsg);
571
+ upstreamSpan?.end();
572
+ tracer?.end(502, Date.now() - requestStartTime);
573
+ logRequest({
574
+ timestamp: new Date().toISOString(),
575
+ requestId: ctx.requestId,
576
+ method: ctx.method,
577
+ path: ctx.path,
578
+ model: body.model,
579
+ stream: body.stream ?? false,
580
+ toolCount,
581
+ account: "passthrough",
582
+ accountType: "passthrough",
583
+ responseStatus: 502,
584
+ responseTimeMs: Date.now() - requestStartTime,
585
+ errorType: "network_error",
586
+ errorMessage: errMsg,
587
+ });
588
+ const errorBody = buildClaudeError(502, `Passthrough fetch failed: ${errMsg}`);
589
+ logProxyBody({
590
+ phase: "client_response",
591
+ headers: { "content-type": "application/json" },
592
+ body: JSON.stringify(errorBody),
593
+ bodySize: Buffer.byteLength(JSON.stringify(errorBody), "utf8"),
594
+ contentType: "application/json",
595
+ account: "passthrough",
596
+ accountType: "passthrough",
597
+ attempt: 1,
598
+ responseStatus: 502,
599
+ durationMs: Date.now() - requestStartTime,
600
+ });
601
+ return errorBody;
602
+ }
603
+ const upstreamResponseHeaders = {};
604
+ response.headers.forEach((v, k) => {
605
+ upstreamResponseHeaders[k] = v;
606
+ });
607
+ tracer?.logUpstreamResponseHeaders(upstreamResponseHeaders);
608
+ if (!response.ok) {
609
+ const errorText = await response.text();
610
+ tracer?.logUpstreamResponseBody(errorText);
611
+ logProxyBody({
612
+ phase: "upstream_response",
613
+ headers: upstreamResponseHeaders,
614
+ body: errorText,
615
+ bodySize: Buffer.byteLength(errorText, "utf8"),
616
+ contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
617
+ account: "passthrough",
618
+ accountType: "passthrough",
619
+ attempt: 1,
620
+ responseStatus: response.status,
621
+ durationMs: Date.now() - requestStartTime,
622
+ });
623
+ logProxyBody({
624
+ phase: "client_response",
625
+ headers: upstreamResponseHeaders,
626
+ body: errorText,
627
+ bodySize: Buffer.byteLength(errorText, "utf8"),
628
+ contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
629
+ account: "passthrough",
630
+ accountType: "passthrough",
631
+ attempt: 1,
632
+ responseStatus: response.status,
633
+ durationMs: Date.now() - requestStartTime,
634
+ });
635
+ upstreamSpan?.end();
636
+ tracer?.setError("api_error", errorText.slice(0, 500));
637
+ tracer?.end(response.status, Date.now() - requestStartTime);
638
+ try {
639
+ return JSON.parse(errorText);
640
+ }
641
+ catch {
642
+ return buildClaudeError(response.status, errorText);
643
+ }
644
+ }
645
+ // Streaming response
646
+ if (body.stream && response.body) {
647
+ const responseHeaders = { ...upstreamResponseHeaders };
648
+ const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture();
649
+ let streamSource = response.body;
650
+ if (tracer) {
651
+ try {
652
+ const { stream: interceptor, telemetry } = createSSEInterceptor({ captureRawText: true });
653
+ streamSource = streamSource.pipeThrough(interceptor);
654
+ const capturedTracer = tracer;
655
+ const capturedUpstreamSpan = upstreamSpan;
656
+ const capturedResponse = response;
657
+ const capturedRequestBytes = bodyStr.length;
658
+ Promise.all([telemetry, clientCapture])
659
+ .then(([data, clientBody]) => {
660
+ capturedTracer.setUsage({
661
+ inputTokens: data.usage.inputTokens,
662
+ outputTokens: data.usage.outputTokens,
663
+ cacheCreationTokens: data.usage.cacheCreationInputTokens,
664
+ cacheReadTokens: data.usage.cacheReadInputTokens,
665
+ });
666
+ capturedTracer.logStreamEvents(data.events);
667
+ const rateLimit5h = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
668
+ const rateLimit7d = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
669
+ const usageUpdate = {
670
+ inputTokens: data.usage.inputTokens,
671
+ outputTokens: data.usage.outputTokens,
672
+ cacheCreationTokens: data.usage.cacheCreationInputTokens,
673
+ cacheReadTokens: data.usage.cacheReadInputTokens,
674
+ };
675
+ if (!isNaN(rateLimit5h)) {
676
+ usageUpdate.rateLimitAfter5h = rateLimit5h;
677
+ }
678
+ if (!isNaN(rateLimit7d)) {
679
+ usageUpdate.rateLimitAfter7d = rateLimit7d;
680
+ }
681
+ if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) {
682
+ capturedTracer.setUsage(usageUpdate);
683
+ }
684
+ capturedTracer.logUpstreamResponseBody(data.rawText ?? "");
685
+ capturedTracer.recordMetrics();
686
+ capturedTracer.recordBodySizes(capturedRequestBytes, data.totalBytesReceived);
687
+ capturedUpstreamSpan?.end();
688
+ capturedTracer.end(200, Date.now() - requestStartTime);
689
+ const traceCtx = capturedTracer.getTraceContext();
690
+ logRequest({
691
+ timestamp: new Date().toISOString(),
692
+ requestId: ctx.requestId,
693
+ method: ctx.method,
694
+ path: ctx.path,
695
+ model: body.model,
696
+ stream: true,
697
+ toolCount,
698
+ account: "passthrough",
699
+ accountType: "passthrough",
700
+ responseStatus: 200,
701
+ responseTimeMs: Date.now() - requestStartTime,
702
+ inputTokens: data.usage.inputTokens,
703
+ outputTokens: data.usage.outputTokens,
704
+ cacheCreationTokens: data.usage.cacheCreationInputTokens,
705
+ cacheReadTokens: data.usage.cacheReadInputTokens,
706
+ traceId: traceCtx.traceId,
707
+ spanId: traceCtx.spanId,
708
+ });
709
+ logProxyBody({
710
+ phase: "upstream_response",
711
+ headers: responseHeaders,
712
+ body: data.rawText ?? "",
713
+ bodySize: data.totalBytesReceived,
714
+ contentType: responseHeaders["content-type"] ?? "text/event-stream",
715
+ account: "passthrough",
716
+ accountType: "passthrough",
717
+ attempt: 1,
718
+ responseStatus: 200,
719
+ durationMs: Date.now() - requestStartTime,
720
+ });
721
+ logProxyBody({
722
+ phase: "client_response",
723
+ headers: responseHeaders,
724
+ body: clientBody.text,
725
+ bodySize: clientBody.totalBytes,
726
+ contentType: responseHeaders["content-type"] ?? "text/event-stream",
727
+ account: "passthrough",
728
+ accountType: "passthrough",
729
+ attempt: 1,
730
+ responseStatus: 200,
731
+ durationMs: Date.now() - requestStartTime,
732
+ });
733
+ })
734
+ .catch((err) => {
735
+ capturedTracer.setError("stream_error", err instanceof Error ? err.message : String(err));
736
+ capturedUpstreamSpan?.end();
737
+ capturedTracer.end(500, Date.now() - requestStartTime);
738
+ const traceCtx = capturedTracer.getTraceContext();
739
+ logRequest({
740
+ timestamp: new Date().toISOString(),
741
+ requestId: ctx.requestId,
742
+ method: ctx.method,
743
+ path: ctx.path,
744
+ model: body.model,
745
+ stream: true,
746
+ toolCount,
747
+ account: "passthrough",
748
+ accountType: "passthrough",
749
+ responseStatus: 500,
750
+ responseTimeMs: Date.now() - requestStartTime,
751
+ errorType: "stream_error",
752
+ errorMessage: err instanceof Error ? err.message : String(err),
753
+ traceId: traceCtx.traceId,
754
+ spanId: traceCtx.spanId,
755
+ });
756
+ });
757
+ }
758
+ catch {
759
+ // Streaming capture is best-effort; request completion is handled elsewhere.
760
+ }
761
+ }
762
+ else {
763
+ clientCapture
764
+ .then((clientBody) => {
765
+ logProxyBody({
766
+ phase: "upstream_response",
767
+ headers: responseHeaders,
768
+ body: clientBody.text,
769
+ bodySize: clientBody.totalBytes,
770
+ contentType: responseHeaders["content-type"] ?? "text/event-stream",
771
+ account: "passthrough",
772
+ accountType: "passthrough",
773
+ attempt: 1,
774
+ responseStatus: 200,
775
+ durationMs: Date.now() - requestStartTime,
776
+ });
777
+ logProxyBody({
778
+ phase: "client_response",
779
+ headers: responseHeaders,
780
+ body: clientBody.text,
781
+ bodySize: clientBody.totalBytes,
782
+ contentType: responseHeaders["content-type"] ?? "text/event-stream",
783
+ account: "passthrough",
784
+ accountType: "passthrough",
785
+ attempt: 1,
786
+ responseStatus: 200,
787
+ durationMs: Date.now() - requestStartTime,
788
+ });
789
+ })
790
+ .catch(() => {
791
+ // Non-fatal
792
+ });
793
+ }
794
+ const clientStream = streamSource.pipeThrough(clientCaptureStream);
795
+ return new Response(clientStream, {
796
+ status: response.status,
797
+ headers: responseHeaders,
798
+ });
799
+ }
800
+ // Non-streaming response
801
+ const responseText = await response.text();
802
+ tracer?.logUpstreamResponseBody(responseText);
803
+ logProxyBody({
804
+ phase: "upstream_response",
805
+ headers: upstreamResponseHeaders,
806
+ body: responseText,
807
+ bodySize: Buffer.byteLength(responseText, "utf8"),
808
+ contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
809
+ account: "passthrough",
810
+ accountType: "passthrough",
811
+ attempt: 1,
812
+ responseStatus: response.status,
813
+ durationMs: Date.now() - requestStartTime,
814
+ });
815
+ logProxyBody({
816
+ phase: "client_response",
817
+ headers: upstreamResponseHeaders,
818
+ body: responseText,
819
+ bodySize: Buffer.byteLength(responseText, "utf8"),
820
+ contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
821
+ account: "passthrough",
822
+ accountType: "passthrough",
823
+ attempt: 1,
824
+ responseStatus: response.status,
825
+ durationMs: Date.now() - requestStartTime,
826
+ });
827
+ const responseJson = JSON.parse(responseText);
828
+ if (tracer && responseJson && typeof responseJson === "object") {
829
+ const usage = responseJson.usage;
830
+ if (usage) {
831
+ tracer.setUsage({
832
+ inputTokens: usage.input_tokens ?? 0,
833
+ outputTokens: usage.output_tokens ?? 0,
834
+ cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
835
+ cacheReadTokens: usage.cache_read_input_tokens ?? 0,
836
+ });
837
+ const rateLimit5h = parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
838
+ const rateLimit7d = parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
839
+ if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) {
840
+ const usageWithRates = {
841
+ inputTokens: usage.input_tokens ?? 0,
842
+ outputTokens: usage.output_tokens ?? 0,
843
+ cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
844
+ cacheReadTokens: usage.cache_read_input_tokens ?? 0,
845
+ };
846
+ if (!isNaN(rateLimit5h)) {
847
+ usageWithRates.rateLimitAfter5h = rateLimit5h;
848
+ }
849
+ if (!isNaN(rateLimit7d)) {
850
+ usageWithRates.rateLimitAfter7d = rateLimit7d;
851
+ }
852
+ tracer.setUsage(usageWithRates);
853
+ }
854
+ }
855
+ tracer.recordMetrics();
856
+ const responseJsonStr = JSON.stringify(responseJson);
857
+ tracer.recordBodySizes(bodyStr.length, responseJsonStr.length);
858
+ upstreamSpan?.end();
859
+ tracer.end(response.status, Date.now() - requestStartTime);
860
+ const traceCtx = tracer.getTraceContext();
861
+ logRequest({
862
+ timestamp: new Date().toISOString(),
863
+ requestId: ctx.requestId,
864
+ method: ctx.method,
865
+ path: ctx.path,
866
+ model: body.model,
867
+ stream: false,
868
+ toolCount,
869
+ account: "passthrough",
870
+ accountType: "passthrough",
871
+ responseStatus: response.status,
872
+ responseTimeMs: Date.now() - requestStartTime,
873
+ inputTokens: usage?.input_tokens,
874
+ outputTokens: usage?.output_tokens,
875
+ cacheCreationTokens: usage?.cache_creation_input_tokens,
876
+ cacheReadTokens: usage?.cache_read_input_tokens,
877
+ traceId: traceCtx.traceId,
878
+ spanId: traceCtx.spanId,
879
+ });
880
+ }
881
+ else {
882
+ upstreamSpan?.end();
883
+ tracer?.end(response.status, Date.now() - requestStartTime);
884
+ logRequest({
885
+ timestamp: new Date().toISOString(),
886
+ requestId: ctx.requestId,
887
+ method: ctx.method,
888
+ path: ctx.path,
889
+ model: body.model,
890
+ stream: false,
891
+ toolCount,
892
+ account: "passthrough",
893
+ accountType: "passthrough",
894
+ responseStatus: response.status,
895
+ responseTimeMs: Date.now() - requestStartTime,
896
+ });
897
+ }
898
+ return responseJson;
899
+ }
900
+ // ── END CLI --passthrough ─────────────────────────────
310
901
  const fs = await import("fs");
311
902
  const os = await import("os");
312
903
  const accounts = [];
@@ -330,8 +921,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
330
921
  // On cold start, lastToken is empty — don't treat that as a
331
922
  // credential change; only compare on subsequent reloads.
332
923
  const tokens = await tokenStore.loadTokens(key);
333
- const hasTrackedTokens = existingState.lastToken !== undefined &&
334
- existingState.lastToken !== "";
924
+ const hasTrackedTokens = existingState.lastToken !== undefined && existingState.lastToken !== "";
335
925
  const tokenChanged = tokens &&
336
926
  hasTrackedTokens &&
337
927
  (existingState.lastToken !== tokens.accessToken ||
@@ -436,13 +1026,14 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
436
1026
  });
437
1027
  }
438
1028
  if (accounts.length === 0) {
439
- return buildClaudeError(401, "No Anthropic credentials found");
1029
+ tracer?.setError("authentication_error", "No Anthropic credentials found");
1030
+ tracer?.end(401, Date.now() - requestStartTime);
1031
+ return buildLoggedClaudeError(401, "No Anthropic credentials found");
440
1032
  }
441
1033
  // Sync in-memory runtime state with current token material.
442
1034
  for (const account of accounts) {
443
1035
  const state = getOrCreateRuntimeState(account.key);
444
- const tokenChanged = state.lastToken !== account.token ||
445
- state.lastRefreshToken !== account.refreshToken;
1036
+ const tokenChanged = state.lastToken !== account.token || state.lastRefreshToken !== account.refreshToken;
446
1037
  if (tokenChanged) {
447
1038
  if (state.permanentlyDisabled) {
448
1039
  logger.always(`[proxy] account=${account.label} credentials changed, re-enabling`);
@@ -456,11 +1047,13 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
456
1047
  state.lastRefreshToken = account.refreshToken;
457
1048
  }
458
1049
  const enabledAccounts = accounts.filter((account) => {
459
- return !getOrCreateRuntimeState(account.key)
460
- .permanentlyDisabled;
1050
+ return !getOrCreateRuntimeState(account.key).permanentlyDisabled;
461
1051
  });
462
1052
  if (enabledAccounts.length === 0) {
463
- return buildClaudeError(401, formatReauthMessage(accounts.map((account) => account.label)));
1053
+ const reauthMsg = formatReauthMessage(accounts.map((account) => account.label));
1054
+ tracer?.setError("authentication_error", reauthMsg);
1055
+ tracer?.end(401, Date.now() - requestStartTime);
1056
+ return buildLoggedClaudeError(401, reauthMsg);
464
1057
  }
465
1058
  // Order accounts based on the configured strategy.
466
1059
  // - fill-first: always start with the primary account;
@@ -472,8 +1065,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
472
1065
  // (e.g. a new account was authenticated while the proxy was running).
473
1066
  // Only applies to round-robin; fill-first uses primaryAccountIndex
474
1067
  // as a sticky primary and should not be disrupted.
475
- if (accountStrategy === "round-robin" &&
476
- orderedAccounts.length !== lastKnownAccountCount) {
1068
+ if (accountStrategy === "round-robin" && orderedAccounts.length !== lastKnownAccountCount) {
477
1069
  primaryAccountIndex = 0;
478
1070
  lastKnownAccountCount = orderedAccounts.length;
479
1071
  }
@@ -481,8 +1073,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
481
1073
  if (accountStrategy === "round-robin") {
482
1074
  // Advance the index on every request for even distribution
483
1075
  const idx = primaryAccountIndex % orderedAccounts.length;
484
- primaryAccountIndex =
485
- (primaryAccountIndex + 1) % orderedAccounts.length;
1076
+ primaryAccountIndex = (primaryAccountIndex + 1) % orderedAccounts.length;
486
1077
  if (idx > 0) {
487
1078
  const head = orderedAccounts.splice(0, idx);
488
1079
  orderedAccounts.push(...head);
@@ -501,24 +1092,30 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
501
1092
  let sawRateLimit = false;
502
1093
  let sawNetworkError = false;
503
1094
  let sawTransientFailure = false;
1095
+ let invalidRequestFailure = null;
504
1096
  let authFailureMessage = null;
505
- const bodyStr = JSON.stringify(body);
1097
+ const normalizedAnthropicBody = normalizeClaudeRequestForAnthropic(body);
1098
+ const bodyStr = JSON.stringify(normalizedAnthropicBody);
506
1099
  const requestStart = Date.now();
507
- const toolCount = Array.isArray(body.tools)
508
- ? body.tools.length
509
- : 0;
1100
+ const toolCount = Array.isArray(body.tools) ? body.tools.length : 0;
510
1101
  const url = "https://api.anthropic.com/v1/messages?beta=true";
511
1102
  const clientHeaders = ctx.headers ?? {};
1103
+ const clientSnapshotBody = extractSnapshotBody(body);
1104
+ const isClaudeClientRequest = isLikelyClaudeClient(clientHeaders, clientSnapshotBody);
1105
+ let attemptNumber = 0;
1106
+ // OTel: account selection span (covers the whole selection phase)
1107
+ const acctSelectionSpan = tracer?.startAccountSelection();
512
1108
  for (const account of orderedAccounts) {
513
1109
  const accountState = getOrCreateRuntimeState(account.key);
514
- if (accountState.coolingUntil &&
515
- accountState.coolingUntil > Date.now()) {
1110
+ if (accountState.coolingUntil && accountState.coolingUntil > Date.now()) {
516
1111
  continue;
517
1112
  }
518
- const logAttempt = (status, errorType, errorMessage) => {
519
- logRequest({
1113
+ const logAttempt = (status, errorType, errorMessage, extra) => {
1114
+ const traceCtx = tracer?.getTraceContext();
1115
+ logRequestAttempt({
520
1116
  timestamp: new Date().toISOString(),
521
1117
  requestId: ctx.requestId,
1118
+ attempt: attemptNumber,
522
1119
  method: ctx.method,
523
1120
  path: ctx.path,
524
1121
  model: body.model,
@@ -530,8 +1127,31 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
530
1127
  responseTimeMs: Date.now() - requestStart,
531
1128
  ...(errorType ? { errorType } : {}),
532
1129
  ...(errorMessage ? { errorMessage } : {}),
1130
+ ...(extra?.inputTokens !== undefined ? { inputTokens: extra.inputTokens } : {}),
1131
+ ...(extra?.outputTokens !== undefined ? { outputTokens: extra.outputTokens } : {}),
1132
+ ...(extra?.cacheCreationTokens !== undefined
1133
+ ? { cacheCreationTokens: extra.cacheCreationTokens }
1134
+ : {}),
1135
+ ...(extra?.cacheReadTokens !== undefined ? { cacheReadTokens: extra.cacheReadTokens } : {}),
1136
+ ...(traceCtx ? { traceId: traceCtx.traceId, spanId: traceCtx.spanId } : {}),
533
1137
  });
534
1138
  };
1139
+ // OTel: record account selection and start upstream attempt span
1140
+ attemptNumber++;
1141
+ if (tracer) {
1142
+ // End the selection span on first actual attempt
1143
+ if (attemptNumber === 1 && acctSelectionSpan) {
1144
+ tracer.setAccountSelection({
1145
+ strategy: accountStrategy,
1146
+ accountsTotal: accounts.length,
1147
+ accountsHealthy: enabledAccounts.length,
1148
+ selectedAccount: account.label,
1149
+ accountType: account.type,
1150
+ });
1151
+ acctSelectionSpan.end();
1152
+ }
1153
+ }
1154
+ let upstreamSpan;
535
1155
  // Auto-refresh expiring access tokens once before making the request.
536
1156
  if (needsRefresh(account)) {
537
1157
  const refreshed = await refreshToken(account);
@@ -545,8 +1165,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
545
1165
  accountState.consecutiveRefreshFailures += 1;
546
1166
  lastError = `token refresh failed for account=${account.label}: ${refreshed.error?.slice(0, 200) ?? "unknown"}`;
547
1167
  logger.debug(`[proxy] preflight refresh failed account=${account.label} failures=${accountState.consecutiveRefreshFailures}`);
548
- if (accountState.consecutiveRefreshFailures >=
549
- MAX_CONSECUTIVE_REFRESH_FAILURES) {
1168
+ if (accountState.consecutiveRefreshFailures >= MAX_CONSECUTIVE_REFRESH_FAILURES) {
550
1169
  await disableAccountUntilReauth(account, accountState);
551
1170
  authFailureMessage = formatReauthMessage(account.label);
552
1171
  logAttempt(401, "authentication_error", String(lastError));
@@ -555,6 +1174,9 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
555
1174
  }
556
1175
  }
557
1176
  const isOAuth = account.type === "oauth";
1177
+ const snapshot = isOAuth
1178
+ ? await maybeRefreshClaudeSnapshot(account.label, account.key, clientHeaders, bodyStr)
1179
+ : null;
558
1180
  // Decision 6: Passthrough client headers, fill gaps only.
559
1181
  // Start with a copy of incoming client headers, then set
560
1182
  // defaults for anything the client didn't send. Always
@@ -562,8 +1184,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
562
1184
  const headers = {};
563
1185
  for (const [hk, hv] of Object.entries(clientHeaders)) {
564
1186
  const lower = hk.toLowerCase();
565
- if (typeof hv === "string" &&
566
- !BLOCKED_UPSTREAM_HEADERS.has(lower)) {
1187
+ if (typeof hv === "string" && !BLOCKED_UPSTREAM_HEADERS.has(lower)) {
567
1188
  headers[lower] = hv;
568
1189
  }
569
1190
  }
@@ -579,11 +1200,11 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
579
1200
  }
580
1201
  // Apply header snapshot defaults for OAuth accounts
581
1202
  if (isOAuth) {
582
- await applyHeaderSnapshot(headers, account.label);
1203
+ applySnapshotHeaders(headers, snapshot);
583
1204
  }
584
1205
  // Hard defaults for anything still missing
585
1206
  if (!headers["user-agent"]) {
586
- headers["user-agent"] = "claude-cli/2.1.86 (external, cli)";
1207
+ headers["user-agent"] = CLAUDE_CLI_USER_AGENT;
587
1208
  }
588
1209
  if (!headers["anthropic-version"]) {
589
1210
  headers["anthropic-version"] = "2023-06-01";
@@ -591,15 +1212,25 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
591
1212
  if (!headers["anthropic-dangerous-direct-browser-access"]) {
592
1213
  headers["anthropic-dangerous-direct-browser-access"] = "true";
593
1214
  }
1215
+ if (!headers["x-app"]) {
1216
+ headers["x-app"] = "cli";
1217
+ }
1218
+ if (!headers["accept"]) {
1219
+ headers["accept"] = "application/json";
1220
+ }
594
1221
  // Manage anthropic-beta header based on auth type.
595
1222
  // OAuth requires specific betas; API-key must NOT carry them.
596
1223
  if (isOAuth) {
597
- const existing = new Set((headers["anthropic-beta"] ?? "")
1224
+ const betaSeed = isClaudeClientRequest
1225
+ ? (headers["anthropic-beta"] ?? "")
1226
+ : (clientHeaders["anthropic-beta"] ?? "");
1227
+ const existing = new Set(betaSeed
598
1228
  .split(",")
599
1229
  .map((s) => s.trim())
600
1230
  .filter(Boolean));
601
- existing.add("oauth-2025-04-20");
602
- existing.add("claude-code-20250219");
1231
+ for (const beta of isClaudeClientRequest ? CLAUDE_CODE_OAUTH_BETAS : NON_CLAUDE_OAUTH_BETAS) {
1232
+ existing.add(beta);
1233
+ }
603
1234
  headers["anthropic-beta"] = [...existing].join(",");
604
1235
  }
605
1236
  else {
@@ -607,7 +1238,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
607
1238
  const cleaned = (headers["anthropic-beta"] ?? "")
608
1239
  .split(",")
609
1240
  .map((s) => s.trim())
610
- .filter((s) => s && s !== "oauth-2025-04-20")
1241
+ .filter((s) => s && !CLAUDE_CODE_OAUTH_BETAS.includes(s))
611
1242
  .join(",");
612
1243
  if (cleaned) {
613
1244
  headers["anthropic-beta"] = cleaned;
@@ -616,13 +1247,46 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
616
1247
  delete headers["anthropic-beta"];
617
1248
  }
618
1249
  }
619
- // Polyfill request body for OAuth accounts
620
- const buildUpstreamBody = () => isOAuth ? polyfillOAuthBody(bodyStr, account.token) : bodyStr;
621
- const finalBodyStr = buildUpstreamBody();
1250
+ // Polyfill request body for ALL OAuth accounts.
1251
+ // Anthropic requires metadata.user_id and billing headers
1252
+ // for OAuth — not just Claude Code clients.
1253
+ const shouldPolyfillBody = isOAuth;
1254
+ const buildUpstreamBody = (token) => shouldPolyfillBody
1255
+ ? polyfillOAuthBody(bodyStr, token, snapshot, headers["x-claude-code-session-id"])
1256
+ : { bodyStr };
1257
+ const polyfilledBody = buildUpstreamBody(account.token);
1258
+ if (isOAuth && polyfilledBody.sessionId && !headers["x-claude-code-session-id"]) {
1259
+ headers["x-claude-code-session-id"] = polyfilledBody.sessionId;
1260
+ }
1261
+ const finalBodyStr = polyfilledBody.bodyStr;
622
1262
  logger.always(`[proxy] → account=${account.label} (${account.type})`);
623
- recordRequest(account.label, account.type);
1263
+ recordAttempt(account.label, account.type);
624
1264
  // Log full request for debugging (written to ~/.neurolink/logs/proxy-debug-*.jsonl)
625
1265
  const fetchStartMs = Date.now();
1266
+ // OTel: start upstream attempt span and inject trace headers
1267
+ if (tracer) {
1268
+ upstreamSpan = tracer.startUpstreamAttempt({
1269
+ attempt: attemptNumber,
1270
+ account: account.label,
1271
+ polyfillHeaders: isOAuth,
1272
+ polyfillBody: isOAuth,
1273
+ upstreamUrl: url,
1274
+ });
1275
+ tracer.logUpstreamRequestHeaders(headers);
1276
+ tracer.logUpstreamRequestBody(finalBodyStr);
1277
+ const traceHeaders = tracer.getTraceHeaders();
1278
+ Object.assign(headers, traceHeaders);
1279
+ }
1280
+ logProxyBody({
1281
+ phase: "upstream_request",
1282
+ headers,
1283
+ body: finalBodyStr,
1284
+ bodySize: Buffer.byteLength(finalBodyStr, "utf8"),
1285
+ contentType: headers["content-type"] ?? "application/json",
1286
+ account: account.label,
1287
+ accountType: account.type,
1288
+ attempt: attemptNumber,
1289
+ });
626
1290
  let response;
627
1291
  try {
628
1292
  response = await fetch(url, {
@@ -638,14 +1302,16 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
638
1302
  }
639
1303
  // Decision 8: Network errors — immediate rotation, no cooldown
640
1304
  sawNetworkError = true;
641
- recordError(account.label, account.type, 502);
1305
+ recordAttemptError(account.label, account.type, 502);
642
1306
  const errorCode = getErrorCode(fetchErr) ?? "unknown";
643
- const errorMessage = fetchErr instanceof Error
644
- ? fetchErr.message
645
- : String(fetchErr);
1307
+ const errorMessage = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
646
1308
  lastError = errorMessage;
647
1309
  logger.always(`[proxy] fetch error account=${account.label} code=${errorCode} (rotating): ${errorMessage}`);
648
1310
  logAttempt(502, "network_error", errorMessage);
1311
+ tracer?.setError("network_error", errorMessage);
1312
+ tracer?.recordRetry(account.label, "network_error");
1313
+ upstreamSpan?.end();
1314
+ upstreamSpan = undefined;
649
1315
  continue;
650
1316
  }
651
1317
  // Check 429 (with Retry-After + exponential backoff) → continue.
@@ -660,7 +1326,6 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
660
1326
  }
661
1327
  else {
662
1328
  const date = new Date(retryAfter);
663
- // eslint-disable-next-line max-depth
664
1329
  if (!Number.isNaN(date.getTime())) {
665
1330
  cooldownMs = Math.max(date.getTime() - Date.now(), 1000);
666
1331
  }
@@ -668,22 +1333,24 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
668
1333
  }
669
1334
  const level = accountState.backoffLevel;
670
1335
  const baseCooldown = cooldownMs > 0 ? cooldownMs : RATE_LIMIT_BACKOFF_BASE_MS;
671
- const backoffMs = Math.min(baseCooldown * Math.pow(2, level), RATE_LIMIT_BACKOFF_CAP_MS);
1336
+ const backoffMs = Math.min(baseCooldown * 2 ** level, RATE_LIMIT_BACKOFF_CAP_MS);
672
1337
  accountState.coolingUntil = Date.now() + backoffMs;
673
1338
  accountState.backoffLevel += 1;
674
1339
  advancePrimaryIfCurrent(account.key, enabledAccounts.length, orderedAccounts[0]?.key);
675
- recordError(account.label, account.type, 429);
1340
+ recordAttemptError(account.label, account.type, 429);
676
1341
  recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
677
1342
  lastError = await response.text();
678
1343
  logger.always(`[proxy] ← 429 account=${account.label} backoff-level=${accountState.backoffLevel} cooldown=${Math.round(backoffMs / 1000)}s`);
679
1344
  logAttempt(429, "rate_limit_error", String(lastError));
1345
+ tracer?.setError("rate_limit_error", String(lastError).slice(0, 500));
1346
+ tracer?.recordRetry(account.label, "rate_limit");
1347
+ upstreamSpan?.end();
1348
+ upstreamSpan = undefined;
680
1349
  continue;
681
1350
  }
682
1351
  // On 401 for refreshable OAuth: refresh token and retry before failing over.
683
- if (response.status === 401 &&
684
- account.type === "oauth" &&
685
- account.refreshToken) {
686
- recordError(account.label, account.type, 401);
1352
+ if (response.status === 401 && account.type === "oauth" && account.refreshToken) {
1353
+ recordAttemptError(account.label, account.type, 401);
687
1354
  let authRetrySucceeded = false;
688
1355
  let authRetryError = "received 401 from Anthropic";
689
1356
  for (let authRetry = 0; authRetry < MAX_AUTH_RETRIES; authRetry++) {
@@ -694,14 +1361,11 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
694
1361
  authRetryError = `refresh failed for account=${account.label} attempt ${authRetry + 1}/${MAX_AUTH_RETRIES}: ${refreshSucceeded.error?.slice(0, 200) ?? "unknown"}`;
695
1362
  lastError = authRetryError;
696
1363
  logger.always(`[proxy] ⚠ account=${account.label} refresh failed on attempt ${authRetry + 1}`);
697
- // eslint-disable-next-line max-depth
698
- if (accountState.consecutiveRefreshFailures >=
699
- MAX_CONSECUTIVE_REFRESH_FAILURES) {
1364
+ if (accountState.consecutiveRefreshFailures >= MAX_CONSECUTIVE_REFRESH_FAILURES) {
700
1365
  await disableAccountUntilReauth(account, accountState);
701
1366
  authFailureMessage = formatReauthMessage(account.label);
702
1367
  break;
703
1368
  }
704
- // eslint-disable-next-line max-depth
705
1369
  if (authRetry < MAX_AUTH_RETRIES - 1) {
706
1370
  await sleep(2000);
707
1371
  }
@@ -715,27 +1379,24 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
715
1379
  const retryResp = await fetch(url, {
716
1380
  method: "POST",
717
1381
  headers,
718
- body: buildUpstreamBody(),
1382
+ body: buildUpstreamBody(account.token).bodyStr,
719
1383
  signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
720
1384
  });
721
- // eslint-disable-next-line max-depth
722
1385
  if (retryResp.ok) {
723
1386
  authRetrySucceeded = true;
724
1387
  accountState.consecutiveRefreshFailures = 0;
725
1388
  accountState.backoffLevel = 0;
726
1389
  accountState.coolingUntil = undefined;
727
1390
  logger.always(`[proxy] ← 200 account=${account.label} (after ${authRetry + 1} refresh(es))`);
728
- recordSuccess(account.label, account.type);
729
- logAttempt(retryResp.status);
1391
+ // Final success is recorded only once the response path
1392
+ // that reaches the client is fully determined.
730
1393
  // Capture quota headers after successful auth-retry
731
1394
  {
732
1395
  const retryQuota = parseQuotaHeaders(retryResp.headers);
733
- // eslint-disable-next-line max-depth
734
1396
  if (retryQuota) {
735
1397
  saveAccountQuota(account.label, retryQuota).catch(() => { });
736
1398
  }
737
1399
  }
738
- // eslint-disable-next-line max-depth
739
1400
  if (body.stream && retryResp.body) {
740
1401
  const retryReader = retryResp.body.getReader();
741
1402
  let retryStreamClosed = false;
@@ -757,9 +1418,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
757
1418
  controller.enqueue(value);
758
1419
  }
759
1420
  catch (streamErr) {
760
- const errMsg = streamErr instanceof Error
761
- ? streamErr.message
762
- : String(streamErr);
1421
+ const errMsg = streamErr instanceof Error ? streamErr.message : String(streamErr);
763
1422
  logger.always(`[proxy] mid-stream error (auth-retry) account=${account.label}: ${errMsg}`);
764
1423
  logStreamError({
765
1424
  timestamp: new Date().toISOString(),
@@ -782,12 +1441,57 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
782
1441
  retryReader.cancel();
783
1442
  },
784
1443
  });
1444
+ // OTel: pipe auth-retry stream through SSE interceptor
1445
+ let retryClientStream = retryStream;
1446
+ if (tracer) {
1447
+ try {
1448
+ const { stream: retryInterceptor, telemetry: retryTelemetry } = createSSEInterceptor();
1449
+ retryClientStream = retryStream.pipeThrough(retryInterceptor);
1450
+ const capturedTracer2 = tracer;
1451
+ const capturedUpstreamSpan2 = upstreamSpan;
1452
+ const capturedRetryResp = retryResp;
1453
+ const capturedRetryRequestBytes = finalBodyStr.length;
1454
+ const capturedAccountLabel2 = account.label;
1455
+ retryTelemetry
1456
+ .then((data) => {
1457
+ capturedTracer2.setUsage({
1458
+ inputTokens: data.usage.inputTokens,
1459
+ outputTokens: data.usage.outputTokens,
1460
+ cacheCreationTokens: data.usage.cacheCreationInputTokens,
1461
+ cacheReadTokens: data.usage.cacheReadInputTokens,
1462
+ });
1463
+ capturedTracer2.logStreamEvents(data.events);
1464
+ capturedTracer2.logUpstreamResponseHeaders(Object.fromEntries([...capturedRetryResp.headers.entries()]));
1465
+ capturedTracer2.recordMetrics();
1466
+ capturedTracer2.recordBodySizes(capturedRetryRequestBytes, data.totalBytesReceived);
1467
+ capturedUpstreamSpan2?.end();
1468
+ capturedTracer2.end(200, Date.now() - requestStartTime);
1469
+ recordFinalSuccess(capturedAccountLabel2, account.type);
1470
+ // Deferred JSONL log with token usage (auth-retry streaming)
1471
+ logFinalRequest(200, capturedAccountLabel2, account.type, undefined, undefined, {
1472
+ inputTokens: data.usage.inputTokens,
1473
+ outputTokens: data.usage.outputTokens,
1474
+ cacheCreationTokens: data.usage.cacheCreationInputTokens,
1475
+ cacheReadTokens: data.usage.cacheReadInputTokens,
1476
+ });
1477
+ })
1478
+ .catch((err) => {
1479
+ capturedTracer2.setError("stream_error", err instanceof Error ? err.message : String(err));
1480
+ capturedUpstreamSpan2?.end();
1481
+ capturedTracer2.end(500, Date.now() - requestStartTime);
1482
+ recordFinalError(500, capturedAccountLabel2, account.type);
1483
+ logFinalRequest(500, capturedAccountLabel2, account.type, "stream_error", err instanceof Error ? err.message : String(err));
1484
+ });
1485
+ }
1486
+ catch {
1487
+ retryClientStream = retryStream;
1488
+ }
1489
+ }
785
1490
  const responseHeaders = {
786
1491
  "content-type": "text/event-stream",
787
1492
  "cache-control": "no-cache",
788
1493
  connection: "keep-alive",
789
1494
  };
790
- // eslint-disable-next-line max-depth
791
1495
  for (const h of [
792
1496
  "retry-after",
793
1497
  "anthropic-ratelimit-requests-remaining",
@@ -796,25 +1500,81 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
796
1500
  "anthropic-ratelimit-tokens-limit",
797
1501
  ]) {
798
1502
  const val = retryResp.headers.get(h);
799
- // eslint-disable-next-line max-depth
800
1503
  if (val) {
801
1504
  responseHeaders[h] = val;
802
1505
  }
803
1506
  }
804
- return new Response(retryStream, {
1507
+ return new Response(retryClientStream, {
805
1508
  status: retryResp.status,
806
1509
  headers: responseHeaders,
807
1510
  });
808
1511
  }
809
- return retryResp.json();
1512
+ // OTel: non-streaming auth-retry success
1513
+ const retryRespHeaders = Object.fromEntries([...retryResp.headers.entries()]);
1514
+ const retryText = await retryResp.text();
1515
+ tracer?.logUpstreamResponseHeaders(retryRespHeaders);
1516
+ tracer?.logUpstreamResponseBody(retryText);
1517
+ logProxyBody({
1518
+ phase: "upstream_response",
1519
+ headers: retryRespHeaders,
1520
+ body: retryText,
1521
+ bodySize: Buffer.byteLength(retryText, "utf8"),
1522
+ contentType: retryRespHeaders["content-type"] ?? "application/json",
1523
+ account: account.label,
1524
+ accountType: account.type,
1525
+ attempt: attemptNumber,
1526
+ responseStatus: retryResp.status,
1527
+ durationMs: Date.now() - fetchStartMs,
1528
+ });
1529
+ logProxyBody({
1530
+ phase: "client_response",
1531
+ headers: retryRespHeaders,
1532
+ body: retryText,
1533
+ bodySize: Buffer.byteLength(retryText, "utf8"),
1534
+ contentType: retryRespHeaders["content-type"] ?? "application/json",
1535
+ account: account.label,
1536
+ accountType: account.type,
1537
+ attempt: attemptNumber,
1538
+ responseStatus: retryResp.status,
1539
+ durationMs: Date.now() - requestStartTime,
1540
+ });
1541
+ const retryJson = JSON.parse(retryText);
1542
+ if (tracer && retryJson && typeof retryJson === "object") {
1543
+ const retryUsage = retryJson.usage;
1544
+ if (retryUsage) {
1545
+ tracer.setUsage({
1546
+ inputTokens: retryUsage.input_tokens ?? 0,
1547
+ outputTokens: retryUsage.output_tokens ?? 0,
1548
+ cacheCreationTokens: retryUsage.cache_creation_input_tokens ?? 0,
1549
+ cacheReadTokens: retryUsage.cache_read_input_tokens ?? 0,
1550
+ });
1551
+ }
1552
+ tracer.recordMetrics();
1553
+ const retryJsonStr = JSON.stringify(retryJson);
1554
+ tracer.recordBodySizes(finalBodyStr.length, retryJsonStr.length);
1555
+ upstreamSpan?.end();
1556
+ tracer.end(retryResp.status, Date.now() - requestStartTime);
1557
+ recordFinalSuccess(account.label, account.type);
1558
+ logFinalRequest(retryResp.status, account.label, account.type, undefined, undefined, {
1559
+ inputTokens: retryUsage?.input_tokens,
1560
+ outputTokens: retryUsage?.output_tokens,
1561
+ cacheCreationTokens: retryUsage?.cache_creation_input_tokens,
1562
+ cacheReadTokens: retryUsage?.cache_read_input_tokens,
1563
+ });
1564
+ }
1565
+ else {
1566
+ upstreamSpan?.end();
1567
+ recordFinalSuccess(account.label, account.type);
1568
+ logFinalRequest(retryResp.status, account.label, account.type);
1569
+ }
1570
+ return retryJson;
810
1571
  }
811
1572
  const retryStatus = retryResp.status;
812
1573
  const retryBody = await retryResp.text();
813
1574
  authRetryError = `retry ${authRetry + 1}/${MAX_AUTH_RETRIES} failed with status ${retryStatus}`;
814
1575
  lastError = retryBody;
815
1576
  logger.debug(`[proxy] retry ${authRetry + 1} failed: ${retryStatus} ${retryBody.substring(0, 120)}`);
816
- recordError(account.label, account.type, retryStatus);
817
- // eslint-disable-next-line max-depth
1577
+ recordAttemptError(account.label, account.type, retryStatus);
818
1578
  if (retryStatus === 429) {
819
1579
  sawRateLimit = true;
820
1580
  const retryAfter = retryResp.headers.get("retry-after");
@@ -827,38 +1587,33 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
827
1587
  recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
828
1588
  break;
829
1589
  }
830
- // eslint-disable-next-line max-depth
831
- if (retryStatus === 401 ||
832
- retryStatus === 402 ||
833
- retryStatus === 403) {
834
- // eslint-disable-next-line max-depth
1590
+ if (retryStatus === 401 || retryStatus === 402 || retryStatus === 403) {
835
1591
  if (authRetry < MAX_AUTH_RETRIES - 1) {
836
1592
  await sleep(1000);
837
1593
  }
838
1594
  continue;
839
1595
  }
840
- // eslint-disable-next-line max-depth
841
1596
  if (isTransientHttpFailure(retryStatus, retryBody)) {
842
1597
  // Decision 8: No cooldown for transient errors — rotate immediately
843
1598
  sawTransientFailure = true;
844
1599
  break;
845
1600
  }
846
1601
  logAttempt(retryStatus, "api_error", summarizeErrorMessage(retryBody));
847
- // eslint-disable-next-line max-depth
1602
+ recordFinalError(retryStatus, account.label, account.type);
848
1603
  try {
1604
+ logFinalRequest(retryStatus, account.label, account.type, "api_error", summarizeErrorMessage(retryBody));
849
1605
  return JSON.parse(retryBody);
850
1606
  }
851
1607
  catch {
1608
+ logFinalRequest(retryStatus, account.label, account.type, "api_error", summarizeErrorMessage(retryBody));
852
1609
  return buildClaudeError(retryStatus, retryBody);
853
1610
  }
854
1611
  }
855
1612
  catch (retryFetchErr) {
856
1613
  // Decision 8: No cooldown for network errors — rotate immediately
857
1614
  sawNetworkError = true;
858
- recordError(account.label, account.type, 502);
859
- const message = retryFetchErr instanceof Error
860
- ? retryFetchErr.message
861
- : String(retryFetchErr);
1615
+ recordAttemptError(account.label, account.type, 502);
1616
+ const message = retryFetchErr instanceof Error ? retryFetchErr.message : String(retryFetchErr);
862
1617
  authRetryError = `network error on retry ${authRetry + 1}: ${message}`;
863
1618
  lastError = authRetryError;
864
1619
  logger.debug(`[proxy] ${authRetryError}`);
@@ -866,96 +1621,83 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
866
1621
  }
867
1622
  }
868
1623
  if (!authRetrySucceeded) {
869
- // eslint-disable-next-line max-depth
870
1624
  if (!accountState.permanentlyDisabled) {
871
- // eslint-disable-next-line max-depth
872
- if (!accountState.coolingUntil ||
873
- accountState.coolingUntil <= Date.now()) {
874
- accountState.coolingUntil =
875
- Date.now() + AUTH_COOLDOWN_MS;
1625
+ if (!accountState.coolingUntil || accountState.coolingUntil <= Date.now()) {
1626
+ accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
876
1627
  }
877
1628
  recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
878
1629
  }
879
1630
  lastError = authRetryError;
880
1631
  logger.always(`[proxy] ⚠ account=${account.label} auth retries exhausted, cooldown=5min`);
881
1632
  logAttempt(401, "authentication_error", authRetryError);
1633
+ tracer?.setError("authentication_error", authRetryError);
1634
+ tracer?.recordRetry(account.label, "auth_exhausted");
1635
+ upstreamSpan?.end();
1636
+ upstreamSpan = undefined;
882
1637
  continue;
883
1638
  }
884
1639
  }
885
1640
  if (!response.ok) {
886
1641
  const errBody = await response.text();
887
- // Log full error for debugging
888
1642
  const errRespHeaders = {};
889
1643
  response.headers.forEach((v, k) => {
890
1644
  errRespHeaders[k] = v;
891
1645
  });
892
- logFullRequestResponse({
893
- timestamp: new Date().toISOString(),
894
- requestId: ctx.requestId,
1646
+ tracer?.logUpstreamResponseHeaders(errRespHeaders);
1647
+ tracer?.logUpstreamResponseBody(errBody);
1648
+ logProxyBody({
1649
+ phase: "upstream_response",
1650
+ headers: errRespHeaders,
1651
+ body: errBody,
1652
+ bodySize: Buffer.byteLength(errBody, "utf8"),
1653
+ contentType: errRespHeaders["content-type"] ?? "application/json",
895
1654
  account: account.label,
896
- model: body.model,
897
- stream: !!body.stream,
898
- requestHeaders: redactSensitiveHeaders(headers),
899
- requestBody: {
900
- model: body.model,
901
- max_tokens: body.max_tokens,
902
- stream: body.stream,
903
- system: Array.isArray(body.system)
904
- ? `[${body.system.length} blocks]`
905
- : typeof body.system,
906
- messages: Array.isArray(body.messages)
907
- ? `[${body.messages.length} messages]`
908
- : "?",
909
- tools: Array.isArray(body.tools)
910
- ? `[${body.tools.length} tools]`
911
- : "none",
912
- tool_choice: body.tool_choice,
913
- thinking: body.thinking,
914
- },
915
- requestBodySize: bodyStr.length,
1655
+ accountType: account.type,
1656
+ attempt: attemptNumber,
916
1657
  responseStatus: response.status,
917
- responseHeaders: errRespHeaders,
918
- responseBody: errBody.substring(0, 2000),
919
- responseBodySize: errBody.length,
920
1658
  durationMs: Date.now() - fetchStartMs,
921
1659
  });
922
- // Request-shape errors (do not retry).
1660
+ // Upstream invalid_request_error responses are not retried on the
1661
+ // same Anthropic account, but may still be handed to fallback providers.
923
1662
  if (isInvalidRequestError(response.status, errBody)) {
924
- logger.always(`[proxy] ← ${response.status} request-shape error (no retry)`);
1663
+ logger.always(`[proxy] ← ${response.status} upstream invalid_request_error`);
925
1664
  logAttempt(response.status, "invalid_request_error", summarizeErrorMessage(errBody));
926
- try {
927
- return JSON.parse(errBody);
928
- }
929
- catch {
930
- return buildClaudeError(response.status, errBody);
931
- }
1665
+ tracer?.setError("invalid_request_error", summarizeErrorMessage(errBody));
1666
+ invalidRequestFailure = {
1667
+ status: response.status,
1668
+ body: errBody,
1669
+ contentType: errRespHeaders["content-type"],
1670
+ };
1671
+ lastError = summarizeErrorMessage(errBody);
1672
+ upstreamSpan?.end();
1673
+ upstreamSpan = undefined;
1674
+ break;
932
1675
  }
933
1676
  // Auth failures for OAuth accounts without refresh token.
934
- if ((response.status === 401 ||
935
- response.status === 402 ||
936
- response.status === 403) &&
1677
+ if ((response.status === 401 || response.status === 402 || response.status === 403) &&
937
1678
  account.type === "oauth" &&
938
1679
  !account.refreshToken) {
939
- recordError(account.label, account.type, response.status);
1680
+ recordAttemptError(account.label, account.type, response.status);
940
1681
  accountState.consecutiveRefreshFailures += 1;
941
1682
  accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
942
1683
  recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
943
- if (accountState.consecutiveRefreshFailures >=
944
- MAX_CONSECUTIVE_REFRESH_FAILURES) {
1684
+ if (accountState.consecutiveRefreshFailures >= MAX_CONSECUTIVE_REFRESH_FAILURES) {
945
1685
  await disableAccountUntilReauth(account, accountState);
946
1686
  }
947
1687
  authFailureMessage = formatReauthMessage(account.label);
948
1688
  logger.always(`[proxy] ← ${response.status} account=${account.label} cooldown=5min`);
949
1689
  lastError = errBody;
950
1690
  logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
1691
+ tracer?.setError("authentication_error", summarizeErrorMessage(errBody));
1692
+ tracer?.recordRetry(account.label, "auth_no_refresh");
1693
+ upstreamSpan?.end();
1694
+ upstreamSpan = undefined;
951
1695
  continue;
952
1696
  }
953
1697
  // Auth failures for API-key accounts.
954
- if ((response.status === 401 ||
955
- response.status === 402 ||
956
- response.status === 403) &&
1698
+ if ((response.status === 401 || response.status === 402 || response.status === 403) &&
957
1699
  account.type === "api_key") {
958
- recordError(account.label, account.type, response.status);
1700
+ recordAttemptError(account.label, account.type, response.status);
959
1701
  authFailureMessage =
960
1702
  "Authentication failed for Anthropic API key credentials. Update ANTHROPIC_API_KEY or re-login with OAuth.";
961
1703
  accountState.coolingUntil = Date.now() + AUTH_COOLDOWN_MS;
@@ -963,49 +1705,126 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
963
1705
  logger.always(`[proxy] ← ${response.status} account=${account.label} cooldown=5min`);
964
1706
  lastError = errBody;
965
1707
  logAttempt(response.status, "authentication_error", summarizeErrorMessage(errBody));
1708
+ tracer?.setError("authentication_error", summarizeErrorMessage(errBody));
1709
+ tracer?.recordRetry(account.label, "auth_api_key");
1710
+ upstreamSpan?.end();
1711
+ upstreamSpan = undefined;
966
1712
  continue;
967
1713
  }
968
1714
  // 404 is generally model/account specific; return immediately (no cooldown per Decision 8).
969
1715
  if (response.status === 404) {
970
- recordError(account.label, account.type, response.status);
1716
+ recordFinalError(response.status, account.label, account.type);
971
1717
  logger.always(`[proxy] ← 404 account=${account.label}`);
972
1718
  logAttempt(404, "not_found_error", summarizeErrorMessage(errBody));
1719
+ tracer?.setError("not_found_error", summarizeErrorMessage(errBody));
1720
+ upstreamSpan?.end();
1721
+ tracer?.end(404, Date.now() - requestStartTime);
973
1722
  try {
974
- return JSON.parse(errBody);
1723
+ const parsedError = JSON.parse(errBody);
1724
+ logFinalRequest(404, account.label, account.type, "not_found_error", summarizeErrorMessage(errBody));
1725
+ logProxyBody({
1726
+ phase: "client_response",
1727
+ headers: {
1728
+ "content-type": errRespHeaders["content-type"] ?? "application/json",
1729
+ },
1730
+ body: errBody,
1731
+ bodySize: Buffer.byteLength(errBody, "utf8"),
1732
+ contentType: errRespHeaders["content-type"] ?? "application/json",
1733
+ account: account.label,
1734
+ accountType: account.type,
1735
+ attempt: attemptNumber,
1736
+ responseStatus: 404,
1737
+ durationMs: Date.now() - requestStartTime,
1738
+ });
1739
+ return parsedError;
975
1740
  }
976
1741
  catch {
977
- return buildClaudeError(404, errBody);
1742
+ logFinalRequest(404, account.label, account.type, "not_found_error", summarizeErrorMessage(errBody));
1743
+ const clientError = buildClaudeError(404, errBody);
1744
+ const clientErrorBody = JSON.stringify(clientError);
1745
+ logProxyBody({
1746
+ phase: "client_response",
1747
+ headers: { "content-type": "application/json" },
1748
+ body: clientErrorBody,
1749
+ bodySize: Buffer.byteLength(clientErrorBody, "utf8"),
1750
+ contentType: "application/json",
1751
+ account: account.label,
1752
+ accountType: account.type,
1753
+ attempt: attemptNumber,
1754
+ responseStatus: 404,
1755
+ durationMs: Date.now() - requestStartTime,
1756
+ });
1757
+ return clientError;
978
1758
  }
979
1759
  }
980
1760
  // Decision 8: Transient upstream failures — immediate rotation, NO cooldown.
981
1761
  if (isTransientHttpFailure(response.status, errBody)) {
982
- recordError(account.label, account.type, response.status);
1762
+ recordAttemptError(account.label, account.type, response.status);
983
1763
  sawTransientFailure = true;
984
1764
  // No cooldown for transient errors (502, 503, etc.) — rotate immediately
985
1765
  logger.always(`[proxy] ← ${response.status} account=${account.label} (transient, rotating)`);
986
1766
  lastError = errBody;
987
1767
  logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
1768
+ tracer?.setError("transient_error", summarizeErrorMessage(errBody));
1769
+ tracer?.recordRetry(account.label, "transient");
1770
+ upstreamSpan?.end();
1771
+ upstreamSpan = undefined;
988
1772
  continue;
989
1773
  }
990
1774
  // Other non-ok errors → return as-is.
991
- recordError(account.label, account.type, response.status);
1775
+ recordFinalError(response.status, account.label, account.type);
992
1776
  logger.always(`[proxy] ← ${response.status} account=${account.label}`);
993
1777
  logger.debug(`[claude-proxy] error body: ${errBody.substring(0, 200)}`);
994
1778
  logAttempt(response.status, "api_error", summarizeErrorMessage(errBody));
1779
+ tracer?.setError("api_error", summarizeErrorMessage(errBody));
1780
+ upstreamSpan?.end();
1781
+ tracer?.end(response.status, Date.now() - requestStartTime);
995
1782
  try {
996
- return JSON.parse(errBody);
1783
+ const parsedError = JSON.parse(errBody);
1784
+ logFinalRequest(response.status, account.label, account.type, "api_error", summarizeErrorMessage(errBody));
1785
+ logProxyBody({
1786
+ phase: "client_response",
1787
+ headers: {
1788
+ "content-type": errRespHeaders["content-type"] ?? "application/json",
1789
+ },
1790
+ body: errBody,
1791
+ bodySize: Buffer.byteLength(errBody, "utf8"),
1792
+ contentType: errRespHeaders["content-type"] ?? "application/json",
1793
+ account: account.label,
1794
+ accountType: account.type,
1795
+ attempt: attemptNumber,
1796
+ responseStatus: response.status,
1797
+ durationMs: Date.now() - requestStartTime,
1798
+ });
1799
+ return parsedError;
997
1800
  }
998
1801
  catch {
999
- return buildClaudeError(response.status, errBody);
1802
+ logFinalRequest(response.status, account.label, account.type, "api_error", summarizeErrorMessage(errBody));
1803
+ const clientError = buildClaudeError(response.status, errBody);
1804
+ const clientErrorBody = JSON.stringify(clientError);
1805
+ logProxyBody({
1806
+ phase: "client_response",
1807
+ headers: { "content-type": "application/json" },
1808
+ body: clientErrorBody,
1809
+ bodySize: Buffer.byteLength(clientErrorBody, "utf8"),
1810
+ contentType: "application/json",
1811
+ account: account.label,
1812
+ accountType: account.type,
1813
+ attempt: attemptNumber,
1814
+ responseStatus: response.status,
1815
+ durationMs: Date.now() - requestStartTime,
1816
+ });
1817
+ return clientError;
1000
1818
  }
1001
1819
  }
1002
1820
  // Success path.
1003
1821
  accountState.backoffLevel = 0;
1004
1822
  accountState.coolingUntil = undefined;
1005
1823
  accountState.consecutiveRefreshFailures = 0;
1006
- recordSuccess(account.label, account.type);
1007
1824
  logger.always(`[proxy] ← ${response.status} account=${account.label}`);
1008
- logAttempt(response.status);
1825
+ // NOTE: logAttempt is deferred below so we can include token
1826
+ // usage. For streaming, the SSE interceptor callback logs it;
1827
+ // for non-streaming, we log after JSON parsing.
1009
1828
  // Capture quota/utilisation headers (fire-and-forget).
1010
1829
  const quota = parseQuotaHeaders(response.headers);
1011
1830
  if (quota) {
@@ -1013,53 +1832,25 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1013
1832
  // Non-fatal: quota persistence is best-effort
1014
1833
  });
1015
1834
  }
1016
- // Log full request + response headers for debugging
1017
1835
  const respHeaders = {};
1018
1836
  response.headers.forEach((v, k) => {
1019
1837
  respHeaders[k] = v;
1020
1838
  });
1021
- logFullRequestResponse({
1022
- timestamp: new Date().toISOString(),
1023
- requestId: ctx.requestId,
1024
- account: account.label,
1025
- model: body.model,
1026
- stream: !!body.stream,
1027
- requestHeaders: redactSensitiveHeaders(headers),
1028
- requestBody: {
1029
- model: body.model,
1030
- max_tokens: body.max_tokens,
1031
- stream: body.stream,
1032
- system: Array.isArray(body.system)
1033
- ? `[${body.system.length} blocks]`
1034
- : typeof body.system,
1035
- messages: Array.isArray(body.messages)
1036
- ? `[${body.messages.length} messages]`
1037
- : "?",
1038
- tools: Array.isArray(body.tools)
1039
- ? `[${body.tools.length} tools]`
1040
- : "none",
1041
- tool_choice: body.tool_choice,
1042
- thinking: body.thinking,
1043
- metadata: body.metadata ? "present" : "absent",
1044
- },
1045
- requestBodySize: bodyStr.length,
1046
- responseStatus: response.status,
1047
- responseHeaders: respHeaders,
1048
- durationMs: Date.now() - fetchStartMs,
1049
- });
1839
+ tracer?.logUpstreamResponseHeaders(respHeaders);
1050
1840
  if (body.stream) {
1051
1841
  // Bootstrap retry: read first chunk to verify stream is valid.
1052
1842
  if (response.body) {
1053
1843
  const reader = response.body.getReader();
1054
1844
  const firstChunk = await reader.read();
1055
- if (firstChunk.done ||
1056
- !firstChunk.value ||
1057
- firstChunk.value.length === 0) {
1845
+ if (firstChunk.done || !firstChunk.value || firstChunk.value.length === 0) {
1058
1846
  // Empty stream — retry with next account.
1059
1847
  reader.cancel();
1060
1848
  accountState.coolingUntil = Date.now() + 10_000;
1061
1849
  recordCooldown(account.label, account.type, accountState.coolingUntil, accountState.backoffLevel);
1062
1850
  logger.always(`[proxy] ← empty stream from account=${account.label}, trying next`);
1851
+ tracer?.recordRetry(account.label, "empty_stream");
1852
+ upstreamSpan?.end();
1853
+ upstreamSpan = undefined;
1063
1854
  continue;
1064
1855
  }
1065
1856
  // Stream is valid — create a new ReadableStream with first chunk prepended.
@@ -1085,9 +1876,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1085
1876
  controller.enqueue(value);
1086
1877
  }
1087
1878
  catch (streamErr) {
1088
- const errMsg = streamErr instanceof Error
1089
- ? streamErr.message
1090
- : String(streamErr);
1879
+ const errMsg = streamErr instanceof Error ? streamErr.message : String(streamErr);
1091
1880
  logger.always(`[proxy] mid-stream error account=${account.label}: ${errMsg}`);
1092
1881
  logStreamError({
1093
1882
  timestamp: new Date().toISOString(),
@@ -1112,6 +1901,175 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1112
1901
  reader.cancel();
1113
1902
  },
1114
1903
  });
1904
+ // OTel: pipe stream through SSE interceptor for telemetry extraction.
1905
+ // The interceptor passes all bytes through unmodified and resolves
1906
+ // its telemetry promise when the stream finishes.
1907
+ const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture();
1908
+ let streamSource = remainingStream;
1909
+ if (tracer) {
1910
+ try {
1911
+ const { stream: interceptor, telemetry } = createSSEInterceptor({ captureRawText: true });
1912
+ streamSource = streamSource.pipeThrough(interceptor);
1913
+ // Capture refs in const variables for the async closure —
1914
+ // loop variables (upstreamSpan, response) will change on next iteration,
1915
+ // and TypeScript needs the narrowed type for tracer.
1916
+ const capturedTracer = tracer;
1917
+ const capturedUpstreamSpan = upstreamSpan;
1918
+ const capturedResponse = response;
1919
+ const capturedRequestBytes = finalBodyStr.length;
1920
+ const capturedAccountLabel = account.label;
1921
+ Promise.all([telemetry, clientCapture])
1922
+ .then(([data, clientBody]) => {
1923
+ capturedTracer.setUsage({
1924
+ inputTokens: data.usage.inputTokens,
1925
+ outputTokens: data.usage.outputTokens,
1926
+ cacheCreationTokens: data.usage.cacheCreationInputTokens,
1927
+ cacheReadTokens: data.usage.cacheReadInputTokens,
1928
+ });
1929
+ capturedTracer.logStreamEvents(data.events);
1930
+ // Extract rate limits from response headers
1931
+ const rateLimit5h = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
1932
+ const rateLimit7d = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
1933
+ const usageUpdate = {
1934
+ inputTokens: data.usage.inputTokens,
1935
+ outputTokens: data.usage.outputTokens,
1936
+ cacheCreationTokens: data.usage.cacheCreationInputTokens,
1937
+ cacheReadTokens: data.usage.cacheReadInputTokens,
1938
+ };
1939
+ if (!isNaN(rateLimit5h)) {
1940
+ usageUpdate.rateLimitAfter5h = rateLimit5h;
1941
+ }
1942
+ if (!isNaN(rateLimit7d)) {
1943
+ usageUpdate.rateLimitAfter7d = rateLimit7d;
1944
+ }
1945
+ if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) {
1946
+ capturedTracer.setUsage(usageUpdate);
1947
+ }
1948
+ capturedTracer.logUpstreamResponseBody(data.rawText ?? "");
1949
+ capturedTracer.recordMetrics();
1950
+ capturedTracer.recordBodySizes(capturedRequestBytes, data.totalBytesReceived);
1951
+ capturedUpstreamSpan?.end();
1952
+ capturedTracer.end(200, Date.now() - requestStartTime);
1953
+ recordFinalSuccess(capturedAccountLabel, account.type);
1954
+ // Deferred JSONL log with token usage + traceId
1955
+ // (streaming: tokens only available after SSE stream finishes)
1956
+ logFinalRequest(200, capturedAccountLabel, account.type, undefined, undefined, {
1957
+ inputTokens: data.usage.inputTokens,
1958
+ outputTokens: data.usage.outputTokens,
1959
+ cacheCreationTokens: data.usage.cacheCreationInputTokens,
1960
+ cacheReadTokens: data.usage.cacheReadInputTokens,
1961
+ });
1962
+ logProxyBody({
1963
+ phase: "upstream_response",
1964
+ headers: respHeaders,
1965
+ body: data.rawText ?? "",
1966
+ bodySize: data.totalBytesReceived,
1967
+ contentType: respHeaders["content-type"] ?? "text/event-stream",
1968
+ account: capturedAccountLabel,
1969
+ accountType: account.type,
1970
+ attempt: attemptNumber,
1971
+ responseStatus: 200,
1972
+ durationMs: Date.now() - requestStartTime,
1973
+ });
1974
+ logProxyBody({
1975
+ phase: "client_response",
1976
+ headers: responseHeaders,
1977
+ body: clientBody.text,
1978
+ bodySize: clientBody.totalBytes,
1979
+ contentType: responseHeaders["content-type"] ?? "text/event-stream",
1980
+ account: capturedAccountLabel,
1981
+ accountType: account.type,
1982
+ attempt: attemptNumber,
1983
+ responseStatus: 200,
1984
+ durationMs: Date.now() - requestStartTime,
1985
+ });
1986
+ })
1987
+ .catch((err) => {
1988
+ capturedTracer.setError("stream_error", err instanceof Error ? err.message : String(err));
1989
+ capturedUpstreamSpan?.end();
1990
+ capturedTracer.end(500, Date.now() - requestStartTime);
1991
+ recordFinalError(500, capturedAccountLabel, account.type);
1992
+ // Log the streaming error in JSONL
1993
+ logFinalRequest(500, capturedAccountLabel, account.type, "stream_error", err instanceof Error ? err.message : String(err));
1994
+ });
1995
+ }
1996
+ catch {
1997
+ // Interceptor attachment failed after stream setup; response handling continues.
1998
+ }
1999
+ }
2000
+ else {
2001
+ // No tracer — still intercept stream for JSONL token logging
2002
+ upstreamSpan?.end();
2003
+ try {
2004
+ const { stream: noTracerInterceptor, telemetry: noTracerTelemetry } = createSSEInterceptor({
2005
+ captureRawText: true,
2006
+ });
2007
+ streamSource = streamSource.pipeThrough(noTracerInterceptor);
2008
+ const capturedAccountLabel = account.label;
2009
+ Promise.all([noTracerTelemetry, clientCapture])
2010
+ .then(([data, clientBody]) => {
2011
+ recordFinalSuccess(capturedAccountLabel, account.type);
2012
+ logFinalRequest(200, capturedAccountLabel, account.type, undefined, undefined, {
2013
+ inputTokens: data.usage.inputTokens,
2014
+ outputTokens: data.usage.outputTokens,
2015
+ cacheCreationTokens: data.usage.cacheCreationInputTokens,
2016
+ cacheReadTokens: data.usage.cacheReadInputTokens,
2017
+ });
2018
+ logProxyBody({
2019
+ phase: "upstream_response",
2020
+ headers: respHeaders,
2021
+ body: data.rawText ?? "",
2022
+ bodySize: data.totalBytesReceived,
2023
+ contentType: respHeaders["content-type"] ?? "text/event-stream",
2024
+ account: capturedAccountLabel,
2025
+ accountType: account.type,
2026
+ attempt: attemptNumber,
2027
+ responseStatus: 200,
2028
+ durationMs: Date.now() - requestStartTime,
2029
+ });
2030
+ logProxyBody({
2031
+ phase: "client_response",
2032
+ headers: responseHeaders,
2033
+ body: clientBody.text,
2034
+ bodySize: clientBody.totalBytes,
2035
+ contentType: responseHeaders["content-type"] ?? "text/event-stream",
2036
+ account: capturedAccountLabel,
2037
+ accountType: account.type,
2038
+ attempt: attemptNumber,
2039
+ responseStatus: 200,
2040
+ durationMs: Date.now() - requestStartTime,
2041
+ });
2042
+ })
2043
+ .catch(() => {
2044
+ recordFinalSuccess(account.label, account.type);
2045
+ logFinalRequest(response.status, account.label, account.type);
2046
+ });
2047
+ }
2048
+ catch {
2049
+ // SSE interceptor creation failed — log without tokens
2050
+ clientCapture
2051
+ .then((clientBody) => {
2052
+ logProxyBody({
2053
+ phase: "client_response",
2054
+ headers: responseHeaders,
2055
+ body: clientBody.text,
2056
+ bodySize: clientBody.totalBytes,
2057
+ contentType: responseHeaders["content-type"] ?? "text/event-stream",
2058
+ account: account.label,
2059
+ accountType: account.type,
2060
+ attempt: attemptNumber,
2061
+ responseStatus: 200,
2062
+ durationMs: Date.now() - requestStartTime,
2063
+ });
2064
+ })
2065
+ .catch(() => {
2066
+ // Non-fatal
2067
+ });
2068
+ recordFinalSuccess(account.label, account.type);
2069
+ logFinalRequest(response.status, account.label, account.type);
2070
+ }
2071
+ }
2072
+ const clientStream = streamSource.pipeThrough(clientCaptureStream);
1115
2073
  // Forward rate limit headers from Anthropic.
1116
2074
  const responseHeaders = {
1117
2075
  "content-type": "text/event-stream",
@@ -1126,20 +2084,124 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1126
2084
  "anthropic-ratelimit-tokens-limit",
1127
2085
  ]) {
1128
2086
  const val = response.headers.get(h);
1129
- // eslint-disable-next-line max-depth
1130
2087
  if (val) {
1131
2088
  responseHeaders[h] = val;
1132
2089
  }
1133
2090
  }
1134
- return new Response(remainingStream, {
2091
+ return new Response(clientStream, {
1135
2092
  status: response.status,
1136
2093
  headers: responseHeaders,
1137
2094
  });
1138
2095
  }
1139
- return buildClaudeError(502, "No response body from upstream");
2096
+ upstreamSpan?.end();
2097
+ tracer?.setError("stream_error", "No response body from upstream");
2098
+ tracer?.end(502, Date.now() - requestStartTime);
2099
+ recordFinalError(502, account.label, account.type);
2100
+ logFinalRequest(502, account.label, account.type, "stream_error", "No response body from upstream");
2101
+ const clientError = buildClaudeError(502, "No response body from upstream");
2102
+ logProxyBody({
2103
+ phase: "client_response",
2104
+ headers: { "content-type": "application/json" },
2105
+ body: JSON.stringify(clientError),
2106
+ bodySize: Buffer.byteLength(JSON.stringify(clientError), "utf8"),
2107
+ contentType: "application/json",
2108
+ account: account.label,
2109
+ accountType: account.type,
2110
+ attempt: attemptNumber,
2111
+ responseStatus: 502,
2112
+ durationMs: Date.now() - requestStartTime,
2113
+ });
2114
+ return clientError;
1140
2115
  }
1141
2116
  // Non-streaming: return JSON directly.
1142
- return response.json();
2117
+ // OTel: extract usage from response JSON before returning.
2118
+ const responseText = await response.text();
2119
+ tracer?.logUpstreamResponseBody(responseText);
2120
+ logProxyBody({
2121
+ phase: "upstream_response",
2122
+ headers: respHeaders,
2123
+ body: responseText,
2124
+ bodySize: Buffer.byteLength(responseText, "utf8"),
2125
+ contentType: respHeaders["content-type"] ?? "application/json",
2126
+ account: account.label,
2127
+ accountType: account.type,
2128
+ attempt: attemptNumber,
2129
+ responseStatus: response.status,
2130
+ durationMs: Date.now() - fetchStartMs,
2131
+ });
2132
+ logProxyBody({
2133
+ phase: "client_response",
2134
+ headers: respHeaders,
2135
+ body: responseText,
2136
+ bodySize: Buffer.byteLength(responseText, "utf8"),
2137
+ contentType: respHeaders["content-type"] ?? "application/json",
2138
+ account: account.label,
2139
+ accountType: account.type,
2140
+ attempt: attemptNumber,
2141
+ responseStatus: response.status,
2142
+ durationMs: Date.now() - requestStartTime,
2143
+ });
2144
+ const responseJson = JSON.parse(responseText);
2145
+ if (tracer && responseJson && typeof responseJson === "object") {
2146
+ const usage = responseJson.usage;
2147
+ if (usage) {
2148
+ tracer.setUsage({
2149
+ inputTokens: usage.input_tokens ?? 0,
2150
+ outputTokens: usage.output_tokens ?? 0,
2151
+ cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
2152
+ cacheReadTokens: usage.cache_read_input_tokens ?? 0,
2153
+ });
2154
+ // Extract rate limits from response headers
2155
+ const rateLimit5h = parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
2156
+ const rateLimit7d = parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
2157
+ if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) {
2158
+ const usageWithRates = {
2159
+ inputTokens: usage.input_tokens ?? 0,
2160
+ outputTokens: usage.output_tokens ?? 0,
2161
+ cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
2162
+ cacheReadTokens: usage.cache_read_input_tokens ?? 0,
2163
+ };
2164
+ if (!isNaN(rateLimit5h)) {
2165
+ usageWithRates.rateLimitAfter5h = rateLimit5h;
2166
+ }
2167
+ if (!isNaN(rateLimit7d)) {
2168
+ usageWithRates.rateLimitAfter7d = rateLimit7d;
2169
+ }
2170
+ tracer.setUsage(usageWithRates);
2171
+ }
2172
+ }
2173
+ tracer.recordMetrics();
2174
+ const responseJsonStr = JSON.stringify(responseJson);
2175
+ tracer.recordBodySizes(finalBodyStr.length, responseJsonStr.length);
2176
+ upstreamSpan?.end();
2177
+ tracer.end(response.status, Date.now() - requestStartTime);
2178
+ recordFinalSuccess(account.label, account.type);
2179
+ logFinalRequest(response.status, account.label, account.type, undefined, undefined, {
2180
+ inputTokens: usage?.input_tokens,
2181
+ outputTokens: usage?.output_tokens,
2182
+ cacheCreationTokens: usage?.cache_creation_input_tokens,
2183
+ cacheReadTokens: usage?.cache_read_input_tokens,
2184
+ });
2185
+ }
2186
+ else {
2187
+ upstreamSpan?.end();
2188
+ // No tracer — still extract usage from response JSON for JSONL logging
2189
+ const noTracerUsage = responseJson && typeof responseJson === "object"
2190
+ ? responseJson.usage
2191
+ : undefined;
2192
+ recordFinalSuccess(account.label, account.type);
2193
+ logFinalRequest(response.status, account.label, account.type, undefined, undefined, {
2194
+ inputTokens: noTracerUsage?.input_tokens,
2195
+ outputTokens: noTracerUsage?.output_tokens,
2196
+ cacheCreationTokens: noTracerUsage?.cache_creation_input_tokens,
2197
+ cacheReadTokens: noTracerUsage?.cache_read_input_tokens,
2198
+ });
2199
+ }
2200
+ return responseJson;
2201
+ }
2202
+ // OTel: end account selection span if all accounts were skipped
2203
+ if (attemptNumber === 0) {
2204
+ acctSelectionSpan?.end();
1143
2205
  }
1144
2206
  // All accounts exhausted — compute earliest recovery time.
1145
2207
  const earliestRecovery = orderedAccounts.reduce((min, account) => {
@@ -1152,42 +2214,18 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1152
2214
  // Try fallback chain (alternative providers)
1153
2215
  const chain = modelRouter?.getFallbackChain() ?? [];
1154
2216
  for (const fallback of chain) {
2217
+ const availability = await ProviderHealthChecker.checkFallbackProviderAvailability(fallback.provider, fallback.model);
2218
+ if (!availability.available) {
2219
+ logger.debug(`[proxy] skipping fallback ${fallback.provider}/${fallback.model}: ${availability.reason ?? "provider unavailable"}`);
2220
+ continue;
2221
+ }
1155
2222
  try {
1156
2223
  logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
1157
2224
  const parsed = parseClaudeRequest(body);
1158
- const opts = {
1159
- input: {
1160
- text: parsed.prompt,
1161
- ...(parsed.images.length > 0
1162
- ? { images: parsed.images }
1163
- : {}),
1164
- },
2225
+ const opts = buildProxyFallbackOptions(parsed, {
1165
2226
  provider: fallback.provider,
1166
2227
  model: fallback.model,
1167
- systemPrompt: parsed.systemPrompt,
1168
- maxTokens: parsed.maxTokens,
1169
- ...(parsed.temperature !== undefined
1170
- ? { temperature: parsed.temperature }
1171
- : {}),
1172
- ...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
1173
- ...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
1174
- ...(parsed.stopSequences?.length
1175
- ? { stopSequences: parsed.stopSequences }
1176
- : {}),
1177
- tools: parsed.tools,
1178
- ...(parsed.toolChoice
1179
- ? { toolChoice: parsed.toolChoice }
1180
- : {}),
1181
- ...(parsed.thinkingConfig
1182
- ? { thinkingConfig: parsed.thinkingConfig }
1183
- : {}),
1184
- ...(parsed.conversationMessages?.length
1185
- ? {
1186
- conversationMessages: parsed.conversationMessages.slice(0, -1),
1187
- }
1188
- : {}),
1189
- maxSteps: 1,
1190
- };
2228
+ });
1191
2229
  if (body.stream) {
1192
2230
  const streamResult = await ctx.neurolink.stream(opts);
1193
2231
  const serializer = new ClaudeStreamSerializer(body.model, 0);
@@ -1195,33 +2233,38 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1195
2233
  for (const frame of serializer.start()) {
1196
2234
  yield frame;
1197
2235
  }
2236
+ let collectedText = "";
1198
2237
  for await (const chunk of streamResult.stream) {
1199
2238
  const text = extractText(chunk);
1200
2239
  if (text) {
2240
+ collectedText += text;
1201
2241
  for (const frame of serializer.pushDelta(text)) {
1202
2242
  yield frame;
1203
2243
  }
1204
2244
  }
1205
2245
  }
1206
2246
  // Emit tool_use blocks if model wants to call tools
1207
- if (streamResult.toolCalls?.length) {
1208
- for (const tc of streamResult.toolCalls) {
1209
- const toolName = tc.toolName ??
1210
- tc.name ??
1211
- "unknown";
1212
- const toolArgs = tc.args ??
1213
- tc.parameters ??
1214
- {};
1215
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
2247
+ const toolCalls = streamResult.toolCalls ?? [];
2248
+ if (!hasTranslatedOutput(collectedText, toolCalls)) {
2249
+ throw new Error(`Translated provider ${fallback.provider}/${fallback.model} returned no content or tool calls`);
2250
+ }
2251
+ if (toolCalls.length) {
2252
+ for (const tc of toolCalls) {
2253
+ const toolName = tc.toolName ?? tc.name ?? "unknown";
2254
+ for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(tc))) {
1216
2255
  yield frame;
1217
2256
  }
1218
2257
  }
1219
2258
  }
1220
2259
  const reason = streamResult.finishReason ?? "end_turn";
1221
- for (const frame of serializer.finish(0, reason)) {
2260
+ const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
2261
+ for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1222
2262
  yield frame;
1223
2263
  }
1224
2264
  }
2265
+ tracer?.end(200, Date.now() - requestStartTime);
2266
+ recordFinalSuccess();
2267
+ logFinalRequest(200, "", fallback.provider);
1225
2268
  return sseGenerator();
1226
2269
  }
1227
2270
  const streamResult = await ctx.neurolink.stream(opts);
@@ -1232,66 +2275,50 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1232
2275
  collectedText += text;
1233
2276
  }
1234
2277
  }
2278
+ if (!hasTranslatedOutput(collectedText, streamResult.toolCalls)) {
2279
+ throw new Error(`Translated provider ${fallback.provider}/${fallback.model} returned no content or tool calls`);
2280
+ }
1235
2281
  const internal = {
1236
2282
  content: collectedText,
1237
2283
  model: streamResult.model,
1238
2284
  finishReason: streamResult.finishReason ?? "end_turn",
1239
2285
  reasoning: undefined,
1240
- usage: streamResult.usage
1241
- ? {
1242
- input: streamResult.usage.input ??
1243
- 0,
1244
- output: streamResult.usage
1245
- .output ?? 0,
1246
- total: streamResult.usage.total ??
1247
- 0,
1248
- }
1249
- : undefined,
2286
+ usage: streamResult.usage ? extractUsageFromStreamResult(streamResult.usage) : undefined,
1250
2287
  toolCalls: streamResult.toolCalls,
1251
2288
  };
1252
- return serializeClaudeResponse(internal, body.model);
2289
+ tracer?.end(200, Date.now() - requestStartTime);
2290
+ recordFinalSuccess();
2291
+ const clientResponse = serializeClaudeResponse(internal, body.model);
2292
+ logFinalRequest(200, "", fallback.provider, undefined, undefined, {
2293
+ inputTokens: internal.usage?.input,
2294
+ outputTokens: internal.usage?.output,
2295
+ });
2296
+ const clientResponseText = JSON.stringify(clientResponse);
2297
+ logProxyBody({
2298
+ phase: "client_response",
2299
+ headers: { "content-type": "application/json" },
2300
+ body: clientResponseText,
2301
+ bodySize: Buffer.byteLength(clientResponseText, "utf8"),
2302
+ contentType: "application/json",
2303
+ responseStatus: 200,
2304
+ durationMs: Date.now() - requestStartTime,
2305
+ });
2306
+ return clientResponse;
1253
2307
  }
1254
2308
  catch (fallbackErr) {
1255
2309
  logger.debug(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
1256
- continue;
1257
2310
  }
1258
2311
  }
1259
2312
  // If no explicit fallback chain is configured, try SDK auto-provider fallback.
1260
- if (chain.length === 0) {
2313
+ // Skip auto-provider when all accounts are rate-limited — the client
2314
+ // (e.g. Claude Code) understands 429 + Retry-After and will retry on
2315
+ // its own. Silently routing to a different provider (e.g. OpenAI)
2316
+ // produces confusing errors like "insufficient_quota".
2317
+ if (chain.length === 0 && !sawRateLimit) {
1261
2318
  try {
1262
2319
  logger.always("[proxy] fallback → auto-provider");
1263
2320
  const parsed = parseClaudeRequest(body);
1264
- const opts = {
1265
- input: {
1266
- text: parsed.prompt,
1267
- ...(parsed.images.length > 0
1268
- ? { images: parsed.images }
1269
- : {}),
1270
- },
1271
- systemPrompt: parsed.systemPrompt,
1272
- maxTokens: parsed.maxTokens,
1273
- ...(parsed.temperature !== undefined
1274
- ? { temperature: parsed.temperature }
1275
- : {}),
1276
- ...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
1277
- ...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
1278
- ...(parsed.stopSequences?.length
1279
- ? { stopSequences: parsed.stopSequences }
1280
- : {}),
1281
- tools: parsed.tools,
1282
- ...(parsed.toolChoice
1283
- ? { toolChoice: parsed.toolChoice }
1284
- : {}),
1285
- ...(parsed.thinkingConfig
1286
- ? { thinkingConfig: parsed.thinkingConfig }
1287
- : {}),
1288
- ...(parsed.conversationMessages?.length
1289
- ? {
1290
- conversationMessages: parsed.conversationMessages.slice(0, -1),
1291
- }
1292
- : {}),
1293
- maxSteps: 1,
1294
- };
2321
+ const opts = buildProxyFallbackOptions(parsed);
1295
2322
  if (body.stream) {
1296
2323
  const streamResult = await ctx.neurolink.stream(opts);
1297
2324
  const serializer = new ClaudeStreamSerializer(body.model, 0);
@@ -1299,33 +2326,38 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1299
2326
  for (const frame of serializer.start()) {
1300
2327
  yield frame;
1301
2328
  }
2329
+ let collectedText = "";
1302
2330
  for await (const chunk of streamResult.stream) {
1303
2331
  const text = extractText(chunk);
1304
2332
  if (text) {
2333
+ collectedText += text;
1305
2334
  for (const frame of serializer.pushDelta(text)) {
1306
2335
  yield frame;
1307
2336
  }
1308
2337
  }
1309
2338
  }
1310
2339
  // Emit tool_use blocks if model wants to call tools
1311
- if (streamResult.toolCalls?.length) {
1312
- for (const tc of streamResult.toolCalls) {
1313
- const toolName = tc.toolName ??
1314
- tc.name ??
1315
- "unknown";
1316
- const toolArgs = tc.args ??
1317
- tc.parameters ??
1318
- {};
1319
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
2340
+ const toolCalls = streamResult.toolCalls ?? [];
2341
+ if (!hasTranslatedOutput(collectedText, toolCalls)) {
2342
+ throw new Error("Translated provider auto-provider returned no content or tool calls");
2343
+ }
2344
+ if (toolCalls.length) {
2345
+ for (const tc of toolCalls) {
2346
+ const toolName = tc.toolName ?? tc.name ?? "unknown";
2347
+ for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(tc))) {
1320
2348
  yield frame;
1321
2349
  }
1322
2350
  }
1323
2351
  }
1324
2352
  const reason = streamResult.finishReason ?? "end_turn";
1325
- for (const frame of serializer.finish(0, reason)) {
2353
+ const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
2354
+ for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1326
2355
  yield frame;
1327
2356
  }
1328
2357
  }
2358
+ tracer?.end(200, Date.now() - requestStartTime);
2359
+ recordFinalSuccess();
2360
+ logFinalRequest(200, "", "auto-provider");
1329
2361
  return sseGenerator();
1330
2362
  }
1331
2363
  const streamResult = await ctx.neurolink.stream(opts);
@@ -1336,48 +2368,102 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1336
2368
  collectedText += text;
1337
2369
  }
1338
2370
  }
2371
+ if (!hasTranslatedOutput(collectedText, streamResult.toolCalls)) {
2372
+ throw new Error("Translated provider auto-provider returned no content or tool calls");
2373
+ }
1339
2374
  const internal = {
1340
2375
  content: collectedText,
1341
2376
  model: streamResult.model,
1342
2377
  finishReason: streamResult.finishReason ?? "end_turn",
1343
2378
  reasoning: undefined,
1344
- usage: streamResult.usage
1345
- ? {
1346
- input: streamResult.usage.input ??
1347
- 0,
1348
- output: streamResult.usage
1349
- .output ?? 0,
1350
- total: streamResult.usage.total ??
1351
- 0,
1352
- }
1353
- : undefined,
2379
+ usage: streamResult.usage ? extractUsageFromStreamResult(streamResult.usage) : undefined,
1354
2380
  toolCalls: streamResult.toolCalls,
1355
2381
  };
1356
- return serializeClaudeResponse(internal, body.model);
2382
+ tracer?.end(200, Date.now() - requestStartTime);
2383
+ recordFinalSuccess();
2384
+ const clientResponse = serializeClaudeResponse(internal, body.model);
2385
+ logFinalRequest(200, "", "auto-provider", undefined, undefined, {
2386
+ inputTokens: internal.usage?.input,
2387
+ outputTokens: internal.usage?.output,
2388
+ });
2389
+ const clientResponseText = JSON.stringify(clientResponse);
2390
+ logProxyBody({
2391
+ phase: "client_response",
2392
+ headers: { "content-type": "application/json" },
2393
+ body: clientResponseText,
2394
+ bodySize: Buffer.byteLength(clientResponseText, "utf8"),
2395
+ contentType: "application/json",
2396
+ responseStatus: 200,
2397
+ durationMs: Date.now() - requestStartTime,
2398
+ });
2399
+ return clientResponse;
1357
2400
  }
1358
2401
  catch (fallbackErr) {
1359
- logger.debug(`[proxy] fallback auto-provider failed: ${fallbackErr instanceof Error
1360
- ? fallbackErr.message
1361
- : String(fallbackErr)}`);
2402
+ logger.debug(`[proxy] fallback auto-provider failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
1362
2403
  }
1363
2404
  }
1364
2405
  if (authFailureMessage && !sawRateLimit) {
1365
- return buildClaudeError(401, authFailureMessage);
2406
+ tracer?.setError("authentication_error", authFailureMessage);
2407
+ tracer?.end(401, Date.now() - requestStartTime);
2408
+ return buildLoggedClaudeError(401, authFailureMessage);
2409
+ }
2410
+ if (invalidRequestFailure) {
2411
+ tracer?.setError("invalid_request_error", summarizeErrorMessage(invalidRequestFailure.body));
2412
+ tracer?.end(invalidRequestFailure.status, Date.now() - requestStartTime);
2413
+ recordFinalError(invalidRequestFailure.status);
2414
+ try {
2415
+ const parsedError = JSON.parse(invalidRequestFailure.body);
2416
+ logFinalRequest(invalidRequestFailure.status, "", "final", "invalid_request_error", summarizeErrorMessage(invalidRequestFailure.body));
2417
+ logProxyBody({
2418
+ phase: "client_response",
2419
+ headers: {
2420
+ "content-type": invalidRequestFailure.contentType ?? "application/json",
2421
+ },
2422
+ body: invalidRequestFailure.body,
2423
+ bodySize: Buffer.byteLength(invalidRequestFailure.body, "utf8"),
2424
+ contentType: invalidRequestFailure.contentType ?? "application/json",
2425
+ responseStatus: invalidRequestFailure.status,
2426
+ durationMs: Date.now() - requestStartTime,
2427
+ });
2428
+ return parsedError;
2429
+ }
2430
+ catch {
2431
+ return buildLoggedClaudeError(invalidRequestFailure.status, summarizeErrorMessage(invalidRequestFailure.body), "invalid_request_error");
2432
+ }
1366
2433
  }
1367
2434
  if ((sawNetworkError || sawTransientFailure) && !sawRateLimit) {
1368
- return buildClaudeError(502, `All Anthropic accounts failed due to transient upstream/network errors. Last error: ${lastError instanceof Error
1369
- ? lastError.message
1370
- : String(lastError ?? "unknown")}`);
2435
+ const msg = `All Anthropic accounts failed due to transient upstream/network errors. Last error: ${lastError instanceof Error ? lastError.message : String(lastError ?? "unknown")}`;
2436
+ tracer?.setError("transient_error", msg.slice(0, 500));
2437
+ tracer?.end(502, Date.now() - requestStartTime);
2438
+ return buildLoggedClaudeError(502, msg);
1371
2439
  }
1372
2440
  if (!sawRateLimit) {
1373
- return buildClaudeError(502, `All Anthropic accounts failed. Last error: ${lastError instanceof Error
1374
- ? lastError.message
1375
- : String(lastError ?? "unknown")}`);
2441
+ const msg = `All Anthropic accounts failed. Last error: ${lastError instanceof Error ? lastError.message : String(lastError ?? "unknown")}`;
2442
+ tracer?.setError("all_accounts_failed", msg.slice(0, 500));
2443
+ tracer?.end(502, Date.now() - requestStartTime);
2444
+ return buildLoggedClaudeError(502, msg);
1376
2445
  }
1377
2446
  // All accounts AND all fallbacks exhausted — return 429 with Retry-After
1378
2447
  logger.always(`[proxy] all accounts rate-limited, retry in ${retryAfterSec}s`);
1379
2448
  const errorBody = buildClaudeError(429, `All accounts rate-limited. Earliest recovery in ${retryAfterSec}s.`, "overloaded_error");
1380
- return new Response(JSON.stringify(errorBody), {
2449
+ tracer?.setError("rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.`);
2450
+ tracer?.end(429, Date.now() - requestStartTime);
2451
+ recordFinalError(429);
2452
+ logFinalRequest(429, "", "final", "rate_limit_error", `All accounts rate-limited. Retry in ${retryAfterSec}s.`);
2453
+ const errorBodyText = JSON.stringify(errorBody);
2454
+ logProxyBody({
2455
+ phase: "client_response",
2456
+ headers: {
2457
+ "content-type": "application/json",
2458
+ "retry-after": String(retryAfterSec),
2459
+ },
2460
+ body: errorBodyText,
2461
+ bodySize: Buffer.byteLength(errorBodyText, "utf8"),
2462
+ contentType: "application/json",
2463
+ responseStatus: 429,
2464
+ durationMs: Date.now() - requestStartTime,
2465
+ });
2466
+ return new Response(errorBodyText, {
1381
2467
  status: 429,
1382
2468
  headers: {
1383
2469
  "content-type": "application/json",
@@ -1387,40 +2473,14 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1387
2473
  }
1388
2474
  else {
1389
2475
  // ─── TRANSLATION MODE (Claude → Other Provider) ───────
2476
+ tracer?.setMode("full");
1390
2477
  // Parse into NeuroLink format, call generate/stream, serialize back
1391
2478
  const parsed = parseClaudeRequest(body);
1392
- const historyMessages = parsed.conversationMessages.slice(0, -1);
1393
- const options = {
1394
- input: {
1395
- text: parsed.prompt,
1396
- ...(parsed.images.length > 0
1397
- ? { images: parsed.images }
1398
- : {}),
1399
- },
2479
+ const attempts = buildProxyTranslationAttempts({
1400
2480
  provider: route.provider,
1401
2481
  model: route.model,
1402
- systemPrompt: parsed.systemPrompt,
1403
- maxTokens: parsed.maxTokens,
1404
- ...(parsed.temperature !== undefined
1405
- ? { temperature: parsed.temperature }
1406
- : {}),
1407
- ...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
1408
- ...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
1409
- ...(parsed.stopSequences?.length
1410
- ? { stopSequences: parsed.stopSequences }
1411
- : {}),
1412
- ...(parsed.thinkingConfig
1413
- ? { thinkingConfig: parsed.thinkingConfig }
1414
- : {}),
1415
- tools: parsed.tools,
1416
- ...(parsed.toolChoice ? { toolChoice: parsed.toolChoice } : {}),
1417
- maxSteps: 1,
1418
- ...(historyMessages.length > 0
1419
- ? { conversationMessages: historyMessages }
1420
- : {}),
1421
- };
2482
+ }, modelRouter);
1422
2483
  if (body.stream) {
1423
- const streamResult = await ctx.neurolink.stream(options);
1424
2484
  const serializer = new ClaudeStreamSerializer(body.model, 0);
1425
2485
  const KEEPALIVE_INTERVAL_MS = 15_000; // 15 seconds
1426
2486
  // Return a ReadableStream that emits SSE keep-alive comments
@@ -1429,6 +2489,9 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1429
2489
  const encoder = new TextEncoder();
1430
2490
  let translationKeepAliveTimer;
1431
2491
  let translationCancelled = false;
2492
+ let translationSucceeded = false;
2493
+ let translatedModel;
2494
+ let finalStreamError = "No translation providers succeeded";
1432
2495
  // Hold a reference to the upstream async iterator so
1433
2496
  // we can abort it when the client disconnects.
1434
2497
  let upstreamIterator;
@@ -1448,59 +2511,85 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1448
2511
  }
1449
2512
  }, KEEPALIVE_INTERVAL_MS);
1450
2513
  try {
1451
- const iterable = streamResult.stream;
1452
- upstreamIterator = iterable[Symbol.asyncIterator]();
1453
- // Manually drive the async iterator so we can cancel it
1454
- while (true) {
1455
- if (translationCancelled) {
1456
- break;
2514
+ for (let attemptIndex = 0; attemptIndex < attempts.length; attemptIndex++) {
2515
+ const attempt = attempts[attemptIndex];
2516
+ if (attemptIndex > 0) {
2517
+ logger.always(`[proxy] fallback → ${attempt.label}`);
1457
2518
  }
1458
- const { value: chunk, done } = await upstreamIterator.next();
1459
- if (done) {
1460
- break;
1461
- }
1462
- if (translationCancelled) {
1463
- break;
1464
- }
1465
- const text = extractText(chunk);
1466
- if (text) {
1467
- for (const frame of serializer.pushDelta(text)) {
1468
- controller.enqueue(encoder.encode(frame));
2519
+ let collectedText = "";
2520
+ try {
2521
+ const options = buildProxyFallbackOptions(parsed, attempt.provider
2522
+ ? {
2523
+ provider: attempt.provider,
2524
+ model: attempt.model,
2525
+ }
2526
+ : {});
2527
+ const streamResult = await ctx.neurolink.stream(options);
2528
+ const iterable = streamResult.stream;
2529
+ upstreamIterator = iterable[Symbol.asyncIterator]();
2530
+ while (true) {
2531
+ if (translationCancelled) {
2532
+ break;
2533
+ }
2534
+ const { value: chunk, done } = await upstreamIterator.next();
2535
+ if (done) {
2536
+ break;
2537
+ }
2538
+ if (translationCancelled) {
2539
+ break;
2540
+ }
2541
+ const text = extractText(chunk);
2542
+ if (text) {
2543
+ collectedText += text;
2544
+ for (const frame of serializer.pushDelta(text)) {
2545
+ controller.enqueue(encoder.encode(frame));
2546
+ }
2547
+ }
2548
+ }
2549
+ const toolCalls = streamResult.toolCalls ?? [];
2550
+ if (!hasTranslatedOutput(collectedText, toolCalls)) {
2551
+ finalStreamError = `Translated provider ${attempt.label} returned no content or tool calls`;
2552
+ logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`);
2553
+ continue;
1469
2554
  }
2555
+ if (!translationCancelled && toolCalls.length) {
2556
+ for (const tc of toolCalls) {
2557
+ const toolName = tc.toolName ?? tc.name ?? "unknown";
2558
+ for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(tc))) {
2559
+ controller.enqueue(encoder.encode(frame));
2560
+ }
2561
+ }
2562
+ }
2563
+ if (!translationCancelled) {
2564
+ const reason = streamResult.finishReason ?? "end_turn";
2565
+ const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
2566
+ for (const frame of serializer.finish(resolvedUsage.output, reason)) {
2567
+ controller.enqueue(encoder.encode(frame));
2568
+ }
2569
+ }
2570
+ translatedModel = streamResult.model;
2571
+ translationSucceeded = true;
2572
+ return;
1470
2573
  }
1471
- }
1472
- // Emit tool_use blocks if model wants to call tools
1473
- if (!translationCancelled &&
1474
- streamResult.toolCalls?.length) {
1475
- for (const tc of streamResult.toolCalls) {
1476
- const toolName = tc.toolName ??
1477
- tc.name ??
1478
- "unknown";
1479
- const toolArgs = tc.args ??
1480
- tc.parameters ??
1481
- {};
1482
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, toolArgs)) {
1483
- controller.enqueue(encoder.encode(frame));
2574
+ catch (streamErr) {
2575
+ if (translationCancelled) {
2576
+ return;
2577
+ }
2578
+ finalStreamError = streamErr instanceof Error ? streamErr.message : String(streamErr);
2579
+ if (collectedText.trim().length > 0) {
2580
+ logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`);
2581
+ const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`;
2582
+ controller.enqueue(encoder.encode(errorEvent));
2583
+ return;
1484
2584
  }
2585
+ logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${finalStreamError}`);
1485
2586
  }
1486
2587
  }
1487
2588
  if (!translationCancelled) {
1488
- const reason = streamResult.finishReason ?? "end_turn";
1489
- for (const frame of serializer.finish(0, reason)) {
1490
- controller.enqueue(encoder.encode(frame));
1491
- }
1492
- }
1493
- }
1494
- catch (streamErr) {
1495
- if (translationCancelled) {
1496
- return;
2589
+ logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`);
2590
+ const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`;
2591
+ controller.enqueue(encoder.encode(errorEvent));
1497
2592
  }
1498
- const errMsg = streamErr instanceof Error
1499
- ? streamErr.message
1500
- : String(streamErr);
1501
- logger.always(`[proxy] mid-stream error (translation mode): ${errMsg}`);
1502
- const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${errMsg}` } })}\n\n`;
1503
- controller.enqueue(encoder.encode(errorEvent));
1504
2593
  }
1505
2594
  finally {
1506
2595
  if (translationKeepAliveTimer) {
@@ -1509,6 +2598,14 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1509
2598
  if (!translationCancelled) {
1510
2599
  controller.close();
1511
2600
  }
2601
+ // OTel: record model substitution if proxy routed to a different model
2602
+ if (tracer && translatedModel && translatedModel !== body.model) {
2603
+ tracer.setModelSubstitution(body.model, translatedModel);
2604
+ }
2605
+ if (!translationSucceeded) {
2606
+ tracer?.setError("generation_error", finalStreamError.slice(0, 500));
2607
+ }
2608
+ tracer?.end(200, Date.now() - requestStartTime);
1512
2609
  }
1513
2610
  },
1514
2611
  cancel() {
@@ -1533,34 +2630,72 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1533
2630
  },
1534
2631
  });
1535
2632
  }
1536
- const streamResult = await ctx.neurolink.stream(options);
1537
- let collectedText = "";
1538
- for await (const chunk of streamResult.stream) {
1539
- const text = extractText(chunk);
1540
- if (text) {
1541
- collectedText += text;
2633
+ let lastAttemptError = "No translation providers succeeded";
2634
+ for (let attemptIndex = 0; attemptIndex < attempts.length; attemptIndex++) {
2635
+ const attempt = attempts[attemptIndex];
2636
+ if (attemptIndex > 0) {
2637
+ logger.always(`[proxy] fallback → ${attempt.label}`);
1542
2638
  }
1543
- }
1544
- const internal = {
1545
- content: collectedText,
1546
- model: streamResult.model,
1547
- finishReason: streamResult.finishReason ?? "end_turn",
1548
- reasoning: undefined,
1549
- usage: streamResult.usage
1550
- ? {
1551
- input: streamResult.usage.input ?? 0,
1552
- output: streamResult.usage.output ?? 0,
1553
- total: streamResult.usage.total ?? 0,
2639
+ try {
2640
+ const options = buildProxyFallbackOptions(parsed, attempt.provider
2641
+ ? {
2642
+ provider: attempt.provider,
2643
+ model: attempt.model,
2644
+ }
2645
+ : {});
2646
+ const streamResult = await ctx.neurolink.stream(options);
2647
+ let collectedText = "";
2648
+ for await (const chunk of streamResult.stream) {
2649
+ const text = extractText(chunk);
2650
+ if (text) {
2651
+ collectedText += text;
2652
+ }
1554
2653
  }
1555
- : undefined,
1556
- toolCalls: streamResult.toolCalls,
1557
- };
1558
- return serializeClaudeResponse(internal, body.model);
2654
+ if (!hasTranslatedOutput(collectedText, streamResult.toolCalls)) {
2655
+ lastAttemptError = `Translated provider ${attempt.label} returned no content or tool calls`;
2656
+ logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`);
2657
+ continue;
2658
+ }
2659
+ const internal = {
2660
+ content: collectedText,
2661
+ model: streamResult.model,
2662
+ finishReason: streamResult.finishReason ?? "end_turn",
2663
+ reasoning: undefined,
2664
+ usage: streamResult.usage ? extractUsageFromStreamResult(streamResult.usage) : undefined,
2665
+ toolCalls: streamResult.toolCalls,
2666
+ };
2667
+ // OTel: record model substitution if proxy routed to a different model
2668
+ if (tracer && streamResult.model && streamResult.model !== body.model) {
2669
+ tracer.setModelSubstitution(body.model, streamResult.model);
2670
+ }
2671
+ tracer?.end(200, Date.now() - requestStartTime);
2672
+ const clientResponse = serializeClaudeResponse(internal, body.model);
2673
+ const clientResponseText = JSON.stringify(clientResponse);
2674
+ logProxyBody({
2675
+ phase: "client_response",
2676
+ headers: { "content-type": "application/json" },
2677
+ body: clientResponseText,
2678
+ bodySize: Buffer.byteLength(clientResponseText, "utf8"),
2679
+ contentType: "application/json",
2680
+ responseStatus: 200,
2681
+ durationMs: Date.now() - requestStartTime,
2682
+ });
2683
+ return clientResponse;
2684
+ }
2685
+ catch (attemptError) {
2686
+ lastAttemptError = attemptError instanceof Error ? attemptError.message : String(attemptError);
2687
+ logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${lastAttemptError}`);
2688
+ }
2689
+ }
2690
+ throw new Error(lastAttemptError);
1559
2691
  }
1560
2692
  }
1561
2693
  catch (error) {
1562
- logger.error(`[claude-proxy] Generation error for ${body.model}: ${error instanceof Error ? error.message : String(error)}`);
1563
- return buildClaudeError(502, `Generation failed: ${error instanceof Error ? error.message : "unknown error"}`);
2694
+ const errMsg = error instanceof Error ? error.message : String(error);
2695
+ logger.error(`[claude-proxy] Generation error for ${body.model}: ${errMsg}`);
2696
+ tracer?.setError("generation_error", errMsg.slice(0, 500));
2697
+ tracer?.end(502, Date.now() - requestStartTime);
2698
+ return buildLoggedClaudeError(502, `Generation failed: ${error instanceof Error ? error.message : "unknown error"}`);
1564
2699
  }
1565
2700
  },
1566
2701
  description: "Claude-compatible messages endpoint routed through NeuroLink",
@@ -1606,9 +2741,7 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1606
2741
  }
1607
2742
  // Simple estimation using character-to-token heuristic
1608
2743
  const text = body.messages
1609
- .map((m) => typeof m.content === "string"
1610
- ? m.content
1611
- : JSON.stringify(m.content))
2744
+ .map((m) => (typeof m.content === "string" ? m.content : JSON.stringify(m.content)))
1612
2745
  .join(" ");
1613
2746
  return { input_tokens: Math.ceil(text.length / 4) };
1614
2747
  },
@@ -1621,6 +2754,26 @@ export function createClaudeProxyRoutes(modelRouter, basePath = "", accountStrat
1621
2754
  // ---------------------------------------------------------------------------
1622
2755
  // Helpers
1623
2756
  // ---------------------------------------------------------------------------
/**
 * Extract token usage from a StreamResult.usage object, handling multiple
 * naming conventions across AI SDK versions and providers:
 *   - AI SDK v6: inputTokens / outputTokens
 *   - AI SDK v4: promptTokens / completionTokens
 *   - NeuroLink internal: input / output
 *
 * For each side, the keys are tried in the order listed above and the first
 * one holding a positive, finite number wins. Missing, zero, non-numeric,
 * NaN, infinite, or negative values are skipped so a malformed counter can
 * never leak into the reported totals.
 *
 * @param {unknown} usage - Raw usage object from a stream result (may be absent).
 * @returns {{ input: number, output: number, total: number }} Normalized
 *   counts; `total` is always `input + output`. All zeros when `usage` is
 *   not an object.
 */
function extractUsageFromStreamResult(usage) {
  if (!usage || typeof usage !== "object") {
    return { input: 0, output: 0, total: 0 };
  }
  // Returns the first usable count among `keys`, or 0 when none qualifies.
  const firstTokenCount = (keys) => {
    for (const key of keys) {
      const value = usage[key];
      if (typeof value === "number" && Number.isFinite(value) && value > 0) {
        return value;
      }
    }
    return 0;
  };
  const input = firstTokenCount(["inputTokens", "promptTokens", "input"]);
  const output = firstTokenCount(["outputTokens", "completionTokens", "output"]);
  return { input, output, total: input + output };
}
1624
2777
  /**
1625
2778
  * Extract text content from a stream chunk (handles various chunk formats).
1626
2779
  */
@@ -1744,24 +2897,17 @@ function isRetryableNetworkError(error) {
1744
2897
  normalized.includes("fetch failed") ||
1745
2898
  normalized.includes("socket hang up"));
1746
2899
  }
1747
- const TRANSIENT_HTTP_STATUSES = new Set([
1748
- 408, 500, 502, 503, 504, 520, 521, 522, 523, 524, 525, 526, 529,
1749
- ]);
2900
+ const TRANSIENT_HTTP_STATUSES = new Set([408, 500, 502, 503, 504, 520, 521, 522, 523, 524, 525, 526, 529]);
1750
2901
  /**
1751
2902
  * Parse a Claude error payload when available.
1752
2903
  */
1753
2904
  export function parseClaudeErrorBody(errBody) {
1754
2905
  try {
1755
2906
  const parsed = JSON.parse(errBody);
1756
- if (parsed &&
1757
- parsed.type === "error" &&
1758
- parsed.error &&
1759
- typeof parsed.error === "object") {
2907
+ if (parsed && parsed.type === "error" && parsed.error && typeof parsed.error === "object") {
1760
2908
  return {
1761
2909
  errorType: typeof parsed.error.type === "string" ? parsed.error.type : undefined,
1762
- message: typeof parsed.error.message === "string"
1763
- ? parsed.error.message
1764
- : undefined,
2910
+ message: typeof parsed.error.message === "string" ? parsed.error.message : undefined,
1765
2911
  };
1766
2912
  }
1767
2913
  }
@@ -1778,8 +2924,91 @@ export function isInvalidRequestError(status, errBody) {
1778
2924
  return true;
1779
2925
  }
1780
2926
  const parsed = parseClaudeErrorBody(errBody);
1781
- return (parsed.errorType === "invalid_request_error" ||
1782
- errBody.includes("invalid_request_error"));
2927
+ return parsed.errorType === "invalid_request_error" || errBody.includes("invalid_request_error");
2928
+ }
/**
 * Return a shallow copy of a Claude request body in which every message whose
 * `content` is a plain string is rewritten as a single-element content-block
 * array (`[{ type: "text", text }]`). Messages whose content is not a string
 * are passed through by reference. The input body is not mutated.
 *
 * @param {{ messages: Array<{ content: unknown }> }} body - Claude request body.
 * @returns {object} Copy of `body` with a normalized `messages` array.
 */
function normalizeClaudeRequestForAnthropic(body) {
  const toBlockForm = (message) =>
    typeof message.content === "string"
      ? { ...message, content: [{ type: "text", text: message.content }] }
      : message;
  return { ...body, messages: body.messages.map(toBlockForm) };
}
/**
 * Build the options object handed to `neurolink.stream()` for one translated
 * proxy attempt.
 *
 * @param {object} parsed - Parsed Claude request. Reads `prompt`, `images`,
 *   `conversationMessages`, `systemPrompt`, `maxTokens`, and the optional
 *   `temperature`, `topP`, `topK`, `stopSequences`, `thinkingConfig`, `tools`,
 *   `toolChoice` and `toolChoiceName` fields.
 * @param {{ provider?: string, model?: string }} [overrides] - Provider/model
 *   to target; each key is only emitted when truthy.
 * @returns {object} Stream options. Optional sampling fields are included only
 *   when set on `parsed`; the last conversation message is dropped from the
 *   history; `maxSteps` is fixed at 1.
 */
export function buildProxyFallbackOptions(parsed, overrides = {}) {
  const historyMessages = parsed.conversationMessages.slice(0, -1);
  // A request without a tool map is treated as "no tools" instead of
  // crashing on Object.keys(undefined).
  const tools = parsed.tools ?? {};
  const toolNames = Object.keys(tools);
  // An explicitly named tool takes precedence over the generic tool choice.
  const toolChoice = parsed.toolChoiceName
    ? { type: "tool", toolName: parsed.toolChoiceName }
    : parsed.toolChoice;
  return {
    input: {
      text: parsed.prompt,
      ...(parsed.images.length > 0 ? { images: parsed.images } : {}),
    },
    ...(overrides.provider ? { provider: overrides.provider } : {}),
    ...(overrides.model ? { model: overrides.model } : {}),
    systemPrompt: parsed.systemPrompt,
    maxTokens: parsed.maxTokens,
    ...(parsed.temperature !== undefined ? { temperature: parsed.temperature } : {}),
    ...(parsed.topP !== undefined ? { topP: parsed.topP } : {}),
    ...(parsed.topK !== undefined ? { topK: parsed.topK } : {}),
    ...(parsed.stopSequences?.length ? { stopSequences: parsed.stopSequences } : {}),
    ...(parsed.thinkingConfig ? { thinkingConfig: parsed.thinkingConfig } : {}),
    ...(toolNames.length === 0 ? { disableTools: true } : {}),
    // Claude-compatible requests already declare the exact tool contract.
    // Filter out NeuroLink's built-in agent tools so translated fallbacks only
    // expose the tools the client actually knows how to handle.
    ...(toolNames.length > 0
      ? {
          tools,
          toolFilter: toolNames,
        }
      : {}),
    ...(toolChoice ? { toolChoice } : {}),
    ...(historyMessages.length > 0 ? { conversationMessages: historyMessages } : {}),
    disableInternalFallback: true,
    skipToolPromptInjection: true,
    maxSteps: 1,
  };
}
/**
 * Produce the ordered list of provider/model attempts for a translated proxy
 * request: the primary route first, then every router fallback entry that is
 * not identical (same provider and model) to the primary. When the router
 * supplies no fallback chain at all, a final provider-less "auto-provider"
 * attempt is appended.
 *
 * @param {{ provider: string, model?: string }} primary - Routed target.
 * @param {{ getFallbackChain(): Array<{ provider: string, model: string }> }} [modelRouter]
 * @returns {Array<{ provider?: string, model?: string, label: string }>}
 */
function buildProxyTranslationAttempts(primary, modelRouter) {
  const primaryAttempt = {
    provider: primary.provider,
    model: primary.model,
    label: `${primary.provider}/${primary.model ?? "unknown"}`,
  };
  const fallbackChain = modelRouter?.getFallbackChain() ?? [];
  const duplicatesPrimary = (entry) =>
    entry.provider === primary.provider && entry.model === primary.model;
  const fallbackAttempts = [];
  for (const entry of fallbackChain) {
    if (!duplicatesPrimary(entry)) {
      fallbackAttempts.push({
        provider: entry.provider,
        model: entry.model,
        label: `${entry.provider}/${entry.model}`,
      });
    }
  }
  // The auto-provider attempt is keyed off the raw chain being empty, not off
  // how many entries survived the duplicate filter.
  const autoAttempt = fallbackChain.length === 0 ? [{ label: "auto-provider" }] : [];
  return [primaryAttempt, ...fallbackAttempts, ...autoAttempt];
}
/**
 * Report whether a translated attempt produced anything usable: either
 * non-whitespace text or at least one tool call.
 *
 * @param {string} collectedText - Text accumulated from the stream.
 * @param {Array<unknown>|null|undefined} toolCalls - Tool calls, if any.
 * @returns {boolean}
 */
function hasTranslatedOutput(collectedText, toolCalls) {
  if (collectedText.trim().length > 0) {
    return true;
  }
  const toolCallCount = toolCalls?.length ?? 0;
  return toolCallCount > 0;
}
/**
 * Pull the argument payload out of a tool call, trying the `args`,
 * `parameters` and `input` properties in that order and returning the first
 * one that is neither `null` nor `undefined`. Falls back to an empty object.
 *
 * @param {{ args?: unknown, parameters?: unknown, input?: unknown }} toolCall
 * @returns {unknown} The tool arguments, or `{}` when none are present.
 */
function extractToolArgs(toolCall) {
  for (const key of ["args", "parameters", "input"]) {
    const candidate = toolCall[key];
    if (candidate !== undefined && candidate !== null) {
      return candidate;
    }
  }
  return {};
}
1784
3013
  /**
1785
3014
  * Detect transient upstream failures that should trigger account/provider failover.