@juspay/neurolink 9.50.0 → 9.50.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -356,8 +356,8 @@ export class LiteLLMProvider extends BaseProvider {
356
356
  }
357
357
  catch (streamError) {
358
358
  if (NoOutputGeneratedError.isInstance(streamError)) {
359
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
360
- return;
359
+ logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — propagating to fallback chain");
360
+ throw streamError;
361
361
  }
362
362
  throw streamError;
363
363
  }
@@ -90,6 +90,13 @@ declare class ProxyTracer {
90
90
  * Sets span attributes and increments the substitution metric counter.
91
91
  */
92
92
  setModelSubstitution(requestedModel: string, actualModel: string): void;
93
+ setFallbackInfo(info: {
94
+ triggered: boolean;
95
+ provider?: string;
96
+ model?: string;
97
+ attemptCount: number;
98
+ reason: string;
99
+ }): void;
93
100
  /** Log the incoming client request body (redacted). */
94
101
  logRequestBody(body: string): void;
95
102
  /** Log the incoming client request headers (redacted). */
@@ -129,5 +136,12 @@ declare class ProxyTracer {
129
136
  */
130
137
  getTraceHeaders(): Record<string, string>;
131
138
  }
139
+ export declare function recordFallbackAttempt(attrs: {
140
+ provider: string;
141
+ model: string;
142
+ status: "success" | "failure";
143
+ errorMessage?: string;
144
+ durationMs: number;
145
+ }): void;
132
146
  export { ProxyTracer };
133
147
  export type { ProxyRequestContext, AccountSelectionContext, UpstreamAttemptContext, UsageContext, };
@@ -79,6 +79,18 @@ function getProxyMetrics() {
79
79
  description: "Response body size in bytes received from upstream",
80
80
  unit: "By",
81
81
  }),
82
+ fallbackAttemptsTotal: meter.createCounter("proxy_fallback_attempts_total", {
83
+ description: "Total fallback provider attempts",
84
+ unit: "{attempt}",
85
+ }),
86
+ fallbackSuccessTotal: meter.createCounter("proxy_fallback_success_total", {
87
+ description: "Total successful fallback provider responses",
88
+ unit: "{success}",
89
+ }),
90
+ fallbackFailureTotal: meter.createCounter("proxy_fallback_failure_total", {
91
+ description: "Total failed fallback provider responses",
92
+ unit: "{failure}",
93
+ }),
82
94
  };
83
95
  _metrics = createdMetrics;
84
96
  return createdMetrics;
@@ -396,6 +408,18 @@ class ProxyTracer {
396
408
  actual_model: actualModel,
397
409
  });
398
410
  }
411
+ setFallbackInfo(info) {
412
+ if (!this.rootSpan) {
413
+ return;
414
+ }
415
+ this.rootSpan.setAttributes({
416
+ "proxy.fallback.triggered": info.triggered,
417
+ ...(info.provider ? { "proxy.fallback.provider": info.provider } : {}),
418
+ ...(info.model ? { "proxy.fallback.model": info.model } : {}),
419
+ "proxy.fallback.attempt_count": info.attemptCount,
420
+ "proxy.fallback.reason": info.reason,
421
+ });
422
+ }
399
423
  // -------------------------------------------------------------------------
400
424
  // Log payloads as span events
401
425
  // -------------------------------------------------------------------------
@@ -641,5 +665,24 @@ class ProxyTracer {
641
665
  return this.bridge.injectContext({}, trace.setSpan(context.active(), this.rootSpan));
642
666
  }
643
667
  }
668
+ export function recordFallbackAttempt(attrs) {
669
+ try {
670
+ const m = getProxyMetrics();
671
+ const labels = { provider: attrs.provider, model: attrs.model };
672
+ m.fallbackAttemptsTotal.add(1, labels);
673
+ if (attrs.status === "success") {
674
+ m.fallbackSuccessTotal.add(1, labels);
675
+ }
676
+ else {
677
+ m.fallbackFailureTotal.add(1, {
678
+ ...labels,
679
+ error: attrs.errorMessage?.slice(0, 100) ?? "unknown",
680
+ });
681
+ }
682
+ }
683
+ catch {
684
+ // metrics are best-effort
685
+ }
686
+ }
644
687
  export { ProxyTracer };
645
688
  //# sourceMappingURL=proxyTracer.js.map
@@ -15,7 +15,7 @@ import { join } from "node:path";
15
15
  import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
16
16
  import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js";
17
17
  import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js";
18
- import { ProxyTracer } from "../../proxy/proxyTracer.js";
18
+ import { ProxyTracer, recordFallbackAttempt } from "../../proxy/proxyTracer.js";
19
19
  import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
20
20
  import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
21
21
  import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
@@ -1246,43 +1246,64 @@ async function executeClaudeFallbackTranslation(args) {
1246
1246
  if (body.stream) {
1247
1247
  const streamResult = await ctx.neurolink.stream(options);
1248
1248
  const serializer = new ClaudeStreamSerializer(body.model, 0);
1249
- async function* sseGenerator() {
1250
- for (const frame of serializer.start()) {
1251
- yield frame;
1252
- }
1253
- let collectedText = "";
1254
- for await (const chunk of streamResult.stream) {
1255
- const text = extractText(chunk);
1256
- if (text) {
1257
- collectedText += text;
1258
- for (const frame of serializer.pushDelta(text)) {
1259
- yield frame;
1260
- }
1249
+ // Eagerly consume stream so errors fire synchronously and the
1250
+ // fallback loop in tryConfiguredClaudeFallbackChain can catch them.
1251
+ const frames = [];
1252
+ let collectedText = "";
1253
+ for (const frame of serializer.start()) {
1254
+ frames.push(frame);
1255
+ }
1256
+ for await (const chunk of streamResult.stream) {
1257
+ const text = extractText(chunk);
1258
+ if (text) {
1259
+ collectedText += text;
1260
+ for (const frame of serializer.pushDelta(text)) {
1261
+ frames.push(frame);
1261
1262
  }
1262
1263
  }
1263
- const toolCalls = streamResult.toolCalls ?? [];
1264
- if (!hasTranslatedOutput(collectedText, toolCalls)) {
1265
- throw new Error(`Translated provider ${providerLabel} returned no content or tool calls`);
1266
- }
1267
- if (toolCalls.length) {
1268
- for (const toolCall of toolCalls) {
1269
- const toolName = toolCall.toolName ??
1270
- toolCall.name ??
1271
- "unknown";
1272
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
1273
- yield frame;
1274
- }
1264
+ }
1265
+ const toolCalls = streamResult.toolCalls ?? [];
1266
+ if (!hasTranslatedOutput(collectedText, toolCalls)) {
1267
+ throw new Error(`Translated provider ${providerLabel} returned no content or tool calls`);
1268
+ }
1269
+ if (toolCalls.length) {
1270
+ for (const toolCall of toolCalls) {
1271
+ const toolName = toolCall.toolName ??
1272
+ toolCall.name ??
1273
+ "unknown";
1274
+ for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
1275
+ frames.push(frame);
1275
1276
  }
1276
1277
  }
1277
- const reason = streamResult.finishReason ?? "end_turn";
1278
- const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
1279
- for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1280
- yield frame;
1281
- }
1282
1278
  }
1279
+ const reason = streamResult.finishReason ?? "end_turn";
1280
+ const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
1281
+ for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1282
+ frames.push(frame);
1283
+ }
1284
+ // Telemetry AFTER validation — not before like the old lazy path
1283
1285
  tracer?.end(200, Date.now() - requestStartTime);
1284
1286
  recordFinalSuccess();
1285
- logFinalRequest(200, "", providerLabel);
1287
+ logFinalRequest(200, "", providerLabel, undefined, undefined, {
1288
+ inputTokens: resolvedUsage.input,
1289
+ outputTokens: resolvedUsage.output,
1290
+ });
1291
+ const bufferedBody = frames.join("");
1292
+ logProxyBody({
1293
+ phase: "client_response",
1294
+ headers: { "content-type": "text/event-stream" },
1295
+ body: bufferedBody,
1296
+ bodySize: Buffer.byteLength(bufferedBody, "utf8"),
1297
+ contentType: "text/event-stream",
1298
+ responseStatus: 200,
1299
+ durationMs: Date.now() - requestStartTime,
1300
+ });
1301
+ // Return generator that yields pre-buffered frames
1302
+ async function* sseGenerator() {
1303
+ for (const frame of frames) {
1304
+ yield frame;
1305
+ }
1306
+ }
1286
1307
  return sseGenerator();
1287
1308
  }
1288
1309
  const streamResult = await ctx.neurolink.stream(options);
@@ -1346,6 +1367,11 @@ async function tryConfiguredClaudeFallbackChain(args) {
1346
1367
  : "auto-provider";
1347
1368
  logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
1348
1369
  }
1370
+ tracer?.setFallbackInfo({
1371
+ triggered: true,
1372
+ attemptCount: fallbackPlan.attempts.slice(1).length,
1373
+ reason: fallbackPolicyReason ?? "all_anthropic_accounts_exhausted",
1374
+ });
1349
1375
  for (const fallback of fallbackPlan.attempts.slice(1)) {
1350
1376
  if (!fallback.provider || !fallback.model) {
1351
1377
  continue;
@@ -1354,6 +1380,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
1354
1380
  if (!availability.available) {
1355
1381
  logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} health-check failed (${availability.reason ?? "provider unavailable"}), attempting anyway`);
1356
1382
  }
1383
+ const fallbackStart = Date.now();
1357
1384
  try {
1358
1385
  logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
1359
1386
  const options = buildProxyFallbackOptions(parsedFallbackRequest, {
@@ -1370,13 +1397,57 @@ async function tryConfiguredClaudeFallbackChain(args) {
1370
1397
  options: options,
1371
1398
  providerLabel: fallback.provider,
1372
1399
  });
1400
+ recordFallbackAttempt({
1401
+ provider: fallback.provider,
1402
+ model: fallback.model,
1403
+ status: "success",
1404
+ durationMs: Date.now() - fallbackStart,
1405
+ });
1406
+ tracer?.setFallbackInfo({
1407
+ triggered: true,
1408
+ provider: fallback.provider,
1409
+ model: fallback.model,
1410
+ attemptCount: fallbackPlan.attempts.slice(1).length,
1411
+ reason: "fallback_success",
1412
+ });
1373
1413
  return {
1374
1414
  response,
1375
1415
  fallbackPolicyReason,
1376
1416
  };
1377
1417
  }
1378
1418
  catch (fallbackErr) {
1379
- logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
1419
+ const errMsg = fallbackErr instanceof Error
1420
+ ? fallbackErr.message
1421
+ : String(fallbackErr);
1422
+ let errorClass = "unknown";
1423
+ if (errMsg.includes("Rate limit") ||
1424
+ errMsg.includes("rate_limit") ||
1425
+ errMsg.includes("max_parallel_requests")) {
1426
+ errorClass = "rate_limit";
1427
+ }
1428
+ else if (errMsg.includes("context length") ||
1429
+ errMsg.includes("ContextWindowExceeded")) {
1430
+ errorClass = "context_overflow";
1431
+ }
1432
+ else if (errMsg.includes("no content or tool calls") ||
1433
+ errMsg.includes("NoOutputGenerated")) {
1434
+ errorClass = "empty_response";
1435
+ }
1436
+ else if (errMsg.includes("thinking_level") ||
1437
+ errMsg.includes("Field required")) {
1438
+ errorClass = "schema_mismatch";
1439
+ }
1440
+ else if (errMsg.includes("Resource exhausted")) {
1441
+ errorClass = "provider_quota";
1442
+ }
1443
+ logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed [${errorClass}]: ${errMsg}`);
1444
+ recordFallbackAttempt({
1445
+ provider: fallback.provider,
1446
+ model: fallback.model,
1447
+ status: "failure",
1448
+ errorMessage: `[${errorClass}] ${errMsg}`,
1449
+ durationMs: Date.now() - fallbackStart,
1450
+ });
1380
1451
  }
1381
1452
  }
1382
1453
  return {
@@ -3541,7 +3612,15 @@ function shouldOmitImagesForTarget(provider, model) {
3541
3612
  return provider === "litellm" && model === "open-large";
3542
3613
  }
3543
3614
  function shouldOmitThinkingConfigForTarget(provider, model) {
3544
- return provider === "vertex" && model === "gemini-2.5-flash";
3615
+ if (provider === "litellm") {
3616
+ return true;
3617
+ }
3618
+ if (provider !== "vertex") {
3619
+ return false;
3620
+ }
3621
+ // Only Gemini 2.5+ and 3.x support thinking_level on Vertex.
3622
+ const m = model?.toLowerCase() ?? "";
3623
+ return !/gemini-(2\.5|3)/.test(m);
3545
3624
  }
3546
3625
  function extractToolArgs(toolCall) {
3547
3626
  return (toolCall.args ??
@@ -356,8 +356,8 @@ export class LiteLLMProvider extends BaseProvider {
356
356
  }
357
357
  catch (streamError) {
358
358
  if (NoOutputGeneratedError.isInstance(streamError)) {
359
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
360
- return;
359
+ logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — propagating to fallback chain");
360
+ throw streamError;
361
361
  }
362
362
  throw streamError;
363
363
  }
@@ -90,6 +90,13 @@ declare class ProxyTracer {
90
90
  * Sets span attributes and increments the substitution metric counter.
91
91
  */
92
92
  setModelSubstitution(requestedModel: string, actualModel: string): void;
93
+ setFallbackInfo(info: {
94
+ triggered: boolean;
95
+ provider?: string;
96
+ model?: string;
97
+ attemptCount: number;
98
+ reason: string;
99
+ }): void;
93
100
  /** Log the incoming client request body (redacted). */
94
101
  logRequestBody(body: string): void;
95
102
  /** Log the incoming client request headers (redacted). */
@@ -129,5 +136,12 @@ declare class ProxyTracer {
129
136
  */
130
137
  getTraceHeaders(): Record<string, string>;
131
138
  }
139
+ export declare function recordFallbackAttempt(attrs: {
140
+ provider: string;
141
+ model: string;
142
+ status: "success" | "failure";
143
+ errorMessage?: string;
144
+ durationMs: number;
145
+ }): void;
132
146
  export { ProxyTracer };
133
147
  export type { ProxyRequestContext, AccountSelectionContext, UpstreamAttemptContext, UsageContext, };
@@ -79,6 +79,18 @@ function getProxyMetrics() {
79
79
  description: "Response body size in bytes received from upstream",
80
80
  unit: "By",
81
81
  }),
82
+ fallbackAttemptsTotal: meter.createCounter("proxy_fallback_attempts_total", {
83
+ description: "Total fallback provider attempts",
84
+ unit: "{attempt}",
85
+ }),
86
+ fallbackSuccessTotal: meter.createCounter("proxy_fallback_success_total", {
87
+ description: "Total successful fallback provider responses",
88
+ unit: "{success}",
89
+ }),
90
+ fallbackFailureTotal: meter.createCounter("proxy_fallback_failure_total", {
91
+ description: "Total failed fallback provider responses",
92
+ unit: "{failure}",
93
+ }),
82
94
  };
83
95
  _metrics = createdMetrics;
84
96
  return createdMetrics;
@@ -396,6 +408,18 @@ class ProxyTracer {
396
408
  actual_model: actualModel,
397
409
  });
398
410
  }
411
+ setFallbackInfo(info) {
412
+ if (!this.rootSpan) {
413
+ return;
414
+ }
415
+ this.rootSpan.setAttributes({
416
+ "proxy.fallback.triggered": info.triggered,
417
+ ...(info.provider ? { "proxy.fallback.provider": info.provider } : {}),
418
+ ...(info.model ? { "proxy.fallback.model": info.model } : {}),
419
+ "proxy.fallback.attempt_count": info.attemptCount,
420
+ "proxy.fallback.reason": info.reason,
421
+ });
422
+ }
399
423
  // -------------------------------------------------------------------------
400
424
  // Log payloads as span events
401
425
  // -------------------------------------------------------------------------
@@ -641,4 +665,23 @@ class ProxyTracer {
641
665
  return this.bridge.injectContext({}, trace.setSpan(context.active(), this.rootSpan));
642
666
  }
643
667
  }
668
+ export function recordFallbackAttempt(attrs) {
669
+ try {
670
+ const m = getProxyMetrics();
671
+ const labels = { provider: attrs.provider, model: attrs.model };
672
+ m.fallbackAttemptsTotal.add(1, labels);
673
+ if (attrs.status === "success") {
674
+ m.fallbackSuccessTotal.add(1, labels);
675
+ }
676
+ else {
677
+ m.fallbackFailureTotal.add(1, {
678
+ ...labels,
679
+ error: attrs.errorMessage?.slice(0, 100) ?? "unknown",
680
+ });
681
+ }
682
+ }
683
+ catch {
684
+ // metrics are best-effort
685
+ }
686
+ }
644
687
  export { ProxyTracer };
@@ -15,7 +15,7 @@ import { join } from "node:path";
15
15
  import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
16
16
  import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js";
17
17
  import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js";
18
- import { ProxyTracer } from "../../proxy/proxyTracer.js";
18
+ import { ProxyTracer, recordFallbackAttempt } from "../../proxy/proxyTracer.js";
19
19
  import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
20
20
  import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
21
21
  import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
@@ -1246,43 +1246,64 @@ async function executeClaudeFallbackTranslation(args) {
1246
1246
  if (body.stream) {
1247
1247
  const streamResult = await ctx.neurolink.stream(options);
1248
1248
  const serializer = new ClaudeStreamSerializer(body.model, 0);
1249
- async function* sseGenerator() {
1250
- for (const frame of serializer.start()) {
1251
- yield frame;
1252
- }
1253
- let collectedText = "";
1254
- for await (const chunk of streamResult.stream) {
1255
- const text = extractText(chunk);
1256
- if (text) {
1257
- collectedText += text;
1258
- for (const frame of serializer.pushDelta(text)) {
1259
- yield frame;
1260
- }
1249
+ // Eagerly consume stream so errors fire synchronously and the
1250
+ // fallback loop in tryConfiguredClaudeFallbackChain can catch them.
1251
+ const frames = [];
1252
+ let collectedText = "";
1253
+ for (const frame of serializer.start()) {
1254
+ frames.push(frame);
1255
+ }
1256
+ for await (const chunk of streamResult.stream) {
1257
+ const text = extractText(chunk);
1258
+ if (text) {
1259
+ collectedText += text;
1260
+ for (const frame of serializer.pushDelta(text)) {
1261
+ frames.push(frame);
1261
1262
  }
1262
1263
  }
1263
- const toolCalls = streamResult.toolCalls ?? [];
1264
- if (!hasTranslatedOutput(collectedText, toolCalls)) {
1265
- throw new Error(`Translated provider ${providerLabel} returned no content or tool calls`);
1266
- }
1267
- if (toolCalls.length) {
1268
- for (const toolCall of toolCalls) {
1269
- const toolName = toolCall.toolName ??
1270
- toolCall.name ??
1271
- "unknown";
1272
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
1273
- yield frame;
1274
- }
1264
+ }
1265
+ const toolCalls = streamResult.toolCalls ?? [];
1266
+ if (!hasTranslatedOutput(collectedText, toolCalls)) {
1267
+ throw new Error(`Translated provider ${providerLabel} returned no content or tool calls`);
1268
+ }
1269
+ if (toolCalls.length) {
1270
+ for (const toolCall of toolCalls) {
1271
+ const toolName = toolCall.toolName ??
1272
+ toolCall.name ??
1273
+ "unknown";
1274
+ for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
1275
+ frames.push(frame);
1275
1276
  }
1276
1277
  }
1277
- const reason = streamResult.finishReason ?? "end_turn";
1278
- const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
1279
- for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1280
- yield frame;
1281
- }
1282
1278
  }
1279
+ const reason = streamResult.finishReason ?? "end_turn";
1280
+ const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
1281
+ for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1282
+ frames.push(frame);
1283
+ }
1284
+ // Telemetry AFTER validation — not before like the old lazy path
1283
1285
  tracer?.end(200, Date.now() - requestStartTime);
1284
1286
  recordFinalSuccess();
1285
- logFinalRequest(200, "", providerLabel);
1287
+ logFinalRequest(200, "", providerLabel, undefined, undefined, {
1288
+ inputTokens: resolvedUsage.input,
1289
+ outputTokens: resolvedUsage.output,
1290
+ });
1291
+ const bufferedBody = frames.join("");
1292
+ logProxyBody({
1293
+ phase: "client_response",
1294
+ headers: { "content-type": "text/event-stream" },
1295
+ body: bufferedBody,
1296
+ bodySize: Buffer.byteLength(bufferedBody, "utf8"),
1297
+ contentType: "text/event-stream",
1298
+ responseStatus: 200,
1299
+ durationMs: Date.now() - requestStartTime,
1300
+ });
1301
+ // Return generator that yields pre-buffered frames
1302
+ async function* sseGenerator() {
1303
+ for (const frame of frames) {
1304
+ yield frame;
1305
+ }
1306
+ }
1286
1307
  return sseGenerator();
1287
1308
  }
1288
1309
  const streamResult = await ctx.neurolink.stream(options);
@@ -1346,6 +1367,11 @@ async function tryConfiguredClaudeFallbackChain(args) {
1346
1367
  : "auto-provider";
1347
1368
  logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
1348
1369
  }
1370
+ tracer?.setFallbackInfo({
1371
+ triggered: true,
1372
+ attemptCount: fallbackPlan.attempts.slice(1).length,
1373
+ reason: fallbackPolicyReason ?? "all_anthropic_accounts_exhausted",
1374
+ });
1349
1375
  for (const fallback of fallbackPlan.attempts.slice(1)) {
1350
1376
  if (!fallback.provider || !fallback.model) {
1351
1377
  continue;
@@ -1354,6 +1380,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
1354
1380
  if (!availability.available) {
1355
1381
  logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} health-check failed (${availability.reason ?? "provider unavailable"}), attempting anyway`);
1356
1382
  }
1383
+ const fallbackStart = Date.now();
1357
1384
  try {
1358
1385
  logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
1359
1386
  const options = buildProxyFallbackOptions(parsedFallbackRequest, {
@@ -1370,13 +1397,57 @@ async function tryConfiguredClaudeFallbackChain(args) {
1370
1397
  options: options,
1371
1398
  providerLabel: fallback.provider,
1372
1399
  });
1400
+ recordFallbackAttempt({
1401
+ provider: fallback.provider,
1402
+ model: fallback.model,
1403
+ status: "success",
1404
+ durationMs: Date.now() - fallbackStart,
1405
+ });
1406
+ tracer?.setFallbackInfo({
1407
+ triggered: true,
1408
+ provider: fallback.provider,
1409
+ model: fallback.model,
1410
+ attemptCount: fallbackPlan.attempts.slice(1).length,
1411
+ reason: "fallback_success",
1412
+ });
1373
1413
  return {
1374
1414
  response,
1375
1415
  fallbackPolicyReason,
1376
1416
  };
1377
1417
  }
1378
1418
  catch (fallbackErr) {
1379
- logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
1419
+ const errMsg = fallbackErr instanceof Error
1420
+ ? fallbackErr.message
1421
+ : String(fallbackErr);
1422
+ let errorClass = "unknown";
1423
+ if (errMsg.includes("Rate limit") ||
1424
+ errMsg.includes("rate_limit") ||
1425
+ errMsg.includes("max_parallel_requests")) {
1426
+ errorClass = "rate_limit";
1427
+ }
1428
+ else if (errMsg.includes("context length") ||
1429
+ errMsg.includes("ContextWindowExceeded")) {
1430
+ errorClass = "context_overflow";
1431
+ }
1432
+ else if (errMsg.includes("no content or tool calls") ||
1433
+ errMsg.includes("NoOutputGenerated")) {
1434
+ errorClass = "empty_response";
1435
+ }
1436
+ else if (errMsg.includes("thinking_level") ||
1437
+ errMsg.includes("Field required")) {
1438
+ errorClass = "schema_mismatch";
1439
+ }
1440
+ else if (errMsg.includes("Resource exhausted")) {
1441
+ errorClass = "provider_quota";
1442
+ }
1443
+ logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed [${errorClass}]: ${errMsg}`);
1444
+ recordFallbackAttempt({
1445
+ provider: fallback.provider,
1446
+ model: fallback.model,
1447
+ status: "failure",
1448
+ errorMessage: `[${errorClass}] ${errMsg}`,
1449
+ durationMs: Date.now() - fallbackStart,
1450
+ });
1380
1451
  }
1381
1452
  }
1382
1453
  return {
@@ -3541,7 +3612,15 @@ function shouldOmitImagesForTarget(provider, model) {
3541
3612
  return provider === "litellm" && model === "open-large";
3542
3613
  }
3543
3614
  function shouldOmitThinkingConfigForTarget(provider, model) {
3544
- return provider === "vertex" && model === "gemini-2.5-flash";
3615
+ if (provider === "litellm") {
3616
+ return true;
3617
+ }
3618
+ if (provider !== "vertex") {
3619
+ return false;
3620
+ }
3621
+ // Only Gemini 2.5+ and 3.x support thinking_level on Vertex.
3622
+ const m = model?.toLowerCase() ?? "";
3623
+ return !/gemini-(2\.5|3)/.test(m);
3545
3624
  }
3546
3625
  function extractToolArgs(toolCall) {
3547
3626
  return (toolCall.args ??
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "9.50.0",
3
+ "version": "9.50.1",
4
4
  "packageManager": "pnpm@10.15.1",
5
5
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 13 providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
6
6
  "author": {