@juspay/neurolink 9.49.0 → 9.50.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -356,8 +356,8 @@ export class LiteLLMProvider extends BaseProvider {
356
356
  }
357
357
  catch (streamError) {
358
358
  if (NoOutputGeneratedError.isInstance(streamError)) {
359
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
360
- return;
359
+ logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — propagating to fallback chain");
360
+ throw streamError;
361
361
  }
362
362
  throw streamError;
363
363
  }
@@ -90,6 +90,13 @@ declare class ProxyTracer {
90
90
  * Sets span attributes and increments the substitution metric counter.
91
91
  */
92
92
  setModelSubstitution(requestedModel: string, actualModel: string): void;
93
+ setFallbackInfo(info: {
94
+ triggered: boolean;
95
+ provider?: string;
96
+ model?: string;
97
+ attemptCount: number;
98
+ reason: string;
99
+ }): void;
93
100
  /** Log the incoming client request body (redacted). */
94
101
  logRequestBody(body: string): void;
95
102
  /** Log the incoming client request headers (redacted). */
@@ -129,5 +136,12 @@ declare class ProxyTracer {
129
136
  */
130
137
  getTraceHeaders(): Record<string, string>;
131
138
  }
139
+ export declare function recordFallbackAttempt(attrs: {
140
+ provider: string;
141
+ model: string;
142
+ status: "success" | "failure";
143
+ errorMessage?: string;
144
+ durationMs: number;
145
+ }): void;
132
146
  export { ProxyTracer };
133
147
  export type { ProxyRequestContext, AccountSelectionContext, UpstreamAttemptContext, UsageContext, };
@@ -79,6 +79,18 @@ function getProxyMetrics() {
79
79
  description: "Response body size in bytes received from upstream",
80
80
  unit: "By",
81
81
  }),
82
+ fallbackAttemptsTotal: meter.createCounter("proxy_fallback_attempts_total", {
83
+ description: "Total fallback provider attempts",
84
+ unit: "{attempt}",
85
+ }),
86
+ fallbackSuccessTotal: meter.createCounter("proxy_fallback_success_total", {
87
+ description: "Total successful fallback provider responses",
88
+ unit: "{success}",
89
+ }),
90
+ fallbackFailureTotal: meter.createCounter("proxy_fallback_failure_total", {
91
+ description: "Total failed fallback provider responses",
92
+ unit: "{failure}",
93
+ }),
82
94
  };
83
95
  _metrics = createdMetrics;
84
96
  return createdMetrics;
@@ -396,6 +408,18 @@ class ProxyTracer {
396
408
  actual_model: actualModel,
397
409
  });
398
410
  }
411
+ setFallbackInfo(info) {
412
+ if (!this.rootSpan) {
413
+ return;
414
+ }
415
+ this.rootSpan.setAttributes({
416
+ "proxy.fallback.triggered": info.triggered,
417
+ ...(info.provider ? { "proxy.fallback.provider": info.provider } : {}),
418
+ ...(info.model ? { "proxy.fallback.model": info.model } : {}),
419
+ "proxy.fallback.attempt_count": info.attemptCount,
420
+ "proxy.fallback.reason": info.reason,
421
+ });
422
+ }
399
423
  // -------------------------------------------------------------------------
400
424
  // Log payloads as span events
401
425
  // -------------------------------------------------------------------------
@@ -641,5 +665,24 @@ class ProxyTracer {
641
665
  return this.bridge.injectContext({}, trace.setSpan(context.active(), this.rootSpan));
642
666
  }
643
667
  }
668
/**
 * Increment the proxy fallback metric counters for a single attempt.
 *
 * Always bumps `proxy_fallback_attempts_total`, then bumps either the
 * success or the failure counter depending on `attrs.status`. Failure
 * samples carry an error label truncated to 100 characters so metric
 * cardinality stays bounded. Metrics are best-effort: any error raised
 * while recording is swallowed so telemetry can never break the request
 * path.
 */
export function recordFallbackAttempt(attrs) {
    try {
        const metrics = getProxyMetrics();
        const baseLabels = { provider: attrs.provider, model: attrs.model };
        metrics.fallbackAttemptsTotal.add(1, baseLabels);
        if (attrs.status === "success") {
            metrics.fallbackSuccessTotal.add(1, baseLabels);
            return;
        }
        const errorLabel = attrs.errorMessage?.slice(0, 100) ?? "unknown";
        metrics.fallbackFailureTotal.add(1, { ...baseLabels, error: errorLabel });
    }
    catch {
        // Telemetry is best-effort; never throw into the caller.
    }
}
644
687
  export { ProxyTracer };
645
688
  //# sourceMappingURL=proxyTracer.js.map
@@ -15,7 +15,7 @@ import { join } from "node:path";
15
15
  import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
16
16
  import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js";
17
17
  import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js";
18
- import { ProxyTracer } from "../../proxy/proxyTracer.js";
18
+ import { ProxyTracer, recordFallbackAttempt } from "../../proxy/proxyTracer.js";
19
19
  import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
20
20
  import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
21
21
  import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
@@ -1246,43 +1246,64 @@ async function executeClaudeFallbackTranslation(args) {
1246
1246
  if (body.stream) {
1247
1247
  const streamResult = await ctx.neurolink.stream(options);
1248
1248
  const serializer = new ClaudeStreamSerializer(body.model, 0);
1249
- async function* sseGenerator() {
1250
- for (const frame of serializer.start()) {
1251
- yield frame;
1252
- }
1253
- let collectedText = "";
1254
- for await (const chunk of streamResult.stream) {
1255
- const text = extractText(chunk);
1256
- if (text) {
1257
- collectedText += text;
1258
- for (const frame of serializer.pushDelta(text)) {
1259
- yield frame;
1260
- }
1249
+ // Eagerly consume stream so errors fire synchronously and the
1250
+ // fallback loop in tryConfiguredClaudeFallbackChain can catch them.
1251
+ const frames = [];
1252
+ let collectedText = "";
1253
+ for (const frame of serializer.start()) {
1254
+ frames.push(frame);
1255
+ }
1256
+ for await (const chunk of streamResult.stream) {
1257
+ const text = extractText(chunk);
1258
+ if (text) {
1259
+ collectedText += text;
1260
+ for (const frame of serializer.pushDelta(text)) {
1261
+ frames.push(frame);
1261
1262
  }
1262
1263
  }
1263
- const toolCalls = streamResult.toolCalls ?? [];
1264
- if (!hasTranslatedOutput(collectedText, toolCalls)) {
1265
- throw new Error(`Translated provider ${providerLabel} returned no content or tool calls`);
1266
- }
1267
- if (toolCalls.length) {
1268
- for (const toolCall of toolCalls) {
1269
- const toolName = toolCall.toolName ??
1270
- toolCall.name ??
1271
- "unknown";
1272
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
1273
- yield frame;
1274
- }
1264
+ }
1265
+ const toolCalls = streamResult.toolCalls ?? [];
1266
+ if (!hasTranslatedOutput(collectedText, toolCalls)) {
1267
+ throw new Error(`Translated provider ${providerLabel} returned no content or tool calls`);
1268
+ }
1269
+ if (toolCalls.length) {
1270
+ for (const toolCall of toolCalls) {
1271
+ const toolName = toolCall.toolName ??
1272
+ toolCall.name ??
1273
+ "unknown";
1274
+ for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
1275
+ frames.push(frame);
1275
1276
  }
1276
1277
  }
1277
- const reason = streamResult.finishReason ?? "end_turn";
1278
- const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
1279
- for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1280
- yield frame;
1281
- }
1282
1278
  }
1279
+ const reason = streamResult.finishReason ?? "end_turn";
1280
+ const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
1281
+ for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1282
+ frames.push(frame);
1283
+ }
1284
+ // Telemetry AFTER validation — not before like the old lazy path
1283
1285
  tracer?.end(200, Date.now() - requestStartTime);
1284
1286
  recordFinalSuccess();
1285
- logFinalRequest(200, "", providerLabel);
1287
+ logFinalRequest(200, "", providerLabel, undefined, undefined, {
1288
+ inputTokens: resolvedUsage.input,
1289
+ outputTokens: resolvedUsage.output,
1290
+ });
1291
+ const bufferedBody = frames.join("");
1292
+ logProxyBody({
1293
+ phase: "client_response",
1294
+ headers: { "content-type": "text/event-stream" },
1295
+ body: bufferedBody,
1296
+ bodySize: Buffer.byteLength(bufferedBody, "utf8"),
1297
+ contentType: "text/event-stream",
1298
+ responseStatus: 200,
1299
+ durationMs: Date.now() - requestStartTime,
1300
+ });
1301
+ // Return generator that yields pre-buffered frames
1302
+ async function* sseGenerator() {
1303
+ for (const frame of frames) {
1304
+ yield frame;
1305
+ }
1306
+ }
1286
1307
  return sseGenerator();
1287
1308
  }
1288
1309
  const streamResult = await ctx.neurolink.stream(options);
@@ -1346,6 +1367,11 @@ async function tryConfiguredClaudeFallbackChain(args) {
1346
1367
  : "auto-provider";
1347
1368
  logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
1348
1369
  }
1370
+ tracer?.setFallbackInfo({
1371
+ triggered: true,
1372
+ attemptCount: fallbackPlan.attempts.slice(1).length,
1373
+ reason: fallbackPolicyReason ?? "all_anthropic_accounts_exhausted",
1374
+ });
1349
1375
  for (const fallback of fallbackPlan.attempts.slice(1)) {
1350
1376
  if (!fallback.provider || !fallback.model) {
1351
1377
  continue;
@@ -1354,6 +1380,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
1354
1380
  if (!availability.available) {
1355
1381
  logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} health-check failed (${availability.reason ?? "provider unavailable"}), attempting anyway`);
1356
1382
  }
1383
+ const fallbackStart = Date.now();
1357
1384
  try {
1358
1385
  logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
1359
1386
  const options = buildProxyFallbackOptions(parsedFallbackRequest, {
@@ -1370,13 +1397,57 @@ async function tryConfiguredClaudeFallbackChain(args) {
1370
1397
  options: options,
1371
1398
  providerLabel: fallback.provider,
1372
1399
  });
1400
+ recordFallbackAttempt({
1401
+ provider: fallback.provider,
1402
+ model: fallback.model,
1403
+ status: "success",
1404
+ durationMs: Date.now() - fallbackStart,
1405
+ });
1406
+ tracer?.setFallbackInfo({
1407
+ triggered: true,
1408
+ provider: fallback.provider,
1409
+ model: fallback.model,
1410
+ attemptCount: fallbackPlan.attempts.slice(1).length,
1411
+ reason: "fallback_success",
1412
+ });
1373
1413
  return {
1374
1414
  response,
1375
1415
  fallbackPolicyReason,
1376
1416
  };
1377
1417
  }
1378
1418
  catch (fallbackErr) {
1379
- logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
1419
+ const errMsg = fallbackErr instanceof Error
1420
+ ? fallbackErr.message
1421
+ : String(fallbackErr);
1422
+ let errorClass = "unknown";
1423
+ if (errMsg.includes("Rate limit") ||
1424
+ errMsg.includes("rate_limit") ||
1425
+ errMsg.includes("max_parallel_requests")) {
1426
+ errorClass = "rate_limit";
1427
+ }
1428
+ else if (errMsg.includes("context length") ||
1429
+ errMsg.includes("ContextWindowExceeded")) {
1430
+ errorClass = "context_overflow";
1431
+ }
1432
+ else if (errMsg.includes("no content or tool calls") ||
1433
+ errMsg.includes("NoOutputGenerated")) {
1434
+ errorClass = "empty_response";
1435
+ }
1436
+ else if (errMsg.includes("thinking_level") ||
1437
+ errMsg.includes("Field required")) {
1438
+ errorClass = "schema_mismatch";
1439
+ }
1440
+ else if (errMsg.includes("Resource exhausted")) {
1441
+ errorClass = "provider_quota";
1442
+ }
1443
+ logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed [${errorClass}]: ${errMsg}`);
1444
+ recordFallbackAttempt({
1445
+ provider: fallback.provider,
1446
+ model: fallback.model,
1447
+ status: "failure",
1448
+ errorMessage: `[${errorClass}] ${errMsg}`,
1449
+ durationMs: Date.now() - fallbackStart,
1450
+ });
1380
1451
  }
1381
1452
  }
1382
1453
  return {
@@ -3541,7 +3612,15 @@ function shouldOmitImagesForTarget(provider, model) {
3541
3612
  return provider === "litellm" && model === "open-large";
3542
3613
  }
3543
3614
/**
 * Decide whether thinking config must be stripped before forwarding a
 * request to the given provider/model pair.
 *
 * - LiteLLM targets never accept it, so it is always omitted.
 * - Vertex targets keep it only for Gemini 2.5+ / 3.x model names.
 * - Every other provider keeps the config untouched.
 */
function shouldOmitThinkingConfigForTarget(provider, model) {
    if (provider === "litellm") {
        return true;
    }
    if (provider === "vertex") {
        // Only Gemini 2.5+ and 3.x support thinking_level on Vertex.
        const normalizedModel = (model ?? "").toLowerCase();
        return !/gemini-(2\.5|3)/.test(normalizedModel);
    }
    return false;
}
3546
3625
  function extractToolArgs(toolCall) {
3547
3626
  return (toolCall.args ??
@@ -0,0 +1,45 @@
1
import type { ProviderName } from "../types/providers.js";
declare const SUPPORTED_FORMATS: readonly ["jpeg", "png", "webp"];
type SupportedFormat = (typeof SUPPORTED_FORMATS)[number];
/**
 * Provider-specific image size limits in bytes
 */
export declare const PROVIDER_IMAGE_LIMITS: Record<ProviderName, number>;
/** Options controlling how an image is compressed for a target provider. */
export interface CompressionOptions {
    /** Target AI provider; determines the byte-size limit to compress under. */
    provider: ProviderName;
    /** Starting encode quality (the implementation defaults to 80). */
    quality?: number;
    /** Longest allowed edge in pixels; larger images are downscaled to fit. */
    maxDimension?: number;
    /** Output format; when omitted the input format is kept if supported, else JPEG. */
    format?: SupportedFormat;
}
/** Result of a compression run, including before/after sizes. */
export interface CompressionResult {
    /** Compressed image bytes (or the original bytes when no work was needed). */
    buffer: Buffer;
    /** Input size in bytes. */
    originalSize: number;
    /** Output size in bytes. */
    compressedSize: number;
    /** originalSize / compressedSize; exactly 1 when returned unchanged. */
    compressionRatio: number;
    /** Dimensions and format of the output image. */
    metadata: {
        width: number;
        height: number;
        format: string;
    };
}
/**
 * Compress an image to meet provider-specific size limits
 * @param imageBuffer - Input image buffer
 * @param options - Compression options including provider name
 * @returns Compressed image buffer with metadata
 */
export declare function compressImage(imageBuffer: Buffer, options: CompressionOptions): Promise<CompressionResult>;
/**
 * Check if an image needs compression for a specific provider
 * @param imageBuffer - Input image buffer
 * @param provider - AI provider name
 * @returns True if compression is needed
 */
export declare function needsCompression(imageBuffer: Buffer, provider: ProviderName): boolean;
/**
 * Get the size limit for a specific provider
 * @param provider - AI provider name
 * @returns Size limit in bytes
 */
export declare function getProviderSizeLimit(provider: ProviderName): number;
export {};
@@ -0,0 +1,137 @@
1
+ import sharp from "sharp";
2
+ import { withTimeout } from "./async/index.js";
3
// Output formats the compressor can encode; anything else falls back to JPEG.
const SUPPORTED_FORMATS = ["jpeg", "png", "webp"];
// Upper bound for any single sharp operation (metadata read or encode) so a
// pathological image cannot hang the request pipeline.
const IMAGE_COMPRESSION_TIMEOUT_MS = 30_000;
/**
 * Provider-specific image size limits in bytes
 */
export const PROVIDER_IMAGE_LIMITS = {
    openai: 20 * 1024 * 1024, // 20MB
    "openai-compatible": 20 * 1024 * 1024, // 20MB (same as OpenAI)
    anthropic: 5 * 1024 * 1024, // 5MB
    "google-ai": 4 * 1024 * 1024, // 4MB
    vertex: 4 * 1024 * 1024, // 4MB
    bedrock: 5 * 1024 * 1024, // 5MB
    azure: 20 * 1024 * 1024, // 20MB
    mistral: 5 * 1024 * 1024, // 5MB
    huggingface: 10 * 1024 * 1024, // 10MB
    ollama: 100 * 1024 * 1024, // 100MB (local, no strict limit)
    openrouter: 20 * 1024 * 1024, // 20MB
    sagemaker: 5 * 1024 * 1024, // 5MB
    litellm: 20 * 1024 * 1024, // 20MB (proxy, use OpenAI default)
    auto: 5 * 1024 * 1024, // 5MB (conservative fallback)
};
24
/**
 * Compress an image to meet provider-specific size limits
 *
 * Strategy: read metadata, short-circuit if the image already fits and no
 * conversion/resize was requested, otherwise encode at the requested quality
 * and iteratively step quality down by 10 until the result fits (or quality
 * would drop to 10 or below). Throws if the size limit cannot be met.
 * Every sharp call is wrapped in a timeout (IMAGE_COMPRESSION_TIMEOUT_MS).
 *
 * @param imageBuffer - Input image buffer
 * @param options - Compression options including provider name
 * @returns Compressed image buffer with metadata
 * @throws Error when dimensions cannot be read, a sharp step times out, or
 *         the image cannot be compressed under the provider's limit
 */
export async function compressImage(imageBuffer, options) {
    const { provider, quality = 80, maxDimension, format } = options;
    const sizeLimit = PROVIDER_IMAGE_LIMITS[provider];
    const originalSize = imageBuffer.length;
    // Get original metadata
    const image = sharp(imageBuffer);
    const metadata = await withTimeout(image.metadata(), IMAGE_COMPRESSION_TIMEOUT_MS, "Timed out reading image metadata");
    if (!metadata.width || !metadata.height) {
        throw new Error("Unable to read image dimensions");
    }
    // If image is already under limit and no format conversion needed, return as-is
    if (originalSize <= sizeLimit && !format && !maxDimension) {
        return {
            buffer: imageBuffer,
            originalSize,
            compressedSize: originalSize,
            compressionRatio: 1,
            metadata: {
                width: metadata.width,
                height: metadata.height,
                format: metadata.format ?? "unknown",
            },
        };
    }
    // Prepare compression pipeline
    let pipeline = sharp(imageBuffer);
    // Resize if needed; "inside" fit preserves aspect ratio and
    // withoutEnlargement keeps small images at their original size.
    if (maxDimension) {
        const needsResize = metadata.width > maxDimension || metadata.height > maxDimension;
        if (needsResize) {
            pipeline = pipeline.resize(maxDimension, maxDimension, {
                fit: "inside",
                withoutEnlargement: true,
            });
        }
    }
    // Resolve target format — validate metadata.format against supported set
    // (unsupported or unknown input formats are re-encoded as JPEG).
    const rawFormat = metadata.format;
    const targetFormat = format ??
        (SUPPORTED_FORMATS.includes(rawFormat)
            ? rawFormat
            : "jpeg");
    // NOTE(review): sharp's png `quality` option presumably implies palette
    // quantization, so PNG quality steps may change output colour depth —
    // confirm against the sharp version in use.
    const applyFormat = (p, q) => {
        switch (targetFormat) {
            case "jpeg":
                return p.jpeg({ quality: q, mozjpeg: true });
            case "png":
                return p.png({ quality: q, compressionLevel: 9 });
            case "webp":
                return p.webp({ quality: q });
        }
    };
    // Compress
    let compressedBuffer = await withTimeout(applyFormat(pipeline, quality).toBuffer(), IMAGE_COMPRESSION_TIMEOUT_MS, "Timed out compressing image");
    let currentQuality = quality;
    // Iteratively reduce quality if still over limit
    // Note: the sharp pipeline must be rebuilt on each iteration because
    // sharp does not support modifying quality settings after creation.
    while (compressedBuffer.length > sizeLimit && currentQuality > 10) {
        currentQuality -= 10;
        let p = sharp(imageBuffer);
        if (maxDimension) {
            p = p.resize(maxDimension, maxDimension, {
                fit: "inside",
                withoutEnlargement: true,
            });
        }
        compressedBuffer = await withTimeout(applyFormat(p, currentQuality).toBuffer(), IMAGE_COMPRESSION_TIMEOUT_MS, "Timed out compressing image");
    }
    // Final check — quality floor reached but still too large.
    if (compressedBuffer.length > sizeLimit) {
        throw new Error(`Unable to compress image to ${sizeLimit} bytes for provider ${provider}. ` +
            `Final size: ${compressedBuffer.length} bytes. ` +
            `Try using a smaller image or lower maxDimension.`);
    }
    // Get final metadata (dimensions may have changed after resize/encode).
    const finalMetadata = await withTimeout(sharp(compressedBuffer).metadata(), IMAGE_COMPRESSION_TIMEOUT_MS, "Timed out reading compressed image metadata");
    return {
        buffer: compressedBuffer,
        originalSize,
        compressedSize: compressedBuffer.length,
        compressionRatio: originalSize / compressedBuffer.length,
        metadata: {
            width: finalMetadata.width ?? 0,
            height: finalMetadata.height ?? 0,
            format: targetFormat,
        },
    };
}
119
/**
 * Check if an image needs compression for a specific provider
 * @param imageBuffer - Input image buffer
 * @param provider - AI provider name
 * @returns True if compression is needed
 */
export function needsCompression(imageBuffer, provider) {
    return imageBuffer.length > PROVIDER_IMAGE_LIMITS[provider];
}
129
/**
 * Get the size limit for a specific provider
 * @param provider - AI provider name
 * @returns Size limit in bytes
 */
export function getProviderSizeLimit(provider) {
    const { [provider]: limit } = PROVIDER_IMAGE_LIMITS;
    return limit;
}
137
+ //# sourceMappingURL=imageCompressor.js.map
@@ -356,8 +356,8 @@ export class LiteLLMProvider extends BaseProvider {
356
356
  }
357
357
  catch (streamError) {
358
358
  if (NoOutputGeneratedError.isInstance(streamError)) {
359
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
360
- return;
359
+ logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — propagating to fallback chain");
360
+ throw streamError;
361
361
  }
362
362
  throw streamError;
363
363
  }
@@ -90,6 +90,13 @@ declare class ProxyTracer {
90
90
  * Sets span attributes and increments the substitution metric counter.
91
91
  */
92
92
  setModelSubstitution(requestedModel: string, actualModel: string): void;
93
+ setFallbackInfo(info: {
94
+ triggered: boolean;
95
+ provider?: string;
96
+ model?: string;
97
+ attemptCount: number;
98
+ reason: string;
99
+ }): void;
93
100
  /** Log the incoming client request body (redacted). */
94
101
  logRequestBody(body: string): void;
95
102
  /** Log the incoming client request headers (redacted). */
@@ -129,5 +136,12 @@ declare class ProxyTracer {
129
136
  */
130
137
  getTraceHeaders(): Record<string, string>;
131
138
  }
139
+ export declare function recordFallbackAttempt(attrs: {
140
+ provider: string;
141
+ model: string;
142
+ status: "success" | "failure";
143
+ errorMessage?: string;
144
+ durationMs: number;
145
+ }): void;
132
146
  export { ProxyTracer };
133
147
  export type { ProxyRequestContext, AccountSelectionContext, UpstreamAttemptContext, UsageContext, };
@@ -79,6 +79,18 @@ function getProxyMetrics() {
79
79
  description: "Response body size in bytes received from upstream",
80
80
  unit: "By",
81
81
  }),
82
+ fallbackAttemptsTotal: meter.createCounter("proxy_fallback_attempts_total", {
83
+ description: "Total fallback provider attempts",
84
+ unit: "{attempt}",
85
+ }),
86
+ fallbackSuccessTotal: meter.createCounter("proxy_fallback_success_total", {
87
+ description: "Total successful fallback provider responses",
88
+ unit: "{success}",
89
+ }),
90
+ fallbackFailureTotal: meter.createCounter("proxy_fallback_failure_total", {
91
+ description: "Total failed fallback provider responses",
92
+ unit: "{failure}",
93
+ }),
82
94
  };
83
95
  _metrics = createdMetrics;
84
96
  return createdMetrics;
@@ -396,6 +408,18 @@ class ProxyTracer {
396
408
  actual_model: actualModel,
397
409
  });
398
410
  }
411
+ setFallbackInfo(info) {
412
+ if (!this.rootSpan) {
413
+ return;
414
+ }
415
+ this.rootSpan.setAttributes({
416
+ "proxy.fallback.triggered": info.triggered,
417
+ ...(info.provider ? { "proxy.fallback.provider": info.provider } : {}),
418
+ ...(info.model ? { "proxy.fallback.model": info.model } : {}),
419
+ "proxy.fallback.attempt_count": info.attemptCount,
420
+ "proxy.fallback.reason": info.reason,
421
+ });
422
+ }
399
423
  // -------------------------------------------------------------------------
400
424
  // Log payloads as span events
401
425
  // -------------------------------------------------------------------------
@@ -641,4 +665,23 @@ class ProxyTracer {
641
665
  return this.bridge.injectContext({}, trace.setSpan(context.active(), this.rootSpan));
642
666
  }
643
667
  }
668
+ export function recordFallbackAttempt(attrs) {
669
+ try {
670
+ const m = getProxyMetrics();
671
+ const labels = { provider: attrs.provider, model: attrs.model };
672
+ m.fallbackAttemptsTotal.add(1, labels);
673
+ if (attrs.status === "success") {
674
+ m.fallbackSuccessTotal.add(1, labels);
675
+ }
676
+ else {
677
+ m.fallbackFailureTotal.add(1, {
678
+ ...labels,
679
+ error: attrs.errorMessage?.slice(0, 100) ?? "unknown",
680
+ });
681
+ }
682
+ }
683
+ catch {
684
+ // metrics are best-effort
685
+ }
686
+ }
644
687
  export { ProxyTracer };