@juspay/neurolink 9.50.0 → 9.50.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +292 -292
  3. package/dist/context/contextCompactor.js +2 -2
  4. package/dist/context/stages/slidingWindowTruncator.d.ts +1 -1
  5. package/dist/context/stages/slidingWindowTruncator.js +3 -3
  6. package/dist/core/modules/Utilities.d.ts +5 -0
  7. package/dist/core/modules/Utilities.js +29 -18
  8. package/dist/lib/context/contextCompactor.js +2 -2
  9. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +1 -1
  10. package/dist/lib/context/stages/slidingWindowTruncator.js +3 -3
  11. package/dist/lib/core/modules/Utilities.d.ts +5 -0
  12. package/dist/lib/core/modules/Utilities.js +29 -18
  13. package/dist/lib/mcp/externalServerManager.d.ts +5 -0
  14. package/dist/lib/mcp/externalServerManager.js +24 -2
  15. package/dist/lib/neurolink.js +37 -3
  16. package/dist/lib/providers/litellm.js +2 -2
  17. package/dist/lib/proxy/proxyTracer.d.ts +14 -0
  18. package/dist/lib/proxy/proxyTracer.js +43 -0
  19. package/dist/lib/server/routes/claudeProxyRoutes.js +112 -33
  20. package/dist/lib/services/server/ai/observability/instrumentation.js +39 -1
  21. package/dist/lib/types/externalMcp.d.ts +7 -0
  22. package/dist/mcp/externalServerManager.d.ts +5 -0
  23. package/dist/mcp/externalServerManager.js +24 -2
  24. package/dist/neurolink.js +37 -3
  25. package/dist/providers/litellm.js +2 -2
  26. package/dist/proxy/proxyTracer.d.ts +14 -0
  27. package/dist/proxy/proxyTracer.js +43 -0
  28. package/dist/server/routes/claudeProxyRoutes.js +112 -33
  29. package/dist/services/server/ai/observability/instrumentation.js +39 -1
  30. package/dist/types/externalMcp.d.ts +7 -0
  31. package/package.json +1 -1
@@ -23,7 +23,7 @@ const DEFAULT_CONFIG = {
23
23
  enableSummarize: true,
24
24
  enableTruncate: true,
25
25
  pruneProtectTokens: 40_000,
26
- pruneMinimumSavings: 20_000,
26
+ pruneMinimumSavings: 500,
27
27
  pruneProtectedTools: ["skill"],
28
28
  summarizationProvider: "vertex",
29
29
  summarizationModel: "gemini-2.5-flash",
@@ -151,7 +151,7 @@ export class ContextCompactor {
151
151
  targetTokens: targetTokens,
152
152
  provider: provider,
153
153
  adaptiveBuffer: 0.15,
154
- maxIterations: 3,
154
+ maxIterations: 6,
155
155
  });
156
156
  if (truncResult.truncated) {
157
157
  currentMessages = truncResult.messages;
@@ -7,7 +7,7 @@
7
7
  *
8
8
  * Features:
9
9
  * - Adaptive truncation (PERF-001): calculates fraction from actual overage
10
- * instead of fixed 50%, with iterative refinement up to 3 passes.
10
+ * instead of fixed 50%, with iterative refinement up to 6 passes.
11
11
  * - Small conversation handling (BUG-005): for <= 4 messages, truncates
12
12
  * message content proportionally instead of returning no-op.
13
13
  */
@@ -7,7 +7,7 @@
7
7
  *
8
8
  * Features:
9
9
  * - Adaptive truncation (PERF-001): calculates fraction from actual overage
10
- * instead of fixed 50%, with iterative refinement up to 3 passes.
10
+ * instead of fixed 50%, with iterative refinement up to 6 passes.
11
11
  * - Small conversation handling (BUG-005): for <= 4 messages, truncates
12
12
  * message content proportionally instead of returning no-op.
13
13
  */
@@ -153,8 +153,8 @@ export function truncateWithSlidingWindow(messages, config) {
153
153
  messagesRemoved: evenRemoveCount,
154
154
  };
155
155
  }
156
- // Not enough -- increase fraction by 25% for next iteration
157
- currentFraction = Math.min(0.95, currentFraction + 0.25);
156
+ // Not enough -- increase fraction by 10% for finer-grained escalation
157
+ currentFraction = Math.min(0.95, currentFraction + 0.1);
158
158
  continue;
159
159
  }
160
160
  // No token targets -- single-pass with calculated fraction
@@ -57,6 +57,11 @@ export declare class Utilities {
57
57
  * Supports number or string formats (e.g., '30s', '2m', '1h')
58
58
  */
59
59
  getTimeout(options: TextGenerationOptions | StreamOptions): number;
60
+ /**
61
+ * Get timeout scaled by estimated input token count.
62
+ * For large contexts (>100K tokens), increase timeout proportionally.
63
+ */
64
+ getContextAwareTimeout(options: TextGenerationOptions | StreamOptions, estimatedTokens?: number): number;
60
65
  /**
61
66
  * Check if a schema is a Zod schema
62
67
  */
@@ -19,7 +19,7 @@
19
19
  import { z } from "zod";
20
20
  import { logger } from "../../utils/logger.js";
21
21
  import { getSafeMaxTokens } from "../../utils/tokenLimits.js";
22
- import { TimeoutError } from "../../utils/timeout.js";
22
+ import { TimeoutError, getDefaultTimeout, parseTimeout, } from "../../utils/timeout.js";
23
23
  import { validateStreamOptions as validateStreamOpts, validateTextGenerationOptions, ValidationError, createValidationSummary, } from "../../utils/parameterValidation.js";
24
24
  import { STEP_LIMITS } from "../constants.js";
25
25
  /**
@@ -157,25 +157,36 @@ export class Utilities {
157
157
  * Supports number or string formats (e.g., '30s', '2m', '1h')
158
158
  */
159
159
  getTimeout(options) {
160
- if (!options.timeout) {
161
- return this.defaultTimeout;
162
- }
163
- if (typeof options.timeout === "number") {
164
- return options.timeout;
165
- }
166
- // Parse string timeout (e.g., '30s', '2m', '1h')
167
- const timeoutStr = options.timeout.toLowerCase();
168
- const value = parseInt(timeoutStr);
169
- if (timeoutStr.includes("h")) {
170
- return value * 60 * 60 * 1000;
171
- }
172
- else if (timeoutStr.includes("m")) {
173
- return value * 60 * 1000;
160
+ // If caller specified a timeout, use it (supports number ms and string formats)
161
+ if (options.timeout !== undefined && options.timeout !== null) {
162
+ const parsed = parseTimeout(options.timeout);
163
+ if (parsed !== undefined) {
164
+ return parsed;
165
+ }
174
166
  }
175
- else if (timeoutStr.includes("s")) {
176
- return value * 1000;
167
+ // Use per-provider default (e.g., vertex=60s, ollama=5m) instead of global 30s.
168
+ // Always use "generate" operation here — streaming operations have their own
169
+ // longer timeout (DEFAULT_TIMEOUTS.streaming = 2m) applied by the streaming
170
+ // infrastructure in BaseProvider.stream(). Both TextGenerationOptions and
171
+ // StreamOptions share the same `input` property, so there is no reliable
172
+ // discriminator to detect streaming at this level.
173
+ const providerDefault = parseTimeout(getDefaultTimeout(this.providerName, "generate"));
174
+ return providerDefault ?? this.defaultTimeout;
175
+ }
176
+ /**
177
+ * Get timeout scaled by estimated input token count.
178
+ * For large contexts (>100K tokens), increase timeout proportionally.
179
+ */
180
+ getContextAwareTimeout(options, estimatedTokens) {
181
+ const baseTimeout = this.getTimeout(options);
182
+ if (!estimatedTokens || estimatedTokens <= 100_000) {
183
+ return baseTimeout;
177
184
  }
178
- return this.defaultTimeout;
185
+ // Scale: >100K → 1.5x, >200K → 2x, >300K → 2.5x (capped at 4x)
186
+ // Use (estimatedTokens - 1) so exact multiples stay in the lower tier
187
+ // (e.g., 100_000 → 1x, 100_001 → 1.5x)
188
+ const scale = 1 + Math.floor((estimatedTokens - 1) / 100_000) * 0.5;
189
+ return Math.round(baseTimeout * Math.min(scale, 4));
179
190
  }
180
191
  /**
181
192
  * Check if a schema is a Zod schema
@@ -23,7 +23,7 @@ const DEFAULT_CONFIG = {
23
23
  enableSummarize: true,
24
24
  enableTruncate: true,
25
25
  pruneProtectTokens: 40_000,
26
- pruneMinimumSavings: 20_000,
26
+ pruneMinimumSavings: 500,
27
27
  pruneProtectedTools: ["skill"],
28
28
  summarizationProvider: "vertex",
29
29
  summarizationModel: "gemini-2.5-flash",
@@ -151,7 +151,7 @@ export class ContextCompactor {
151
151
  targetTokens: targetTokens,
152
152
  provider: provider,
153
153
  adaptiveBuffer: 0.15,
154
- maxIterations: 3,
154
+ maxIterations: 6,
155
155
  });
156
156
  if (truncResult.truncated) {
157
157
  currentMessages = truncResult.messages;
@@ -7,7 +7,7 @@
7
7
  *
8
8
  * Features:
9
9
  * - Adaptive truncation (PERF-001): calculates fraction from actual overage
10
- * instead of fixed 50%, with iterative refinement up to 3 passes.
10
+ * instead of fixed 50%, with iterative refinement up to 6 passes.
11
11
  * - Small conversation handling (BUG-005): for <= 4 messages, truncates
12
12
  * message content proportionally instead of returning no-op.
13
13
  */
@@ -7,7 +7,7 @@
7
7
  *
8
8
  * Features:
9
9
  * - Adaptive truncation (PERF-001): calculates fraction from actual overage
10
- * instead of fixed 50%, with iterative refinement up to 3 passes.
10
+ * instead of fixed 50%, with iterative refinement up to 6 passes.
11
11
  * - Small conversation handling (BUG-005): for <= 4 messages, truncates
12
12
  * message content proportionally instead of returning no-op.
13
13
  */
@@ -153,8 +153,8 @@ export function truncateWithSlidingWindow(messages, config) {
153
153
  messagesRemoved: evenRemoveCount,
154
154
  };
155
155
  }
156
- // Not enough -- increase fraction by 25% for next iteration
157
- currentFraction = Math.min(0.95, currentFraction + 0.25);
156
+ // Not enough -- increase fraction by 10% for finer-grained escalation
157
+ currentFraction = Math.min(0.95, currentFraction + 0.1);
158
158
  continue;
159
159
  }
160
160
  // No token targets -- single-pass with calculated fraction
@@ -57,6 +57,11 @@ export declare class Utilities {
57
57
  * Supports number or string formats (e.g., '30s', '2m', '1h')
58
58
  */
59
59
  getTimeout(options: TextGenerationOptions | StreamOptions): number;
60
+ /**
61
+ * Get timeout scaled by estimated input token count.
62
+ * For large contexts (>100K tokens), increase timeout proportionally.
63
+ */
64
+ getContextAwareTimeout(options: TextGenerationOptions | StreamOptions, estimatedTokens?: number): number;
60
65
  /**
61
66
  * Check if a schema is a Zod schema
62
67
  */
@@ -19,7 +19,7 @@
19
19
  import { z } from "zod";
20
20
  import { logger } from "../../utils/logger.js";
21
21
  import { getSafeMaxTokens } from "../../utils/tokenLimits.js";
22
- import { TimeoutError } from "../../utils/timeout.js";
22
+ import { TimeoutError, getDefaultTimeout, parseTimeout, } from "../../utils/timeout.js";
23
23
  import { validateStreamOptions as validateStreamOpts, validateTextGenerationOptions, ValidationError, createValidationSummary, } from "../../utils/parameterValidation.js";
24
24
  import { STEP_LIMITS } from "../constants.js";
25
25
  /**
@@ -157,25 +157,36 @@ export class Utilities {
157
157
  * Supports number or string formats (e.g., '30s', '2m', '1h')
158
158
  */
159
159
  getTimeout(options) {
160
- if (!options.timeout) {
161
- return this.defaultTimeout;
162
- }
163
- if (typeof options.timeout === "number") {
164
- return options.timeout;
165
- }
166
- // Parse string timeout (e.g., '30s', '2m', '1h')
167
- const timeoutStr = options.timeout.toLowerCase();
168
- const value = parseInt(timeoutStr);
169
- if (timeoutStr.includes("h")) {
170
- return value * 60 * 60 * 1000;
171
- }
172
- else if (timeoutStr.includes("m")) {
173
- return value * 60 * 1000;
160
+ // If caller specified a timeout, use it (supports number ms and string formats)
161
+ if (options.timeout !== undefined && options.timeout !== null) {
162
+ const parsed = parseTimeout(options.timeout);
163
+ if (parsed !== undefined) {
164
+ return parsed;
165
+ }
174
166
  }
175
- else if (timeoutStr.includes("s")) {
176
- return value * 1000;
167
+ // Use per-provider default (e.g., vertex=60s, ollama=5m) instead of global 30s.
168
+ // Always use "generate" operation here — streaming operations have their own
169
+ // longer timeout (DEFAULT_TIMEOUTS.streaming = 2m) applied by the streaming
170
+ // infrastructure in BaseProvider.stream(). Both TextGenerationOptions and
171
+ // StreamOptions share the same `input` property, so there is no reliable
172
+ // discriminator to detect streaming at this level.
173
+ const providerDefault = parseTimeout(getDefaultTimeout(this.providerName, "generate"));
174
+ return providerDefault ?? this.defaultTimeout;
175
+ }
176
+ /**
177
+ * Get timeout scaled by estimated input token count.
178
+ * For large contexts (>100K tokens), increase timeout proportionally.
179
+ */
180
+ getContextAwareTimeout(options, estimatedTokens) {
181
+ const baseTimeout = this.getTimeout(options);
182
+ if (!estimatedTokens || estimatedTokens <= 100_000) {
183
+ return baseTimeout;
177
184
  }
178
- return this.defaultTimeout;
185
+ // Scale: >100K → 1.5x, >200K → 2x, >300K → 2.5x (capped at 4x)
186
+ // Use (estimatedTokens - 1) so exact multiples stay in the lower tier
187
+ // (e.g., 100_000 → 1x, 100_001 → 1.5x)
188
+ const scale = 1 + Math.floor((estimatedTokens - 1) / 100_000) * 0.5;
189
+ return Math.round(baseTimeout * Math.min(scale, 4));
179
190
  }
180
191
  /**
181
192
  * Check if a schema is a Zod schema
@@ -36,6 +36,11 @@ export declare class ExternalServerManager extends EventEmitter {
36
36
  * Get current HITL manager
37
37
  */
38
38
  getHITLManager(): HITLManager | undefined;
39
+ /**
40
+ * Resolve the human-readable server name for an event payload.
41
+ * Falls back to serverId if the instance or config.name isn't available.
42
+ */
43
+ getServerName(serverId: string): string;
39
44
  /**
40
45
  * Load MCP server configurations from .mcp-config.json file with parallel loading support
41
46
  * Automatically registers servers found in the configuration
@@ -194,10 +194,16 @@ export class ExternalServerManager extends EventEmitter {
194
194
  this.toolDiscovery = new ToolDiscoveryService();
195
195
  // Forward tool discovery events
196
196
  this.toolDiscovery.on("toolRegistered", (event) => {
197
- this.emit("toolDiscovered", event);
197
+ this.emit("toolDiscovered", {
198
+ ...event,
199
+ serverName: this.getServerName(event.serverId),
200
+ });
198
201
  });
199
202
  this.toolDiscovery.on("toolUnregistered", (event) => {
200
- this.emit("toolRemoved", event);
203
+ this.emit("toolRemoved", {
204
+ ...event,
205
+ serverName: this.getServerName(event.serverId),
206
+ });
201
207
  });
202
208
  // Handle process cleanup
203
209
  process.on("SIGINT", () => this.shutdown());
@@ -223,6 +229,14 @@ export class ExternalServerManager extends EventEmitter {
223
229
  getHITLManager() {
224
230
  return this.hitlManager;
225
231
  }
232
+ /**
233
+ * Resolve the human-readable server name for an event payload.
234
+ * Falls back to serverId if the instance or config.name isn't available.
235
+ */
236
+ getServerName(serverId) {
237
+ const instance = this.servers.get(serverId);
238
+ return instance?.config?.name || serverId;
239
+ }
226
240
  /**
227
241
  * Load MCP server configurations from .mcp-config.json file with parallel loading support
228
242
  * Automatically registers servers found in the configuration
@@ -712,6 +726,8 @@ export class ExternalServerManager extends EventEmitter {
712
726
  };
713
727
  }
714
728
  mcpLogger.info(`[ExternalServerManager] Removing server: ${serverId}`);
729
+ // Capture name before deletion removes the instance
730
+ const serverName = this.getServerName(serverId);
715
731
  // Stop the server
716
732
  await this.stopServer(serverId);
717
733
  // Remove from registry
@@ -719,6 +735,7 @@ export class ExternalServerManager extends EventEmitter {
719
735
  // Emit event
720
736
  this.emit("disconnected", {
721
737
  serverId,
738
+ serverName,
722
739
  reason: "Manually removed",
723
740
  timestamp: new Date(),
724
741
  });
@@ -816,6 +833,7 @@ export class ExternalServerManager extends EventEmitter {
816
833
  // Emit connected event
817
834
  this.emit("connected", {
818
835
  serverId,
836
+ serverName: this.getServerName(serverId),
819
837
  toolCount: instance.toolsMap.size,
820
838
  timestamp: new Date(),
821
839
  });
@@ -921,6 +939,7 @@ export class ExternalServerManager extends EventEmitter {
921
939
  // Emit status change event
922
940
  this.emit("statusChanged", {
923
941
  serverId,
942
+ serverName: this.getServerName(serverId),
924
943
  oldStatus,
925
944
  newStatus,
926
945
  timestamp: new Date(),
@@ -941,6 +960,7 @@ export class ExternalServerManager extends EventEmitter {
941
960
  // Emit failed event
942
961
  this.emit("failed", {
943
962
  serverId,
963
+ serverName: this.getServerName(serverId),
944
964
  error: error.message,
945
965
  timestamp: new Date(),
946
966
  });
@@ -965,6 +985,7 @@ export class ExternalServerManager extends EventEmitter {
965
985
  // Emit disconnected event
966
986
  this.emit("disconnected", {
967
987
  serverId,
988
+ serverName: this.getServerName(serverId),
968
989
  reason,
969
990
  timestamp: new Date(),
970
991
  });
@@ -1078,6 +1099,7 @@ export class ExternalServerManager extends EventEmitter {
1078
1099
  // Emit health check event
1079
1100
  this.emit("healthCheck", {
1080
1101
  serverId,
1102
+ serverName: this.getServerName(serverId),
1081
1103
  health,
1082
1104
  timestamp: new Date(),
1083
1105
  });
@@ -50,7 +50,7 @@ import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
50
50
  import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
51
51
  import { SpanStatus, SpanType } from "./observability/types/spanTypes.js";
52
52
  import { SpanSerializer } from "./observability/utils/spanSerializer.js";
53
- import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
53
+ import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
54
54
  import { TaskManager } from "./tasks/taskManager.js";
55
55
  import { createTaskTools } from "./tasks/tools/taskTools.js";
56
56
  import { ATTR } from "./telemetry/attributes.js";
@@ -1129,7 +1129,10 @@ Current user's request: ${currentInput}`;
1129
1129
  * Supports additional users with per-user prompt and maxWords overrides.
1130
1130
  */
1131
1131
  storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers) {
1132
- setImmediate(async () => {
1132
+ // Preserve AsyncLocalStorage context across setImmediate boundary so that
1133
+ // memory writes appear under the originating Langfuse trace instead of
1134
+ // becoming orphan spans.
1135
+ const wrappedMemoryWrite = runWithCurrentLangfuseContext(async () => {
1133
1136
  try {
1134
1137
  const client = this.ensureMemoryReady();
1135
1138
  if (!client) {
@@ -1145,12 +1148,18 @@ Current user's request: ${currentInput}`;
1145
1148
  : undefined;
1146
1149
  writeOps.push(client.add(user.userId, content, addOptions));
1147
1150
  }
1148
- await Promise.all(writeOps);
1151
+ // withTimeout races against Promise.all — if the timeout fires, the
1152
+ // await resolves with an error but the underlying client.add() calls
1153
+ // may still complete in the background. This is acceptable: the memory
1154
+ // client API (Mem0) doesn't support AbortSignal, and these are
1155
+ // fire-and-forget background writes where a stale completion is harmless.
1156
+ await withTimeout(Promise.all(writeOps), 30_000, new Error("Background memory write timed out after 30s"));
1149
1157
  }
1150
1158
  catch (error) {
1151
1159
  logger.warn("Memory storage failed:", error);
1152
1160
  }
1153
1161
  });
1162
+ setImmediate(wrappedMemoryWrite);
1154
1163
  }
1155
1164
  /**
1156
1165
  * Set up HITL event forwarding to main emitter
@@ -3723,6 +3732,21 @@ Current user's request: ${currentInput}`;
3723
3732
  conversationMessageCount: conversationMessages.length,
3724
3733
  shouldCompact: budgetResult.shouldCompact,
3725
3734
  });
3735
+ // Scale timeout for large contexts if caller didn't set one explicitly.
3736
+ // Providers read options.timeout via getTimeout(), so setting it here
3737
+ // propagates to any downstream provider call.
3738
+ if (options.timeout === undefined &&
3739
+ budgetResult.estimatedInputTokens > 100_000) {
3740
+ // >100K → 1.5x, >200K → 2x, >300K → 2.5x (capped at 4x) of 60s base
3741
+ const scale = 1 + Math.floor((budgetResult.estimatedInputTokens - 1) / 100_000) * 0.5;
3742
+ const scaledMs = Math.round(60_000 * Math.min(scale, 4));
3743
+ options.timeout = scaledMs;
3744
+ logger.info("[TokenBudget] Scaled timeout for large context", {
3745
+ requestId,
3746
+ estimatedTokens: budgetResult.estimatedInputTokens,
3747
+ scaledTimeoutMs: scaledMs,
3748
+ });
3749
+ }
3726
3750
  const compactionSessionId = this.getCompactionSessionId(options);
3727
3751
  const lastCompactionCount = this.lastCompactionMessageCount.get(compactionSessionId) ?? 0;
3728
3752
  if (!budgetResult.shouldCompact ||
@@ -3798,6 +3822,8 @@ Current user's request: ${currentInput}`;
3798
3822
  toolDefinitions: availableTools,
3799
3823
  });
3800
3824
  if (!finalBudget.withinBudget) {
3825
+ // Clear watermark so handleContextOverflow recovery can re-compact
3826
+ this.lastCompactionMessageCount.delete(compactionSessionId);
3801
3827
  throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3802
3828
  `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3803
3829
  `Budget: ${finalBudget.availableInputTokens} tokens. ` +
@@ -3993,6 +4019,8 @@ Current user's request: ${currentInput}`;
3993
4019
  : undefined,
3994
4020
  });
3995
4021
  if (!finalBudget.withinBudget) {
4022
+ // Clear watermark so handleContextOverflow recovery can re-compact
4023
+ this.lastCompactionMessageCount.delete(dpgCompactionSessionId);
3996
4024
  throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3997
4025
  `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3998
4026
  `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -5016,6 +5044,8 @@ Current user's request: ${currentInput}`;
5016
5044
  toolDefinitions: availableTools,
5017
5045
  });
5018
5046
  if (!finalBudget.withinBudget) {
5047
+ // Clear watermark so handleContextOverflow recovery can re-compact
5048
+ this.lastCompactionMessageCount.delete(streamCompactionSessionId);
5019
5049
  throw new ContextBudgetExceededError(`Stream context exceeds model budget after all compaction stages. ` +
5020
5050
  `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
5021
5051
  `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -7508,6 +7538,7 @@ Current user's request: ${currentInput}`;
7508
7538
  // Emit server added event
7509
7539
  this.emitter.emit("externalMCP:serverAdded", {
7510
7540
  serverId,
7541
+ serverName: config.name || serverId,
7511
7542
  config,
7512
7543
  toolCount: result.metadata?.toolsDiscovered || 0,
7513
7544
  timestamp: Date.now(),
@@ -7535,12 +7566,15 @@ Current user's request: ${currentInput}`;
7535
7566
  this.invalidateToolCache(); // Invalidate cache when an external server is removed
7536
7567
  try {
7537
7568
  mcpLogger.info(`[NeuroLink] Removing external MCP server: ${serverId}`);
7569
+ // Capture the configured name before removal destroys the instance
7570
+ const serverName = this.externalServerManager.getServerName(serverId);
7538
7571
  const result = await this.externalServerManager.removeServer(serverId);
7539
7572
  if (result.success) {
7540
7573
  mcpLogger.info(`[NeuroLink] External MCP server removed successfully: ${serverId}`);
7541
7574
  // Emit server removed event
7542
7575
  this.emitter.emit("externalMCP:serverRemoved", {
7543
7576
  serverId,
7577
+ serverName,
7544
7578
  timestamp: Date.now(),
7545
7579
  });
7546
7580
  }
@@ -356,8 +356,8 @@ export class LiteLLMProvider extends BaseProvider {
356
356
  }
357
357
  catch (streamError) {
358
358
  if (NoOutputGeneratedError.isInstance(streamError)) {
359
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
360
- return;
359
+ logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — propagating to fallback chain");
360
+ throw streamError;
361
361
  }
362
362
  throw streamError;
363
363
  }
@@ -90,6 +90,13 @@ declare class ProxyTracer {
90
90
  * Sets span attributes and increments the substitution metric counter.
91
91
  */
92
92
  setModelSubstitution(requestedModel: string, actualModel: string): void;
93
+ setFallbackInfo(info: {
94
+ triggered: boolean;
95
+ provider?: string;
96
+ model?: string;
97
+ attemptCount: number;
98
+ reason: string;
99
+ }): void;
93
100
  /** Log the incoming client request body (redacted). */
94
101
  logRequestBody(body: string): void;
95
102
  /** Log the incoming client request headers (redacted). */
@@ -129,5 +136,12 @@ declare class ProxyTracer {
129
136
  */
130
137
  getTraceHeaders(): Record<string, string>;
131
138
  }
139
+ export declare function recordFallbackAttempt(attrs: {
140
+ provider: string;
141
+ model: string;
142
+ status: "success" | "failure";
143
+ errorMessage?: string;
144
+ durationMs: number;
145
+ }): void;
132
146
  export { ProxyTracer };
133
147
  export type { ProxyRequestContext, AccountSelectionContext, UpstreamAttemptContext, UsageContext, };
@@ -79,6 +79,18 @@ function getProxyMetrics() {
79
79
  description: "Response body size in bytes received from upstream",
80
80
  unit: "By",
81
81
  }),
82
+ fallbackAttemptsTotal: meter.createCounter("proxy_fallback_attempts_total", {
83
+ description: "Total fallback provider attempts",
84
+ unit: "{attempt}",
85
+ }),
86
+ fallbackSuccessTotal: meter.createCounter("proxy_fallback_success_total", {
87
+ description: "Total successful fallback provider responses",
88
+ unit: "{success}",
89
+ }),
90
+ fallbackFailureTotal: meter.createCounter("proxy_fallback_failure_total", {
91
+ description: "Total failed fallback provider responses",
92
+ unit: "{failure}",
93
+ }),
82
94
  };
83
95
  _metrics = createdMetrics;
84
96
  return createdMetrics;
@@ -396,6 +408,18 @@ class ProxyTracer {
396
408
  actual_model: actualModel,
397
409
  });
398
410
  }
411
+ setFallbackInfo(info) {
412
+ if (!this.rootSpan) {
413
+ return;
414
+ }
415
+ this.rootSpan.setAttributes({
416
+ "proxy.fallback.triggered": info.triggered,
417
+ ...(info.provider ? { "proxy.fallback.provider": info.provider } : {}),
418
+ ...(info.model ? { "proxy.fallback.model": info.model } : {}),
419
+ "proxy.fallback.attempt_count": info.attemptCount,
420
+ "proxy.fallback.reason": info.reason,
421
+ });
422
+ }
399
423
  // -------------------------------------------------------------------------
400
424
  // Log payloads as span events
401
425
  // -------------------------------------------------------------------------
@@ -641,5 +665,24 @@ class ProxyTracer {
641
665
  return this.bridge.injectContext({}, trace.setSpan(context.active(), this.rootSpan));
642
666
  }
643
667
  }
668
+ export function recordFallbackAttempt(attrs) {
669
+ try {
670
+ const m = getProxyMetrics();
671
+ const labels = { provider: attrs.provider, model: attrs.model };
672
+ m.fallbackAttemptsTotal.add(1, labels);
673
+ if (attrs.status === "success") {
674
+ m.fallbackSuccessTotal.add(1, labels);
675
+ }
676
+ else {
677
+ m.fallbackFailureTotal.add(1, {
678
+ ...labels,
679
+ error: attrs.errorMessage?.slice(0, 100) ?? "unknown",
680
+ });
681
+ }
682
+ }
683
+ catch {
684
+ // metrics are best-effort
685
+ }
686
+ }
644
687
  export { ProxyTracer };
645
688
  //# sourceMappingURL=proxyTracer.js.map