@juspay/neurolink 9.50.0 → 9.50.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +292 -292
  3. package/dist/context/contextCompactor.js +2 -2
  4. package/dist/context/stages/slidingWindowTruncator.d.ts +1 -1
  5. package/dist/context/stages/slidingWindowTruncator.js +3 -3
  6. package/dist/core/modules/Utilities.d.ts +5 -0
  7. package/dist/core/modules/Utilities.js +29 -18
  8. package/dist/lib/context/contextCompactor.js +2 -2
  9. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +1 -1
  10. package/dist/lib/context/stages/slidingWindowTruncator.js +3 -3
  11. package/dist/lib/core/modules/Utilities.d.ts +5 -0
  12. package/dist/lib/core/modules/Utilities.js +29 -18
  13. package/dist/lib/mcp/externalServerManager.d.ts +5 -0
  14. package/dist/lib/mcp/externalServerManager.js +24 -2
  15. package/dist/lib/neurolink.js +37 -3
  16. package/dist/lib/providers/litellm.js +2 -2
  17. package/dist/lib/proxy/proxyTracer.d.ts +14 -0
  18. package/dist/lib/proxy/proxyTracer.js +43 -0
  19. package/dist/lib/server/routes/claudeProxyRoutes.js +112 -33
  20. package/dist/lib/services/server/ai/observability/instrumentation.js +39 -1
  21. package/dist/lib/types/externalMcp.d.ts +7 -0
  22. package/dist/mcp/externalServerManager.d.ts +5 -0
  23. package/dist/mcp/externalServerManager.js +24 -2
  24. package/dist/neurolink.js +37 -3
  25. package/dist/providers/litellm.js +2 -2
  26. package/dist/proxy/proxyTracer.d.ts +14 -0
  27. package/dist/proxy/proxyTracer.js +43 -0
  28. package/dist/server/routes/claudeProxyRoutes.js +112 -33
  29. package/dist/services/server/ai/observability/instrumentation.js +39 -1
  30. package/dist/types/externalMcp.d.ts +7 -0
  31. package/package.json +1 -1
@@ -15,7 +15,7 @@ import { join } from "node:path";
15
15
  import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
16
16
  import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js";
17
17
  import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js";
18
- import { ProxyTracer } from "../../proxy/proxyTracer.js";
18
+ import { ProxyTracer, recordFallbackAttempt } from "../../proxy/proxyTracer.js";
19
19
  import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
20
20
  import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
21
21
  import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
@@ -1246,43 +1246,64 @@ async function executeClaudeFallbackTranslation(args) {
1246
1246
  if (body.stream) {
1247
1247
  const streamResult = await ctx.neurolink.stream(options);
1248
1248
  const serializer = new ClaudeStreamSerializer(body.model, 0);
1249
- async function* sseGenerator() {
1250
- for (const frame of serializer.start()) {
1251
- yield frame;
1252
- }
1253
- let collectedText = "";
1254
- for await (const chunk of streamResult.stream) {
1255
- const text = extractText(chunk);
1256
- if (text) {
1257
- collectedText += text;
1258
- for (const frame of serializer.pushDelta(text)) {
1259
- yield frame;
1260
- }
1249
+ // Eagerly consume stream so errors fire synchronously and the
1250
+ // fallback loop in tryConfiguredClaudeFallbackChain can catch them.
1251
+ const frames = [];
1252
+ let collectedText = "";
1253
+ for (const frame of serializer.start()) {
1254
+ frames.push(frame);
1255
+ }
1256
+ for await (const chunk of streamResult.stream) {
1257
+ const text = extractText(chunk);
1258
+ if (text) {
1259
+ collectedText += text;
1260
+ for (const frame of serializer.pushDelta(text)) {
1261
+ frames.push(frame);
1261
1262
  }
1262
1263
  }
1263
- const toolCalls = streamResult.toolCalls ?? [];
1264
- if (!hasTranslatedOutput(collectedText, toolCalls)) {
1265
- throw new Error(`Translated provider ${providerLabel} returned no content or tool calls`);
1266
- }
1267
- if (toolCalls.length) {
1268
- for (const toolCall of toolCalls) {
1269
- const toolName = toolCall.toolName ??
1270
- toolCall.name ??
1271
- "unknown";
1272
- for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
1273
- yield frame;
1274
- }
1264
+ }
1265
+ const toolCalls = streamResult.toolCalls ?? [];
1266
+ if (!hasTranslatedOutput(collectedText, toolCalls)) {
1267
+ throw new Error(`Translated provider ${providerLabel} returned no content or tool calls`);
1268
+ }
1269
+ if (toolCalls.length) {
1270
+ for (const toolCall of toolCalls) {
1271
+ const toolName = toolCall.toolName ??
1272
+ toolCall.name ??
1273
+ "unknown";
1274
+ for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
1275
+ frames.push(frame);
1275
1276
  }
1276
1277
  }
1277
- const reason = streamResult.finishReason ?? "end_turn";
1278
- const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
1279
- for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1280
- yield frame;
1281
- }
1282
1278
  }
1279
+ const reason = streamResult.finishReason ?? "end_turn";
1280
+ const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
1281
+ for (const frame of serializer.finish(resolvedUsage.output, reason)) {
1282
+ frames.push(frame);
1283
+ }
1284
+ // Telemetry AFTER validation — not before like the old lazy path
1283
1285
  tracer?.end(200, Date.now() - requestStartTime);
1284
1286
  recordFinalSuccess();
1285
- logFinalRequest(200, "", providerLabel);
1287
+ logFinalRequest(200, "", providerLabel, undefined, undefined, {
1288
+ inputTokens: resolvedUsage.input,
1289
+ outputTokens: resolvedUsage.output,
1290
+ });
1291
+ const bufferedBody = frames.join("");
1292
+ logProxyBody({
1293
+ phase: "client_response",
1294
+ headers: { "content-type": "text/event-stream" },
1295
+ body: bufferedBody,
1296
+ bodySize: Buffer.byteLength(bufferedBody, "utf8"),
1297
+ contentType: "text/event-stream",
1298
+ responseStatus: 200,
1299
+ durationMs: Date.now() - requestStartTime,
1300
+ });
1301
+ // Return generator that yields pre-buffered frames
1302
+ async function* sseGenerator() {
1303
+ for (const frame of frames) {
1304
+ yield frame;
1305
+ }
1306
+ }
1286
1307
  return sseGenerator();
1287
1308
  }
1288
1309
  const streamResult = await ctx.neurolink.stream(options);
@@ -1346,6 +1367,11 @@ async function tryConfiguredClaudeFallbackChain(args) {
1346
1367
  : "auto-provider";
1347
1368
  logger.always(`[proxy] skipping fallback ${label}: ${skipped.reason}`);
1348
1369
  }
1370
+ tracer?.setFallbackInfo({
1371
+ triggered: true,
1372
+ attemptCount: fallbackPlan.attempts.slice(1).length,
1373
+ reason: fallbackPolicyReason ?? "all_anthropic_accounts_exhausted",
1374
+ });
1349
1375
  for (const fallback of fallbackPlan.attempts.slice(1)) {
1350
1376
  if (!fallback.provider || !fallback.model) {
1351
1377
  continue;
@@ -1354,6 +1380,7 @@ async function tryConfiguredClaudeFallbackChain(args) {
1354
1380
  if (!availability.available) {
1355
1381
  logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} health-check failed (${availability.reason ?? "provider unavailable"}), attempting anyway`);
1356
1382
  }
1383
+ const fallbackStart = Date.now();
1357
1384
  try {
1358
1385
  logger.always(`[proxy] fallback → ${fallback.provider}/${fallback.model}`);
1359
1386
  const options = buildProxyFallbackOptions(parsedFallbackRequest, {
@@ -1370,13 +1397,57 @@ async function tryConfiguredClaudeFallbackChain(args) {
1370
1397
  options: options,
1371
1398
  providerLabel: fallback.provider,
1372
1399
  });
1400
+ recordFallbackAttempt({
1401
+ provider: fallback.provider,
1402
+ model: fallback.model,
1403
+ status: "success",
1404
+ durationMs: Date.now() - fallbackStart,
1405
+ });
1406
+ tracer?.setFallbackInfo({
1407
+ triggered: true,
1408
+ provider: fallback.provider,
1409
+ model: fallback.model,
1410
+ attemptCount: fallbackPlan.attempts.slice(1).length,
1411
+ reason: "fallback_success",
1412
+ });
1373
1413
  return {
1374
1414
  response,
1375
1415
  fallbackPolicyReason,
1376
1416
  };
1377
1417
  }
1378
1418
  catch (fallbackErr) {
1379
- logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed: ${fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr)}`);
1419
+ const errMsg = fallbackErr instanceof Error
1420
+ ? fallbackErr.message
1421
+ : String(fallbackErr);
1422
+ let errorClass = "unknown";
1423
+ if (errMsg.includes("Rate limit") ||
1424
+ errMsg.includes("rate_limit") ||
1425
+ errMsg.includes("max_parallel_requests")) {
1426
+ errorClass = "rate_limit";
1427
+ }
1428
+ else if (errMsg.includes("context length") ||
1429
+ errMsg.includes("ContextWindowExceeded")) {
1430
+ errorClass = "context_overflow";
1431
+ }
1432
+ else if (errMsg.includes("no content or tool calls") ||
1433
+ errMsg.includes("NoOutputGenerated")) {
1434
+ errorClass = "empty_response";
1435
+ }
1436
+ else if (errMsg.includes("thinking_level") ||
1437
+ errMsg.includes("Field required")) {
1438
+ errorClass = "schema_mismatch";
1439
+ }
1440
+ else if (errMsg.includes("Resource exhausted")) {
1441
+ errorClass = "provider_quota";
1442
+ }
1443
+ logger.always(`[proxy] fallback ${fallback.provider}/${fallback.model} failed [${errorClass}]: ${errMsg}`);
1444
+ recordFallbackAttempt({
1445
+ provider: fallback.provider,
1446
+ model: fallback.model,
1447
+ status: "failure",
1448
+ errorMessage: `[${errorClass}] ${errMsg}`,
1449
+ durationMs: Date.now() - fallbackStart,
1450
+ });
1380
1451
  }
1381
1452
  }
1382
1453
  return {
@@ -3541,7 +3612,15 @@ function shouldOmitImagesForTarget(provider, model) {
3541
3612
  return provider === "litellm" && model === "open-large";
3542
3613
  }
3543
3614
  function shouldOmitThinkingConfigForTarget(provider, model) {
3544
- return provider === "vertex" && model === "gemini-2.5-flash";
3615
+ if (provider === "litellm") {
3616
+ return true;
3617
+ }
3618
+ if (provider !== "vertex") {
3619
+ return false;
3620
+ }
3621
+ // Only Gemini 2.5+ and 3.x support thinking_level on Vertex.
3622
+ const m = model?.toLowerCase() ?? "";
3623
+ return !/gemini-(2\.5|3)/.test(m);
3545
3624
  }
3546
3625
  function extractToolArgs(toolCall) {
3547
3626
  return (toolCall.args ??
@@ -445,7 +445,45 @@ function initializeExternalOpenTelemetryMode(config, resource, otlpEndpoint, ser
445
445
  const provider = globalProvider;
446
446
  if (globalProvider && typeof provider.addSpanProcessor === "function") {
447
447
  provider.addSpanProcessor(new ContextEnricher());
448
- const skipLangfuse = config.skipLangfuseSpanProcessor === true || !langfuseProcessor;
448
+ // Auto-detect: skip if consumer already registered a LangfuseSpanProcessor.
449
+ //
450
+ // Detection strategy (ordered by robustness):
451
+ // 1. `instanceof LangfuseSpanProcessor` — reliable when both sides use
452
+ // the same @langfuse/otel package instance (same module identity).
453
+ // 2. Duck-type check for Langfuse-specific public member
454
+ // (`langfuseClient` property) — survives minification.
455
+ // 3. `constructor.name === "LangfuseSpanProcessor"` — last resort,
456
+ // brittle under minification or bundler renaming.
457
+ //
458
+ // NOTE: `_registeredSpanProcessors` is an internal OpenTelemetry field.
459
+ // If the OTel SDK removes or renames it, the array defaults to [] and
460
+ // `hasExistingLangfuse` is false — NeuroLink registers its own processor
461
+ // (same behavior as before this check). Consumers can always force skip
462
+ // via `skipLangfuseSpanProcessor: true`.
463
+ const existingProcessors = provider
464
+ ._registeredSpanProcessors ?? [];
465
+ const hasExistingLangfuse = existingProcessors.some((p) => {
466
+ if (p === null || p === undefined || typeof p !== "object") {
467
+ return false;
468
+ }
469
+ // Prefer instanceof — works when same @langfuse/otel package is shared
470
+ if (p instanceof LangfuseSpanProcessor) {
471
+ return true;
472
+ }
473
+ // Duck-type: Langfuse processor exposes a langfuseClient property
474
+ if ("langfuseClient" in p) {
475
+ return true;
476
+ }
477
+ // Fallback: constructor name (brittle under minification)
478
+ return (p.constructor?.name ===
479
+ "LangfuseSpanProcessor");
480
+ });
481
+ const skipLangfuse = config.skipLangfuseSpanProcessor === true ||
482
+ !langfuseProcessor ||
483
+ hasExistingLangfuse;
484
+ if (hasExistingLangfuse && !config.skipLangfuseSpanProcessor) {
485
+ logger.info(`${LOG_PREFIX} Auto-detected existing LangfuseSpanProcessor — skipping SDK registration to avoid duplicates`);
486
+ }
449
487
  if (!skipLangfuse && langfuseProcessor) {
450
488
  provider.addSpanProcessor(langfuseProcessor);
451
489
  }
@@ -228,6 +228,7 @@ export type ExternalMCPServerEvents = {
228
228
  /** Server status changed */
229
229
  statusChanged: {
230
230
  serverId: string;
231
+ serverName: string;
231
232
  oldStatus: ExternalMCPServerStatus;
232
233
  newStatus: ExternalMCPServerStatus;
233
234
  timestamp: Date;
@@ -235,24 +236,28 @@ export type ExternalMCPServerEvents = {
235
236
  /** Server connected successfully */
236
237
  connected: {
237
238
  serverId: string;
239
+ serverName: string;
238
240
  toolCount: number;
239
241
  timestamp: Date;
240
242
  };
241
243
  /** Server disconnected */
242
244
  disconnected: {
243
245
  serverId: string;
246
+ serverName: string;
244
247
  reason?: string;
245
248
  timestamp: Date;
246
249
  };
247
250
  /** Server failed */
248
251
  failed: {
249
252
  serverId: string;
253
+ serverName: string;
250
254
  error: string;
251
255
  timestamp: Date;
252
256
  };
253
257
  /** Tool discovered */
254
258
  toolDiscovered: {
255
259
  serverId: string;
260
+ serverName: string;
256
261
  toolName: string;
257
262
  toolInfo: ExternalMCPToolInfo;
258
263
  timestamp: Date;
@@ -260,12 +265,14 @@ export type ExternalMCPServerEvents = {
260
265
  /** Tool removed */
261
266
  toolRemoved: {
262
267
  serverId: string;
268
+ serverName: string;
263
269
  toolName: string;
264
270
  timestamp: Date;
265
271
  };
266
272
  /** Health check completed */
267
273
  healthCheck: {
268
274
  serverId: string;
275
+ serverName: string;
269
276
  health: ExternalMCPServerHealth;
270
277
  timestamp: Date;
271
278
  };
@@ -36,6 +36,11 @@ export declare class ExternalServerManager extends EventEmitter {
36
36
  * Get current HITL manager
37
37
  */
38
38
  getHITLManager(): HITLManager | undefined;
39
+ /**
40
+ * Resolve the human-readable server name for an event payload.
41
+ * Falls back to serverId if the instance or config.name isn't available.
42
+ */
43
+ getServerName(serverId: string): string;
39
44
  /**
40
45
  * Load MCP server configurations from .mcp-config.json file with parallel loading support
41
46
  * Automatically registers servers found in the configuration
@@ -194,10 +194,16 @@ export class ExternalServerManager extends EventEmitter {
194
194
  this.toolDiscovery = new ToolDiscoveryService();
195
195
  // Forward tool discovery events
196
196
  this.toolDiscovery.on("toolRegistered", (event) => {
197
- this.emit("toolDiscovered", event);
197
+ this.emit("toolDiscovered", {
198
+ ...event,
199
+ serverName: this.getServerName(event.serverId),
200
+ });
198
201
  });
199
202
  this.toolDiscovery.on("toolUnregistered", (event) => {
200
- this.emit("toolRemoved", event);
203
+ this.emit("toolRemoved", {
204
+ ...event,
205
+ serverName: this.getServerName(event.serverId),
206
+ });
201
207
  });
202
208
  // Handle process cleanup
203
209
  process.on("SIGINT", () => this.shutdown());
@@ -223,6 +229,14 @@ export class ExternalServerManager extends EventEmitter {
223
229
  getHITLManager() {
224
230
  return this.hitlManager;
225
231
  }
232
+ /**
233
+ * Resolve the human-readable server name for an event payload.
234
+ * Falls back to serverId if the instance or config.name isn't available.
235
+ */
236
+ getServerName(serverId) {
237
+ const instance = this.servers.get(serverId);
238
+ return instance?.config?.name || serverId;
239
+ }
226
240
  /**
227
241
  * Load MCP server configurations from .mcp-config.json file with parallel loading support
228
242
  * Automatically registers servers found in the configuration
@@ -712,6 +726,8 @@ export class ExternalServerManager extends EventEmitter {
712
726
  };
713
727
  }
714
728
  mcpLogger.info(`[ExternalServerManager] Removing server: ${serverId}`);
729
+ // Capture name before deletion removes the instance
730
+ const serverName = this.getServerName(serverId);
715
731
  // Stop the server
716
732
  await this.stopServer(serverId);
717
733
  // Remove from registry
@@ -719,6 +735,7 @@ export class ExternalServerManager extends EventEmitter {
719
735
  // Emit event
720
736
  this.emit("disconnected", {
721
737
  serverId,
738
+ serverName,
722
739
  reason: "Manually removed",
723
740
  timestamp: new Date(),
724
741
  });
@@ -816,6 +833,7 @@ export class ExternalServerManager extends EventEmitter {
816
833
  // Emit connected event
817
834
  this.emit("connected", {
818
835
  serverId,
836
+ serverName: this.getServerName(serverId),
819
837
  toolCount: instance.toolsMap.size,
820
838
  timestamp: new Date(),
821
839
  });
@@ -921,6 +939,7 @@ export class ExternalServerManager extends EventEmitter {
921
939
  // Emit status change event
922
940
  this.emit("statusChanged", {
923
941
  serverId,
942
+ serverName: this.getServerName(serverId),
924
943
  oldStatus,
925
944
  newStatus,
926
945
  timestamp: new Date(),
@@ -941,6 +960,7 @@ export class ExternalServerManager extends EventEmitter {
941
960
  // Emit failed event
942
961
  this.emit("failed", {
943
962
  serverId,
963
+ serverName: this.getServerName(serverId),
944
964
  error: error.message,
945
965
  timestamp: new Date(),
946
966
  });
@@ -965,6 +985,7 @@ export class ExternalServerManager extends EventEmitter {
965
985
  // Emit disconnected event
966
986
  this.emit("disconnected", {
967
987
  serverId,
988
+ serverName: this.getServerName(serverId),
968
989
  reason,
969
990
  timestamp: new Date(),
970
991
  });
@@ -1078,6 +1099,7 @@ export class ExternalServerManager extends EventEmitter {
1078
1099
  // Emit health check event
1079
1100
  this.emit("healthCheck", {
1080
1101
  serverId,
1102
+ serverName: this.getServerName(serverId),
1081
1103
  health,
1082
1104
  timestamp: new Date(),
1083
1105
  });
package/dist/neurolink.js CHANGED
@@ -50,7 +50,7 @@ import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
50
50
  import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
51
51
  import { SpanStatus, SpanType } from "./observability/types/spanTypes.js";
52
52
  import { SpanSerializer } from "./observability/utils/spanSerializer.js";
53
- import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
53
+ import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
54
54
  import { TaskManager } from "./tasks/taskManager.js";
55
55
  import { createTaskTools } from "./tasks/tools/taskTools.js";
56
56
  import { ATTR } from "./telemetry/attributes.js";
@@ -1129,7 +1129,10 @@ Current user's request: ${currentInput}`;
1129
1129
  * Supports additional users with per-user prompt and maxWords overrides.
1130
1130
  */
1131
1131
  storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers) {
1132
- setImmediate(async () => {
1132
+ // Preserve AsyncLocalStorage context across setImmediate boundary so that
1133
+ // memory writes appear under the originating Langfuse trace instead of
1134
+ // becoming orphan spans.
1135
+ const wrappedMemoryWrite = runWithCurrentLangfuseContext(async () => {
1133
1136
  try {
1134
1137
  const client = this.ensureMemoryReady();
1135
1138
  if (!client) {
@@ -1145,12 +1148,18 @@ Current user's request: ${currentInput}`;
1145
1148
  : undefined;
1146
1149
  writeOps.push(client.add(user.userId, content, addOptions));
1147
1150
  }
1148
- await Promise.all(writeOps);
1151
+ // withTimeout races against Promise.all — if the timeout fires, the
1152
+ // await resolves with an error but the underlying client.add() calls
1153
+ // may still complete in the background. This is acceptable: the memory
1154
+ // client API (Mem0) doesn't support AbortSignal, and these are
1155
+ // fire-and-forget background writes where a stale completion is harmless.
1156
+ await withTimeout(Promise.all(writeOps), 30_000, new Error("Background memory write timed out after 30s"));
1149
1157
  }
1150
1158
  catch (error) {
1151
1159
  logger.warn("Memory storage failed:", error);
1152
1160
  }
1153
1161
  });
1162
+ setImmediate(wrappedMemoryWrite);
1154
1163
  }
1155
1164
  /**
1156
1165
  * Set up HITL event forwarding to main emitter
@@ -3723,6 +3732,21 @@ Current user's request: ${currentInput}`;
3723
3732
  conversationMessageCount: conversationMessages.length,
3724
3733
  shouldCompact: budgetResult.shouldCompact,
3725
3734
  });
3735
+ // Scale timeout for large contexts if caller didn't set one explicitly.
3736
+ // Providers read options.timeout via getTimeout(), so setting it here
3737
+ // propagates to any downstream provider call.
3738
+ if (options.timeout === undefined &&
3739
+ budgetResult.estimatedInputTokens > 100_000) {
3740
+ // >100K → 1.5x, >200K → 2x, >300K → 2.5x (capped at 4x) of 60s base
3741
+ const scale = 1 + Math.floor((budgetResult.estimatedInputTokens - 1) / 100_000) * 0.5;
3742
+ const scaledMs = Math.round(60_000 * Math.min(scale, 4));
3743
+ options.timeout = scaledMs;
3744
+ logger.info("[TokenBudget] Scaled timeout for large context", {
3745
+ requestId,
3746
+ estimatedTokens: budgetResult.estimatedInputTokens,
3747
+ scaledTimeoutMs: scaledMs,
3748
+ });
3749
+ }
3726
3750
  const compactionSessionId = this.getCompactionSessionId(options);
3727
3751
  const lastCompactionCount = this.lastCompactionMessageCount.get(compactionSessionId) ?? 0;
3728
3752
  if (!budgetResult.shouldCompact ||
@@ -3798,6 +3822,8 @@ Current user's request: ${currentInput}`;
3798
3822
  toolDefinitions: availableTools,
3799
3823
  });
3800
3824
  if (!finalBudget.withinBudget) {
3825
+ // Clear watermark so handleContextOverflow recovery can re-compact
3826
+ this.lastCompactionMessageCount.delete(compactionSessionId);
3801
3827
  throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3802
3828
  `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3803
3829
  `Budget: ${finalBudget.availableInputTokens} tokens. ` +
@@ -3993,6 +4019,8 @@ Current user's request: ${currentInput}`;
3993
4019
  : undefined,
3994
4020
  });
3995
4021
  if (!finalBudget.withinBudget) {
4022
+ // Clear watermark so handleContextOverflow recovery can re-compact
4023
+ this.lastCompactionMessageCount.delete(dpgCompactionSessionId);
3996
4024
  throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
3997
4025
  `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
3998
4026
  `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -5016,6 +5044,8 @@ Current user's request: ${currentInput}`;
5016
5044
  toolDefinitions: availableTools,
5017
5045
  });
5018
5046
  if (!finalBudget.withinBudget) {
5047
+ // Clear watermark so handleContextOverflow recovery can re-compact
5048
+ this.lastCompactionMessageCount.delete(streamCompactionSessionId);
5019
5049
  throw new ContextBudgetExceededError(`Stream context exceeds model budget after all compaction stages. ` +
5020
5050
  `Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
5021
5051
  `Budget: ${finalBudget.availableInputTokens} tokens.`, {
@@ -7508,6 +7538,7 @@ Current user's request: ${currentInput}`;
7508
7538
  // Emit server added event
7509
7539
  this.emitter.emit("externalMCP:serverAdded", {
7510
7540
  serverId,
7541
+ serverName: config.name || serverId,
7511
7542
  config,
7512
7543
  toolCount: result.metadata?.toolsDiscovered || 0,
7513
7544
  timestamp: Date.now(),
@@ -7535,12 +7566,15 @@ Current user's request: ${currentInput}`;
7535
7566
  this.invalidateToolCache(); // Invalidate cache when an external server is removed
7536
7567
  try {
7537
7568
  mcpLogger.info(`[NeuroLink] Removing external MCP server: ${serverId}`);
7569
+ // Capture the configured name before removal destroys the instance
7570
+ const serverName = this.externalServerManager.getServerName(serverId);
7538
7571
  const result = await this.externalServerManager.removeServer(serverId);
7539
7572
  if (result.success) {
7540
7573
  mcpLogger.info(`[NeuroLink] External MCP server removed successfully: ${serverId}`);
7541
7574
  // Emit server removed event
7542
7575
  this.emitter.emit("externalMCP:serverRemoved", {
7543
7576
  serverId,
7577
+ serverName,
7544
7578
  timestamp: Date.now(),
7545
7579
  });
7546
7580
  }
@@ -356,8 +356,8 @@ export class LiteLLMProvider extends BaseProvider {
356
356
  }
357
357
  catch (streamError) {
358
358
  if (NoOutputGeneratedError.isInstance(streamError)) {
359
- logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError)");
360
- return;
359
+ logger.warn("LiteLLM: Stream produced no output (NoOutputGeneratedError) — propagating to fallback chain");
360
+ throw streamError;
361
361
  }
362
362
  throw streamError;
363
363
  }
@@ -90,6 +90,13 @@ declare class ProxyTracer {
90
90
  * Sets span attributes and increments the substitution metric counter.
91
91
  */
92
92
  setModelSubstitution(requestedModel: string, actualModel: string): void;
93
+ setFallbackInfo(info: {
94
+ triggered: boolean;
95
+ provider?: string;
96
+ model?: string;
97
+ attemptCount: number;
98
+ reason: string;
99
+ }): void;
93
100
  /** Log the incoming client request body (redacted). */
94
101
  logRequestBody(body: string): void;
95
102
  /** Log the incoming client request headers (redacted). */
@@ -129,5 +136,12 @@ declare class ProxyTracer {
129
136
  */
130
137
  getTraceHeaders(): Record<string, string>;
131
138
  }
139
+ export declare function recordFallbackAttempt(attrs: {
140
+ provider: string;
141
+ model: string;
142
+ status: "success" | "failure";
143
+ errorMessage?: string;
144
+ durationMs: number;
145
+ }): void;
132
146
  export { ProxyTracer };
133
147
  export type { ProxyRequestContext, AccountSelectionContext, UpstreamAttemptContext, UsageContext, };
@@ -79,6 +79,18 @@ function getProxyMetrics() {
79
79
  description: "Response body size in bytes received from upstream",
80
80
  unit: "By",
81
81
  }),
82
+ fallbackAttemptsTotal: meter.createCounter("proxy_fallback_attempts_total", {
83
+ description: "Total fallback provider attempts",
84
+ unit: "{attempt}",
85
+ }),
86
+ fallbackSuccessTotal: meter.createCounter("proxy_fallback_success_total", {
87
+ description: "Total successful fallback provider responses",
88
+ unit: "{success}",
89
+ }),
90
+ fallbackFailureTotal: meter.createCounter("proxy_fallback_failure_total", {
91
+ description: "Total failed fallback provider responses",
92
+ unit: "{failure}",
93
+ }),
82
94
  };
83
95
  _metrics = createdMetrics;
84
96
  return createdMetrics;
@@ -396,6 +408,18 @@ class ProxyTracer {
396
408
  actual_model: actualModel,
397
409
  });
398
410
  }
411
+ setFallbackInfo(info) {
412
+ if (!this.rootSpan) {
413
+ return;
414
+ }
415
+ this.rootSpan.setAttributes({
416
+ "proxy.fallback.triggered": info.triggered,
417
+ ...(info.provider ? { "proxy.fallback.provider": info.provider } : {}),
418
+ ...(info.model ? { "proxy.fallback.model": info.model } : {}),
419
+ "proxy.fallback.attempt_count": info.attemptCount,
420
+ "proxy.fallback.reason": info.reason,
421
+ });
422
+ }
399
423
  // -------------------------------------------------------------------------
400
424
  // Log payloads as span events
401
425
  // -------------------------------------------------------------------------
@@ -641,4 +665,23 @@ class ProxyTracer {
641
665
  return this.bridge.injectContext({}, trace.setSpan(context.active(), this.rootSpan));
642
666
  }
643
667
  }
668
+ export function recordFallbackAttempt(attrs) {
669
+ try {
670
+ const m = getProxyMetrics();
671
+ const labels = { provider: attrs.provider, model: attrs.model };
672
+ m.fallbackAttemptsTotal.add(1, labels);
673
+ if (attrs.status === "success") {
674
+ m.fallbackSuccessTotal.add(1, labels);
675
+ }
676
+ else {
677
+ m.fallbackFailureTotal.add(1, {
678
+ ...labels,
679
+ error: attrs.errorMessage?.slice(0, 100) ?? "unknown",
680
+ });
681
+ }
682
+ }
683
+ catch {
684
+ // metrics are best-effort
685
+ }
686
+ }
644
687
  export { ProxyTracer };