@juspay/neurolink 9.70.7 → 9.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +362 -344
  3. package/dist/core/constants.d.ts +1 -0
  4. package/dist/core/constants.js +2 -0
  5. package/dist/core/toolRouting.d.ts +59 -0
  6. package/dist/core/toolRouting.js +232 -0
  7. package/dist/lib/core/constants.d.ts +1 -0
  8. package/dist/lib/core/constants.js +2 -0
  9. package/dist/lib/core/toolRouting.d.ts +59 -0
  10. package/dist/lib/core/toolRouting.js +233 -0
  11. package/dist/lib/neurolink.d.ts +31 -1
  12. package/dist/lib/neurolink.js +241 -17
  13. package/dist/lib/providers/googleVertex.js +257 -30
  14. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +10 -1
  15. package/dist/lib/services/server/ai/observability/instrumentation.js +36 -1
  16. package/dist/lib/telemetry/attributes.d.ts +31 -0
  17. package/dist/lib/telemetry/attributes.js +48 -0
  18. package/dist/lib/telemetry/index.d.ts +1 -1
  19. package/dist/lib/telemetry/index.js +1 -1
  20. package/dist/lib/types/config.d.ts +8 -0
  21. package/dist/lib/types/index.d.ts +1 -0
  22. package/dist/lib/types/index.js +1 -0
  23. package/dist/lib/types/toolRouting.d.ts +91 -0
  24. package/dist/lib/types/toolRouting.js +19 -0
  25. package/dist/lib/utils/anthropicTraceSanitizer.d.ts +7 -0
  26. package/dist/lib/utils/anthropicTraceSanitizer.js +26 -0
  27. package/dist/lib/utils/mcpErrorText.d.ts +16 -0
  28. package/dist/lib/utils/mcpErrorText.js +36 -0
  29. package/dist/neurolink.d.ts +31 -1
  30. package/dist/neurolink.js +241 -17
  31. package/dist/providers/googleVertex.js +257 -30
  32. package/dist/services/server/ai/observability/instrumentation.d.ts +10 -1
  33. package/dist/services/server/ai/observability/instrumentation.js +36 -1
  34. package/dist/telemetry/attributes.d.ts +31 -0
  35. package/dist/telemetry/attributes.js +48 -0
  36. package/dist/telemetry/index.d.ts +1 -1
  37. package/dist/telemetry/index.js +1 -1
  38. package/dist/types/config.d.ts +8 -0
  39. package/dist/types/index.d.ts +1 -0
  40. package/dist/types/index.js +1 -0
  41. package/dist/types/toolRouting.d.ts +91 -0
  42. package/dist/types/toolRouting.js +18 -0
  43. package/dist/utils/anthropicTraceSanitizer.d.ts +7 -0
  44. package/dist/utils/anthropicTraceSanitizer.js +25 -0
  45. package/dist/utils/mcpErrorText.d.ts +16 -0
  46. package/dist/utils/mcpErrorText.js +36 -0
  47. package/package.json +2 -1
@@ -5,7 +5,7 @@
5
5
  * Enhanced AI provider system with natural MCP tool access.
6
6
  * Uses real MCP infrastructure for tool discovery and execution.
7
7
  */
8
- import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext } from "./types/index.js";
8
+ import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext, ToolRoutingServerDescriptor } from "./types/index.js";
9
9
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
10
10
  import type { RedisConversationMemoryManager } from "./core/redisConversationMemoryManager.js";
11
11
  import { ExternalServerManager } from "./mcp/externalServerManager.js";
@@ -100,6 +100,7 @@ export declare class NeuroLink {
100
100
  conversationMemory?: ConversationMemoryManager | RedisConversationMemoryManager | null;
101
101
  private conversationMemoryNeedsInit;
102
102
  private conversationMemoryConfig?;
103
+ private toolRoutingConfig?;
103
104
  private enableOrchestration;
104
105
  private authProvider?;
105
106
  private pendingAuthConfig?;
@@ -803,6 +804,35 @@ export declare class NeuroLink {
803
804
  */
804
805
  private streamWithIterationFallback;
805
806
  private executeStreamRequest;
807
+ /**
808
+ * Pre-call tool routing for stream(): runs the router LLM once per turn
809
+ * and appends the unpicked servers' registered tool names to
810
+ * `options.excludeTools` — the per-call denylist enforced by
811
+ * `baseProvider.applyToolFiltering`. No-op when `options.disableTools` is set,
812
+ * routing is not enabled, or no servers are configured. Routing failures fail
813
+ * open (no exclusions, warning logged); errors matching isAbortError are rethrown.
814
+ */
815
+ private applyToolRoutingExclusions;
816
+ /**
817
+ * Loads prior conversation turns for the router so a follow-up turn carries
818
+ * the context it needs to classify intent. Non-empty
819
+ * `options.conversationMessages` are returned as-is; otherwise, when
820
+ * `context.sessionId` is a non-empty string, conversation memory is lazily
821
+ * initialised and read with summarization disabled. Returns an empty list
822
+ * when there is no session id, no memory, or any step throws — routing then
823
+ * uses the current query alone. No windowing is applied here; NOTE(review):
824
+ * presumably bounded downstream in buildRoutingQueryFromHistory — confirm.
825
+ */
826
+ private fetchRecentRoutingHistory;
827
+ /**
828
+ * Supplies (or replaces) the pre-call tool routing server catalog.
829
+ *
830
+ * For hosts that only know their tool servers after constructing NeuroLink
831
+ * (e.g. per session/conversation). The array is stored by reference, not
832
+ * copied. Routing must be enabled via the constructor's `toolRouting.enabled`;
833
+ * without that config the servers are kept under `enabled: false` and a warning is logged.
834
+ */
835
+ setToolRoutingServers(servers: ToolRoutingServerDescriptor[]): void;
806
836
  private validateStreamRequestOptions;
807
837
  private maybeHandleWorkflowStreamRequest;
808
838
  private runStandardStreamRequest;
@@ -28,8 +28,9 @@ import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
28
28
  import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
29
29
  import { ContextBudgetExceededError } from "./context/errors.js";
30
30
  import { repairToolPairs } from "./context/toolPairRepair.js";
31
- import { SYSTEM_LIMITS } from "./core/constants.js";
31
+ import { SYSTEM_LIMITS, DEFAULT_TOOL_ROUTING_TIMEOUT_MS, } from "./core/constants.js";
32
32
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
33
+ import { buildToolRoutingCatalog, buildRoutingQueryFromHistory, resolveToolRoutingExclusions, } from "./core/toolRouting.js";
33
34
  import { AIProviderFactory } from "./core/factory.js";
34
35
  import { createToolEventPayload } from "./core/toolEvents.js";
35
36
  import { ProviderRegistry } from "./factories/providerRegistry.js";
@@ -55,7 +56,7 @@ import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
55
56
  import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
56
57
  import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, ModelAccessDeniedError, } from "./types/index.js";
57
58
  import { SpanSerializer } from "./observability/utils/spanSerializer.js";
58
- import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
59
+ import { flushOpenTelemetry, getLangfuseContext, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, stampGuestRescueIdentity, } from "./services/server/ai/observability/instrumentation.js";
59
60
  import { TaskManager } from "./tasks/taskManager.js";
60
61
  import { createTaskTools } from "./tasks/tools/taskTools.js";
61
62
  import { ATTR } from "./telemetry/attributes.js";
@@ -436,6 +437,10 @@ export class NeuroLink {
436
437
  conversationMemory;
437
438
  conversationMemoryNeedsInit = false;
438
439
  conversationMemoryConfig;
440
+ // Pre-call tool routing: instance-level config from the constructor.
441
+ // The server catalog inside it can be supplied/replaced later via
442
+ // setToolRoutingServers() for hosts that register tools after construction.
443
+ toolRoutingConfig;
439
444
  // Add orchestration property
440
445
  enableOrchestration;
441
446
  // Authentication provider for secure access control
@@ -842,6 +847,12 @@ export class NeuroLink {
842
847
  if (config?.modelChain) {
843
848
  this.fallbackConfig.modelChain = config.modelChain;
844
849
  }
850
+ if (config?.toolRouting) {
851
+ // Shallow-clone so setToolRoutingServers() mutating this.toolRoutingConfig
852
+ // can't leak into the caller's config object, which may be shared across
853
+ // multiple NeuroLink instances.
854
+ this.toolRoutingConfig = { ...config.toolRouting };
855
+ }
845
856
  logger.setEventEmitter(this.emitter);
846
857
  // Read tool cache duration from environment variables, with a default
847
858
  const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
@@ -1378,11 +1389,8 @@ Current user's request: ${currentInput}`;
1378
1389
  * Calls add(userId, content) which internally condenses old + new via LLM.
1379
1390
  * Supports additional users with per-user prompt and maxWords overrides.
1380
1391
  */
1381
- storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers) {
1382
- // Preserve AsyncLocalStorage context across setImmediate boundary so that
1383
- // memory writes appear under the originating Langfuse trace instead of
1384
- // becoming orphan spans.
1385
- const wrappedMemoryWrite = runWithCurrentLangfuseContext(async () => {
1392
+ storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers, langfuseIdentity) {
1393
+ const memoryWrite = async () => {
1386
1394
  try {
1387
1395
  const client = this.ensureMemoryReady();
1388
1396
  if (!client) {
@@ -1408,7 +1416,21 @@ Current user's request: ${currentInput}`;
1408
1416
  catch (error) {
1409
1417
  logger.warn("Memory storage failed:", error);
1410
1418
  }
1411
- });
1419
+ };
1420
+ // Carry the turn's identity across the setImmediate boundary so the
1421
+ // condensation generate + redis spans don't orphan to "guest". Keep the
1422
+ // ambient store when it survived (generate path — carries conversationId,
1423
+ // metadata, …); re-establish from the caller only when it was lost (stream
1424
+ // path, which fires after the caller consumed the stream).
1425
+ const ambient = getLangfuseContext();
1426
+ const wrappedMemoryWrite = !(ambient?.traceName || ambient?.userId) &&
1427
+ (langfuseIdentity?.traceName || langfuseIdentity?.sessionId)
1428
+ ? () => setLangfuseContext({
1429
+ userId,
1430
+ sessionId: langfuseIdentity.sessionId ?? null,
1431
+ traceName: langfuseIdentity.traceName ?? null,
1432
+ }, memoryWrite)
1433
+ : runWithCurrentLangfuseContext(memoryWrite);
1412
1434
  setImmediate(wrappedMemoryWrite);
1413
1435
  }
1414
1436
  /**
@@ -2801,7 +2823,15 @@ Current user's request: ${currentInput}`;
2801
2823
  }
2802
2824
  const startedAt = Date.now();
2803
2825
  try {
2804
- return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan)));
2826
+ return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => {
2827
+ // Capture root-ness before startActiveSpan makes generateSpan active.
2828
+ // The actual guest-rescue stamp is deferred to executeGenerateRequest,
2829
+ // AFTER prepareGenerateRequest merges auth/requestContext-derived
2830
+ // identity into options.context — otherwise an auth:{token} caller
2831
+ // with no pre-set context.userId would stamp the root span as guest.
2832
+ const generateIsRoot = !trace.getSpan(context.active());
2833
+ return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan, generateIsRoot));
2834
+ });
2805
2835
  }
2806
2836
  catch (error) {
2807
2837
  // Lifecycle middleware (wrapGenerate.catch in builtin/lifecycle.ts)
@@ -2973,14 +3003,17 @@ Current user's request: ${currentInput}`;
2973
3003
  return { error };
2974
3004
  }
2975
3005
  }
2976
- async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
2977
- return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
3006
+ async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan, isRootSpan) {
3007
+ return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan));
2978
3008
  }
2979
- async executeGenerateRequest(optionsOrPrompt, generateSpan) {
3009
+ async executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan) {
2980
3010
  let resolvedOptions;
2981
3011
  try {
2982
3012
  const { options, originalPrompt } = await this.prepareGenerateRequest(optionsOrPrompt, generateSpan);
2983
3013
  resolvedOptions = options;
3014
+ // Stamp now that prepareGenerateRequest has merged any auth/requestContext
3015
+ // identity into options.context (see capture of isRootSpan in generate()).
3016
+ stampGuestRescueIdentity(generateSpan, options.context, isRootSpan);
2984
3017
  const earlyResult = await this.maybeHandleEarlyGenerateResult(options, generateSpan);
2985
3018
  if (earlyResult) {
2986
3019
  generateSpan.setStatus({ code: SpanStatusCode.OK });
@@ -3545,7 +3578,7 @@ Current user's request: ${currentInput}`;
3545
3578
  // Memory storage
3546
3579
  if (this.shouldWriteMemory(options.memory, options.context?.userId, generateResult.content) &&
3547
3580
  options.context?.userId) {
3548
- this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers);
3581
+ this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers, options.context);
3549
3582
  }
3550
3583
  }
3551
3584
  /**
@@ -5531,10 +5564,20 @@ Current user's request: ${currentInput}`;
5531
5564
  [ATTR.NL_PROVIDER]: options.provider || "default",
5532
5565
  [ATTR.GEN_AI_MODEL]: options.model || "default",
5533
5566
  [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
5534
- [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
5567
+ // Count registered custom tools too — chat hosts put their MCP tools
5568
+ // in the registry, so options.tools alone under-reports.
5569
+ [ATTR.NL_HAS_TOOLS]: !options.disableTools &&
5570
+ (!!(options.tools && Object.keys(options.tools).length > 0) ||
5571
+ this.getCustomTools().size > 0),
5535
5572
  [ATTR.NL_STREAM_MODE]: true,
5536
5573
  },
5537
5574
  });
5575
+ // streamSpan isn't active yet, so context.active() is its parent — empty =
5576
+ // root. Capture root-ness here, but defer the actual guest-rescue stamp to
5577
+ // after validateStreamRequestOptions merges auth/requestContext identity
5578
+ // into options.context (below) — otherwise an auth:{token} caller with no
5579
+ // pre-set context.userId would stamp the root span as guest.
5580
+ const streamIsRoot = !trace.getSpan(context.active());
5538
5581
  const spanStartTime = Date.now();
5539
5582
  this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
5540
5583
  try {
@@ -5576,6 +5619,8 @@ Current user's request: ${currentInput}`;
5576
5619
  const originalPrompt = options.input?.text ?? "";
5577
5620
  options.fileRegistry = this.fileRegistry;
5578
5621
  await this.validateStreamRequestOptions(options, startTime);
5622
+ // options.context now carries any auth/requestContext-derived identity.
5623
+ stampGuestRescueIdentity(streamSpan, options.context, streamIsRoot);
5579
5624
  const workflowResult = await this.maybeHandleWorkflowStreamRequest({
5580
5625
  options,
5581
5626
  startTime,
@@ -5585,6 +5630,19 @@ Current user's request: ${currentInput}`;
5585
5630
  if (workflowResult) {
5586
5631
  return workflowResult;
5587
5632
  }
5633
+ // Make neurolink.stream the active span so every provider span (generations,
5634
+ // tool calls) parents under it — one Langfuse trace per turn, not a forest.
5635
+ const streamSpanContext = trace.setSpan(context.active(), streamSpan);
5636
+ // Pre-call tool routing: run inside the stream-span + Langfuse context so
5637
+ // the router's own generation span nests under this turn's trace instead
5638
+ // of starting a separate one. Asks a cheap router LLM which tool servers
5639
+ // the query needs and appends the unpicked servers' tools to
5640
+ // `excludeTools`. Fails open (no exclusions). Routes on the current
5641
+ // prompt enriched with a bounded window of recent conversation turns
5642
+ // (pulled from conversation memory) so contextless follow-ups still
5643
+ // classify correctly. After the workflow short-circuit, so workflow
5644
+ // streams skip it.
5645
+ await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.applyToolRoutingExclusions(options, originalPrompt)));
5588
5646
  // TTS Mode 2 deferred: stream() emits text first, then synthesizes the
5589
5647
  // accumulated response into a single audio chunk at end-of-stream and
5590
5648
  // resolves `streamResult.audio` with the same TTSResult. The resolver is
@@ -5599,7 +5657,7 @@ Current user's request: ${currentInput}`;
5599
5657
  resolveStreamTtsAudio = resolve;
5600
5658
  })
5601
5659
  : undefined;
5602
- const streamResult = await this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
5660
+ const streamResult = await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
5603
5661
  options,
5604
5662
  streamSpan,
5605
5663
  spanStartTime,
@@ -5608,7 +5666,7 @@ Current user's request: ${currentInput}`;
5608
5666
  streamId,
5609
5667
  originalPrompt,
5610
5668
  ttsResolver: resolveStreamTtsAudio,
5611
- }));
5669
+ })));
5612
5670
  if (streamSttTranscription) {
5613
5671
  streamResult.transcription = streamSttTranscription;
5614
5672
  }
@@ -5629,6 +5687,172 @@ Current user's request: ${currentInput}`;
5629
5687
  throw error;
5630
5688
  }
5631
5689
  }
5690
+ /**
5691
+ * Pre-call tool routing for stream(): runs the router LLM once per turn
5692
+ * and appends the unpicked servers' registered tool names to
5693
+ * `options.excludeTools` — the per-call denylist enforced by
5694
+ * `baseProvider.applyToolFiltering`. No-op when `options.disableTools` is set,
5695
+ * routing is not enabled, or no servers are configured. Routing failures fail
5696
+ * open (no exclusions, warning logged); errors matching isAbortError are rethrown.
5697
+ */
5698
+ async applyToolRoutingExclusions(options, userQuery) {
5699
+ const routingConfig = this.toolRoutingConfig;
5700
+ if (!routingConfig?.enabled || options.disableTools) {
5701
+ return;
5702
+ }
5703
+ const servers = routingConfig.servers ?? [];
5704
+ if (servers.length === 0) {
5705
+ return;
5706
+ }
5707
+ // Whole setup is fail-open: catalog building (getCustomTools /
5708
+ // buildToolRoutingCatalog) and the router call degrade to no exclusions
5709
+ // rather than killing the stream, honoring this method's fail-open
5710
+ // contract. Genuine stream cancellations still propagate.
5711
+ try {
5712
+ const registeredToolNames = Array.from(this.getCustomTools().keys());
5713
+ const catalog = buildToolRoutingCatalog(servers, registeredToolNames);
5714
+ if (catalog.length === 0) {
5715
+ return;
5716
+ }
5717
+ // Fold a bounded window of recent conversation turns into the routing query.
5718
+ // The router runs pre-memory and would otherwise see only this turn's raw
5719
+ // text, so a contextless follow-up ("yes please") gives it nothing to
5720
+ // classify — it fails open and routing narrows nothing. The main model
5721
+ // still receives full history later via conversation memory; this only
5722
+ // enriches the router's view. Fails open to the current query alone.
5723
+ const recentMessages = await this.fetchRecentRoutingHistory(options);
5724
+ const routingQuery = recentMessages.length > 0
5725
+ ? buildRoutingQueryFromHistory(recentMessages, userQuery)
5726
+ : userQuery;
5727
+ // The router call below re-enters the public generate(), whose finally
5728
+ // block resets _disableToolCacheForCurrentRequest to false. That flag is
5729
+ // stream-scoped (set at the top of this turn) and read by the main tool
5730
+ // execution path that runs after routing, so save it before the router
5731
+ // call and restore it afterward to keep the turn's cache setting intact.
5732
+ const cacheDisabledForCurrentRequest = this._disableToolCacheForCurrentRequest;
5733
+ let routedExcludeTools;
5734
+ try {
5735
+ routedExcludeTools = await resolveToolRoutingExclusions({
5736
+ catalog,
5737
+ alwaysIncludeServerIds: routingConfig.alwaysIncludeServerIds ?? [],
5738
+ userQuery: routingQuery,
5739
+ routerPromptPrefix: routingConfig.routerPromptPrefix,
5740
+ routerModel: {
5741
+ provider: routingConfig.routerModel?.provider ??
5742
+ options.provider,
5743
+ model: routingConfig.routerModel?.model ?? options.model,
5744
+ region: routingConfig.routerModel?.region ?? options.region,
5745
+ temperature: routingConfig.routerModel?.temperature,
5746
+ },
5747
+ timeoutMs: routingConfig.timeoutMs ?? DEFAULT_TOOL_ROUTING_TIMEOUT_MS,
5748
+ // Forward the stream's abort signal so a cancelled stream aborts the
5749
+ // router call promptly instead of waiting out the routing timeout.
5750
+ generateFn: (generateOptions) => this.generate({
5751
+ ...generateOptions,
5752
+ abortSignal: options.abortSignal,
5753
+ }),
5754
+ });
5755
+ }
5756
+ finally {
5757
+ this._disableToolCacheForCurrentRequest =
5758
+ cacheDisabledForCurrentRequest;
5759
+ }
5760
+ // Aborted during the router call — skip applying now-stale exclusions;
5761
+ // the main generation path enforces the abort itself.
5762
+ if (options.abortSignal?.aborted) {
5763
+ return;
5764
+ }
5765
+ if (routedExcludeTools.length > 0) {
5766
+ options.excludeTools = [
5767
+ ...(options.excludeTools ?? []),
5768
+ ...routedExcludeTools,
5769
+ ];
5770
+ }
5771
+ }
5772
+ catch (error) {
5773
+ if (isAbortError(error)) {
5774
+ throw error;
5775
+ }
5776
+ logger.warn("[ToolRouting] Routing setup failed, failing open", {
5777
+ error: error instanceof Error ? error.message : String(error),
5778
+ });
5779
+ }
5780
+ }
5781
+ /**
5782
+ * Loads prior conversation turns for the router so a follow-up turn carries
5783
+ * the context it needs to classify intent. Non-empty
5784
+ * `options.conversationMessages` are returned as-is; otherwise, when
5785
+ * `context.sessionId` is a non-empty string, conversation memory is lazily
5786
+ * initialised and read with summarization disabled. Returns an empty list
5787
+ * when there is no session id, no memory, or any step throws — routing then
5788
+ * uses the current query alone. No windowing is applied here; NOTE(review):
5789
+ * presumably bounded downstream in buildRoutingQueryFromHistory — confirm.
5790
+ */
5791
+ async fetchRecentRoutingHistory(options) {
5792
+ try {
5793
+ const requestContext = options.context;
5794
+ // Inline multi-turn callers pass prior turns via options.conversationMessages
5795
+ // (the same field the main model reads) rather than server-side session
5796
+ // memory. Honor it directly so a contextless follow-up still routes with
5797
+ // context even when no sessionId is present.
5798
+ if (options.conversationMessages &&
5799
+ options.conversationMessages.length > 0) {
5800
+ return options.conversationMessages;
5801
+ }
5802
+ const sessionId = requestContext?.sessionId;
5803
+ if (typeof sessionId !== "string" || !sessionId) {
5804
+ return [];
5805
+ }
5806
+ // The pre-call router runs earlier in the stream pipeline than the main
5807
+ // generation path's own memory init (initializeConversationMemoryForGeneration),
5808
+ // so this.conversationMemory is still undefined at router time and the
5809
+ // router would only ever see the current turn. Trigger the same lazy init
5810
+ // the main path uses — it is idempotent, so the later call is a no-op —
5811
+ // so the router can read prior turns. Fails open via the surrounding catch.
5812
+ await this.initializeConversationMemoryForGeneration(`tool-routing-${Date.now()}`, Date.now(), process.hrtime.bigint());
5813
+ const memory = this.conversationMemory;
5814
+ if (!memory) {
5815
+ return [];
5816
+ }
5817
+ // Reuse the SAME reader the main model uses so the router sees identically
5818
+ // curated history: polluted turns dropped, read instrumented under the
5819
+ // neurolink.conversation.getMessages span. enableSummarization=false keeps
5820
+ // routing cheap and free of any summary-LLM side effect. The remaining
5821
+ // tool_call/tool_result turns are dropped at transcript-render time
5822
+ // (buildRoutingQueryFromHistory) to mirror what the main model is sent.
5823
+ const messages = await getConversationMessages(memory, {
5824
+ ...options,
5825
+ enableSummarization: false,
5826
+ });
5827
+ logger.debug("[ToolRouting] Loaded conversation history for router", {
5828
+ sessionId,
5829
+ messageCount: messages.length,
5830
+ });
5831
+ return messages;
5832
+ }
5833
+ catch (error) {
5834
+ logger.debug("[ToolRouting] Failed to load conversation history; routing on current query only", {
5835
+ error: error instanceof Error ? error.message : String(error),
5836
+ });
5837
+ return [];
5838
+ }
5839
+ }
5840
+ /**
5841
+ * Supplies (or replaces) the pre-call tool routing server catalog.
5842
+ *
5843
+ * For hosts that only know their tool servers after constructing NeuroLink
5844
+ * (e.g. per session/conversation). The array is stored by reference, not
5845
+ * copied. Routing must be enabled via the constructor's `toolRouting.enabled`;
5846
+ * without that config the servers are kept under `enabled: false` and a warning is logged.
5847
+ */
5848
+ setToolRoutingServers(servers) {
5849
+ if (!this.toolRoutingConfig) {
5850
+ logger.warn("[ToolRouting] setToolRoutingServers called without toolRouting constructor config — servers stored but routing stays disabled");
5851
+ this.toolRoutingConfig = { enabled: false, servers };
5852
+ return;
5853
+ }
5854
+ this.toolRoutingConfig.servers = servers;
5855
+ }
5632
5856
  async validateStreamRequestOptions(options, startTime) {
5633
5857
  await this.validateStreamInput(options);
5634
5858
  this.enforceSessionBudget(options.maxBudgetUsd);
@@ -6512,7 +6736,7 @@ Current user's request: ${currentInput}`;
6512
6736
  }
6513
6737
  }
6514
6738
  if (this.shouldWriteMemory(enhancedOptions.memory, enhancedOptions.context?.userId, accumulatedContent)) {
6515
- this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers);
6739
+ this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers, enhancedOptions.context);
6516
6740
  }
6517
6741
  }
6518
6742
  /**