npm - @juspay/neurolink - Versions diffs - 9.70.7 → 9.72.0 - Mend

@juspay/neurolink 9.70.7 → 9.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/CHANGELOG.md +12 -0
package/dist/browser/neurolink.min.js +362 -344
package/dist/core/constants.d.ts +1 -0
package/dist/core/constants.js +2 -0
package/dist/core/toolRouting.d.ts +59 -0
package/dist/core/toolRouting.js +232 -0
package/dist/lib/core/constants.d.ts +1 -0
package/dist/lib/core/constants.js +2 -0
package/dist/lib/core/toolRouting.d.ts +59 -0
package/dist/lib/core/toolRouting.js +233 -0
package/dist/lib/neurolink.d.ts +31 -1
package/dist/lib/neurolink.js +241 -17
package/dist/lib/providers/googleVertex.js +257 -30
package/dist/lib/services/server/ai/observability/instrumentation.d.ts +10 -1
package/dist/lib/services/server/ai/observability/instrumentation.js +36 -1
package/dist/lib/telemetry/attributes.d.ts +31 -0
package/dist/lib/telemetry/attributes.js +48 -0
package/dist/lib/telemetry/index.d.ts +1 -1
package/dist/lib/telemetry/index.js +1 -1
package/dist/lib/types/config.d.ts +8 -0
package/dist/lib/types/index.d.ts +1 -0
package/dist/lib/types/index.js +1 -0
package/dist/lib/types/toolRouting.d.ts +91 -0
package/dist/lib/types/toolRouting.js +19 -0
package/dist/lib/utils/anthropicTraceSanitizer.d.ts +7 -0
package/dist/lib/utils/anthropicTraceSanitizer.js +26 -0
package/dist/lib/utils/mcpErrorText.d.ts +16 -0
package/dist/lib/utils/mcpErrorText.js +36 -0
package/dist/neurolink.d.ts +31 -1
package/dist/neurolink.js +241 -17
package/dist/providers/googleVertex.js +257 -30
package/dist/services/server/ai/observability/instrumentation.d.ts +10 -1
package/dist/services/server/ai/observability/instrumentation.js +36 -1
package/dist/telemetry/attributes.d.ts +31 -0
package/dist/telemetry/attributes.js +48 -0
package/dist/telemetry/index.d.ts +1 -1
package/dist/telemetry/index.js +1 -1
package/dist/types/config.d.ts +8 -0
package/dist/types/index.d.ts +1 -0
package/dist/types/index.js +1 -0
package/dist/types/toolRouting.d.ts +91 -0
package/dist/types/toolRouting.js +18 -0
package/dist/utils/anthropicTraceSanitizer.d.ts +7 -0
package/dist/utils/anthropicTraceSanitizer.js +25 -0
package/dist/utils/mcpErrorText.d.ts +16 -0
package/dist/utils/mcpErrorText.js +36 -0
package/package.json +2 -1

package/dist/lib/neurolink.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * Enhanced AI provider system with natural MCP tool access.
  * Uses real MCP infrastructure for tool discovery and execution.
  */
-import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext } from "./types/index.js";
+import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext, ToolRoutingServerDescriptor } from "./types/index.js";
 import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
 import type { RedisConversationMemoryManager } from "./core/redisConversationMemoryManager.js";
 import { ExternalServerManager } from "./mcp/externalServerManager.js";
@@ -100,6 +100,7 @@ export declare class NeuroLink {
     conversationMemory?: ConversationMemoryManager | RedisConversationMemoryManager | null;
     private conversationMemoryNeedsInit;
     private conversationMemoryConfig?;
+    private toolRoutingConfig?;
     private enableOrchestration;
     private authProvider?;
     private pendingAuthConfig?;
@@ -803,6 +804,35 @@ export declare class NeuroLink {
      */
     private streamWithIterationFallback;
     private executeStreamRequest;
+    /**
+     * Pre-call tool routing for stream(): runs the router LLM once per turn
+     * and appends the unpicked servers' registered tool names to
+     * `options.excludeTools` — the per-call denylist enforced by
+     * `baseProvider.applyToolFiltering`.
+     *
+     * No-op unless `toolRouting.enabled` is true, tools are not disabled for
+     * the call, and a non-empty server catalog has been supplied.
+     *
+     * Fail-open: routing failures are logged and result in no exclusions.
+     * Abort errors are the exception — they are re-thrown so a cancelled
+     * stream still propagates its cancellation.
+     */
+    private applyToolRoutingExclusions;
+    /**
+     * Loads prior conversation turns for the router so a follow-up turn
+     * carries the context it needs to classify intent.
+     *
+     * Inline history passed via `options.conversationMessages` is used as-is
+     * when non-empty. Otherwise this turn's conversation memory is read for
+     * the session in `context.sessionId`, with summarization disabled to keep
+     * the router cheap.
+     *
+     * Fails open to an empty list — routing then falls back to the current
+     * query alone (prior behaviour). On the first turn of a conversation,
+     * memory may hold nothing yet; that also yields an empty list, which is
+     * fine since the opening message already carries its own context.
+     */
+    private fetchRecentRoutingHistory;
+    /**
+     * Supplies (or replaces) the pre-call tool routing server catalog.
+     *
+     * For hosts that only know their tool servers after constructing NeuroLink
+     * (e.g. tools are registered per session/conversation). Routing must still
+     * be enabled via the constructor's `toolRouting.enabled` — setting servers
+     * alone does not activate it; without constructor config the servers are
+     * stored with routing left disabled and a warning is logged.
+     *
+     * @param servers Server descriptors that replace any previously stored list.
+     */
+    setToolRoutingServers(servers: ToolRoutingServerDescriptor[]): void;
     private validateStreamRequestOptions;
     private maybeHandleWorkflowStreamRequest;
     private runStandardStreamRequest;

package/dist/lib/neurolink.js CHANGED Viewed

@@ -28,8 +28,9 @@ import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
 import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
 import { ContextBudgetExceededError } from "./context/errors.js";
 import { repairToolPairs } from "./context/toolPairRepair.js";
-import { SYSTEM_LIMITS } from "./core/constants.js";
+import { SYSTEM_LIMITS, DEFAULT_TOOL_ROUTING_TIMEOUT_MS, } from "./core/constants.js";
 import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
+import { buildToolRoutingCatalog, buildRoutingQueryFromHistory, resolveToolRoutingExclusions, } from "./core/toolRouting.js";
 import { AIProviderFactory } from "./core/factory.js";
 import { createToolEventPayload } from "./core/toolEvents.js";
 import { ProviderRegistry } from "./factories/providerRegistry.js";
@@ -55,7 +56,7 @@ import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
 import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
 import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, ModelAccessDeniedError, } from "./types/index.js";
 import { SpanSerializer } from "./observability/utils/spanSerializer.js";
-import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
+import { flushOpenTelemetry, getLangfuseContext, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, stampGuestRescueIdentity, } from "./services/server/ai/observability/instrumentation.js";
 import { TaskManager } from "./tasks/taskManager.js";
 import { createTaskTools } from "./tasks/tools/taskTools.js";
 import { ATTR } from "./telemetry/attributes.js";
@@ -436,6 +437,10 @@ export class NeuroLink {
     conversationMemory;
     conversationMemoryNeedsInit = false;
     conversationMemoryConfig;
+    // Pre-call tool routing: instance-level config from the constructor.
+    // The server catalog inside it can be supplied/replaced later via
+    // setToolRoutingServers() for hosts that register tools after construction.
+    toolRoutingConfig;
     // Add orchestration property
     enableOrchestration;
     // Authentication provider for secure access control
@@ -842,6 +847,12 @@ export class NeuroLink {
         if (config?.modelChain) {
             this.fallbackConfig.modelChain = config.modelChain;
         }
+        if (config?.toolRouting) {
+            // Shallow-clone so setToolRoutingServers() mutating this.toolRoutingConfig
+            // can't leak into the caller's config object, which may be shared across
+            // multiple NeuroLink instances.
+            this.toolRoutingConfig = { ...config.toolRouting };
+        }
         logger.setEventEmitter(this.emitter);
         // Read tool cache duration from environment variables, with a default
         const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
@@ -1378,11 +1389,8 @@ Current user's request: ${currentInput}`;
      * Calls add(userId, content) which internally condenses old + new via LLM.
      * Supports additional users with per-user prompt and maxWords overrides.
      */
-    storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers) {
-        // Preserve AsyncLocalStorage context across setImmediate boundary so that
-        // memory writes appear under the originating Langfuse trace instead of
-        // becoming orphan spans.
-        const wrappedMemoryWrite = runWithCurrentLangfuseContext(async () => {
+    storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers, langfuseIdentity) {
+        const memoryWrite = async () => {
             try {
                 const client = this.ensureMemoryReady();
                 if (!client) {
@@ -1408,7 +1416,21 @@ Current user's request: ${currentInput}`;
             catch (error) {
                 logger.warn("Memory storage failed:", error);
             }
-        });
+        };
+        // Carry the turn's identity across the setImmediate boundary so the
+        // condensation generate + redis spans don't orphan to "guest". Keep the
+        // ambient store when it survived (generate path — carries conversationId,
+        // metadata, …); re-establish from the caller only when it was lost (stream
+        // path, which fires after the caller consumed the stream).
+        const ambient = getLangfuseContext();
+        const wrappedMemoryWrite = !(ambient?.traceName || ambient?.userId) &&
+            (langfuseIdentity?.traceName || langfuseIdentity?.sessionId)
+            ? () => setLangfuseContext({
+                userId,
+                sessionId: langfuseIdentity.sessionId ?? null,
+                traceName: langfuseIdentity.traceName ?? null,
+            }, memoryWrite)
+            : runWithCurrentLangfuseContext(memoryWrite);
         setImmediate(wrappedMemoryWrite);
     }
     /**
@@ -2801,7 +2823,15 @@ Current user's request: ${currentInput}`;
         }
         const startedAt = Date.now();
         try {
-            return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan)));
+            return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => {
+                // Capture root-ness before startActiveSpan makes generateSpan active.
+                // The actual guest-rescue stamp is deferred to executeGenerateRequest,
+                // AFTER prepareGenerateRequest merges auth/requestContext-derived
+                // identity into options.context — otherwise an auth:{token} caller
+                // with no pre-set context.userId would stamp the root span as guest.
+                const generateIsRoot = !trace.getSpan(context.active());
+                return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan, generateIsRoot));
+            });
         }
         catch (error) {
             // Lifecycle middleware (wrapGenerate.catch in builtin/lifecycle.ts)
@@ -2973,14 +3003,17 @@ Current user's request: ${currentInput}`;
             return { error };
         }
     }
-    async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
-        return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
+    async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan, isRootSpan) {
+        return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan));
     }
-    async executeGenerateRequest(optionsOrPrompt, generateSpan) {
+    async executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan) {
         let resolvedOptions;
         try {
             const { options, originalPrompt } = await this.prepareGenerateRequest(optionsOrPrompt, generateSpan);
             resolvedOptions = options;
+            // Stamp now that prepareGenerateRequest has merged any auth/requestContext
+            // identity into options.context (see capture of isRootSpan in generate()).
+            stampGuestRescueIdentity(generateSpan, options.context, isRootSpan);
             const earlyResult = await this.maybeHandleEarlyGenerateResult(options, generateSpan);
             if (earlyResult) {
                 generateSpan.setStatus({ code: SpanStatusCode.OK });
@@ -3545,7 +3578,7 @@ Current user's request: ${currentInput}`;
         // Memory storage
         if (this.shouldWriteMemory(options.memory, options.context?.userId, generateResult.content) &&
             options.context?.userId) {
-            this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers);
+            this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers, options.context);
         }
     }
     /**
@@ -5531,10 +5564,20 @@ Current user's request: ${currentInput}`;
                 [ATTR.NL_PROVIDER]: options.provider || "default",
                 [ATTR.GEN_AI_MODEL]: options.model || "default",
                 [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
-                [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
+                // Count registered custom tools too — chat hosts put their MCP tools
+                // in the registry, so options.tools alone under-reports.
+                [ATTR.NL_HAS_TOOLS]: !options.disableTools &&
+                    (!!(options.tools && Object.keys(options.tools).length > 0) ||
+                        this.getCustomTools().size > 0),
                 [ATTR.NL_STREAM_MODE]: true,
             },
         });
+        // streamSpan isn't active yet, so context.active() is its parent — empty =
+        // root. Capture root-ness here, but defer the actual guest-rescue stamp to
+        // after validateStreamRequestOptions merges auth/requestContext identity
+        // into options.context (below) — otherwise an auth:{token} caller with no
+        // pre-set context.userId would stamp the root span as guest.
+        const streamIsRoot = !trace.getSpan(context.active());
         const spanStartTime = Date.now();
         this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
         try {
@@ -5576,6 +5619,8 @@ Current user's request: ${currentInput}`;
             const originalPrompt = options.input?.text ?? "";
             options.fileRegistry = this.fileRegistry;
             await this.validateStreamRequestOptions(options, startTime);
+            // options.context now carries any auth/requestContext-derived identity.
+            stampGuestRescueIdentity(streamSpan, options.context, streamIsRoot);
             const workflowResult = await this.maybeHandleWorkflowStreamRequest({
                 options,
                 startTime,
@@ -5585,6 +5630,19 @@ Current user's request: ${currentInput}`;
             if (workflowResult) {
                 return workflowResult;
             }
+            // Make neurolink.stream the active span so every provider span (generations,
+            // tool calls) parents under it — one Langfuse trace per turn, not a forest.
+            const streamSpanContext = trace.setSpan(context.active(), streamSpan);
+            // Pre-call tool routing: run inside the stream-span + Langfuse context so
+            // the router's own generation span nests under this turn's trace instead
+            // of starting a separate one. Asks a cheap router LLM which tool servers
+            // the query needs and appends the unpicked servers' tools to
+            // `excludeTools`. Fails open (no exclusions). Routes on the current
+            // prompt enriched with a bounded window of recent conversation turns
+            // (pulled from conversation memory) so contextless follow-ups still
+            // classify correctly. After the workflow short-circuit, so workflow
+            // streams skip it.
+            await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.applyToolRoutingExclusions(options, originalPrompt)));
             // TTS Mode 2 deferred: stream() emits text first, then synthesizes the
             // accumulated response into a single audio chunk at end-of-stream and
             // resolves `streamResult.audio` with the same TTSResult. The resolver is
@@ -5599,7 +5657,7 @@ Current user's request: ${currentInput}`;
                     resolveStreamTtsAudio = resolve;
                 })
                 : undefined;
-            const streamResult = await this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
+            const streamResult = await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
                 options,
                 streamSpan,
                 spanStartTime,
@@ -5608,7 +5666,7 @@ Current user's request: ${currentInput}`;
                 streamId,
                 originalPrompt,
                 ttsResolver: resolveStreamTtsAudio,
-            }));
+            })));
             if (streamSttTranscription) {
                 streamResult.transcription = streamSttTranscription;
             }
@@ -5629,6 +5687,172 @@ Current user's request: ${currentInput}`;
             throw error;
         }
     }
+    /**
+     * Pre-call tool routing for stream(): runs the router LLM once per turn
+     * and appends the unpicked servers' registered tool names to
+     * `options.excludeTools` — the per-call denylist enforced by
+     * `baseProvider.applyToolFiltering`.
+     *
+     * No-op when `toolRouting.enabled` is not set, when `options.disableTools`
+     * is set, when no servers are configured, or when the catalog built from
+     * the servers and the registered custom tool names is empty.
+     *
+     * Side effects: reassigns `options.excludeTools` to a new array holding
+     * the existing entries followed by the routed names, and saves/restores
+     * `_disableToolCacheForCurrentRequest` around the router call.
+     *
+     * Fail-open: any non-abort error is logged as a warning and swallowed, so
+     * the stream proceeds with no routed exclusions. Errors recognised by
+     * `isAbortError` are re-thrown so cancellation propagates; if the abort
+     * signal is already set after the router returns, the result is discarded.
+     *
+     * @param options   Stream options for this turn. `provider`, `model` and
+     *                  `region` are the fallbacks for the router model, and
+     *                  `abortSignal` is forwarded to the router call.
+     * @param userQuery This turn's prompt text; combined with recent history
+     *                  when any is available.
+     * @returns Promise that resolves once routing has been applied or skipped.
+     */
+    async applyToolRoutingExclusions(options, userQuery) {
+        const routingConfig = this.toolRoutingConfig;
+        if (!routingConfig?.enabled || options.disableTools) {
+            return;
+        }
+        const servers = routingConfig.servers ?? [];
+        if (servers.length === 0) {
+            return;
+        }
+        // Whole setup is fail-open: catalog building (getCustomTools /
+        // buildToolRoutingCatalog) and the router call degrade to no exclusions
+        // rather than killing the stream, honoring this method's "never throws"
+        // contract. Genuine stream cancellations still propagate.
+        try {
+            const registeredToolNames = Array.from(this.getCustomTools().keys());
+            const catalog = buildToolRoutingCatalog(servers, registeredToolNames);
+            if (catalog.length === 0) {
+                return;
+            }
+            // Fold a bounded window of recent conversation turns into the routing query.
+            // The router runs pre-memory and would otherwise see only this turn's raw
+            // text, so a contextless follow-up ("yes please") gives it nothing to
+            // classify — it fails open and routing narrows nothing. The main model
+            // still receives full history later via conversation memory; this only
+            // enriches the router's view. Fails open to the current query alone.
+            const recentMessages = await this.fetchRecentRoutingHistory(options);
+            const routingQuery = recentMessages.length > 0
+                ? buildRoutingQueryFromHistory(recentMessages, userQuery)
+                : userQuery;
+            // The router call below re-enters the public generate(), whose finally
+            // block resets _disableToolCacheForCurrentRequest to false. That flag is
+            // stream-scoped (set at the top of this turn) and read by the main tool
+            // execution path that runs after routing, so save it before the router
+            // call and restore it afterward to keep the turn's cache setting intact.
+            const cacheDisabledForCurrentRequest = this._disableToolCacheForCurrentRequest;
+            let routedExcludeTools;
+            try {
+                routedExcludeTools = await resolveToolRoutingExclusions({
+                    catalog,
+                    alwaysIncludeServerIds: routingConfig.alwaysIncludeServerIds ?? [],
+                    userQuery: routingQuery,
+                    routerPromptPrefix: routingConfig.routerPromptPrefix,
+                    routerModel: {
+                        provider: routingConfig.routerModel?.provider ??
+                            options.provider,
+                        model: routingConfig.routerModel?.model ?? options.model,
+                        region: routingConfig.routerModel?.region ?? options.region,
+                        temperature: routingConfig.routerModel?.temperature,
+                    },
+                    timeoutMs: routingConfig.timeoutMs ?? DEFAULT_TOOL_ROUTING_TIMEOUT_MS,
+                    // Forward the stream's abort signal so a cancelled stream aborts the
+                    // router call promptly instead of waiting out the routing timeout.
+                    generateFn: (generateOptions) => this.generate({
+                        ...generateOptions,
+                        abortSignal: options.abortSignal,
+                    }),
+                });
+            }
+            finally {
+                this._disableToolCacheForCurrentRequest =
+                    cacheDisabledForCurrentRequest;
+            }
+            // Aborted during the router call — skip applying now-stale exclusions;
+            // the main generation path enforces the abort itself.
+            if (options.abortSignal?.aborted) {
+                return;
+            }
+            if (routedExcludeTools.length > 0) {
+                options.excludeTools = [
+                    ...(options.excludeTools ?? []),
+                    ...routedExcludeTools,
+                ];
+            }
+        }
+        catch (error) {
+            if (isAbortError(error)) {
+                throw error;
+            }
+            logger.warn("[ToolRouting] Routing setup failed, failing open", {
+                error: error instanceof Error ? error.message : String(error),
+            });
+        }
+    }
+    /**
+     * Loads prior conversation turns for the router so a follow-up turn
+     * carries the context it needs to classify intent.
+     *
+     * Source order:
+     *  1. `options.conversationMessages`, when non-empty, is returned as-is.
+     *  2. Otherwise conversation memory is lazily initialised and read for
+     *     the session in `context.sessionId`, with summarization disabled to
+     *     keep the router cheap.
+     *
+     * Fails open to an empty list — when `context.sessionId` is not a
+     * non-empty string, when memory is still unavailable after the init call,
+     * or when anything in here throws — and routing then falls back to the
+     * current query alone (prior behaviour). An empty result on the first turn
+     * of a conversation is fine since the opening message already carries its
+     * own context.
+     *
+     * NOTE(review): no windowing or truncation happens in this method; the
+     * "bounded window" is presumably enforced downstream when the routing
+     * query is rendered — confirm in buildRoutingQueryFromHistory.
+     *
+     * @param options Stream options for this turn.
+     * @returns Promise resolving to the messages to feed the router, or `[]`.
+     */
+    async fetchRecentRoutingHistory(options) {
+        try {
+            const requestContext = options.context;
+            // Inline multi-turn callers pass prior turns via options.conversationMessages
+            // (the same field the main model reads) rather than server-side session
+            // memory. Honor it directly so a contextless follow-up still routes with
+            // context even when no sessionId is present.
+            if (options.conversationMessages &&
+                options.conversationMessages.length > 0) {
+                return options.conversationMessages;
+            }
+            const sessionId = requestContext?.sessionId;
+            if (typeof sessionId !== "string" || !sessionId) {
+                return [];
+            }
+            // The pre-call router runs earlier in the stream pipeline than the main
+            // generation path's own memory init (initializeConversationMemoryForGeneration),
+            // so this.conversationMemory is still undefined at router time and the
+            // router would only ever see the current turn. Trigger the same lazy init
+            // the main path uses — it is idempotent, so the later call is a no-op —
+            // so the router can read prior turns. Fails open via the surrounding catch.
+            await this.initializeConversationMemoryForGeneration(`tool-routing-${Date.now()}`, Date.now(), process.hrtime.bigint());
+            const memory = this.conversationMemory;
+            if (!memory) {
+                return [];
+            }
+            // Reuse the SAME reader the main model uses so the router sees identically
+            // curated history: polluted turns dropped, read instrumented under the
+            // neurolink.conversation.getMessages span. enableSummarization=false keeps
+            // routing cheap and free of any summary-LLM side effect. The remaining
+            // tool_call/tool_result turns are dropped at transcript-render time
+            // (buildRoutingQueryFromHistory) to mirror what the main model is sent.
+            const messages = await getConversationMessages(memory, {
+                ...options,
+                enableSummarization: false,
+            });
+            logger.debug("[ToolRouting] Loaded conversation history for router", {
+                sessionId,
+                messageCount: messages.length,
+            });
+            return messages;
+        }
+        catch (error) {
+            logger.debug("[ToolRouting] Failed to load conversation history; routing on current query only", {
+                error: error instanceof Error ? error.message : String(error),
+            });
+            return [];
+        }
+    }
+    /**
+     * Supplies (or replaces) the pre-call tool routing server catalog.
+     *
+     * For hosts that only know their tool servers after constructing NeuroLink
+     * (e.g. tools are registered per session/conversation). Routing must still
+     * be enabled via the constructor's `toolRouting.enabled` — setting servers
+     * alone does not activate it. When no `toolRouting` config was given to
+     * the constructor, a warning is logged and the servers are stored under a
+     * config with `enabled: false`.
+     *
+     * NOTE(review): the array is stored by reference, not copied, so later
+     * caller-side mutation of `servers` would be visible to routing — confirm
+     * this is intended.
+     *
+     * @param servers Server descriptors that replace any previously stored list.
+     */
+    setToolRoutingServers(servers) {
+        if (!this.toolRoutingConfig) {
+            logger.warn("[ToolRouting] setToolRoutingServers called without toolRouting constructor config — servers stored but routing stays disabled");
+            this.toolRoutingConfig = { enabled: false, servers };
+            return;
+        }
+        this.toolRoutingConfig.servers = servers;
+    }
     async validateStreamRequestOptions(options, startTime) {
         await this.validateStreamInput(options);
         this.enforceSessionBudget(options.maxBudgetUsd);
@@ -6512,7 +6736,7 @@ Current user's request: ${currentInput}`;
             }
         }
         if (this.shouldWriteMemory(enhancedOptions.memory, enhancedOptions.context?.userId, accumulatedContent)) {
-            this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers);
+            this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers, enhancedOptions.context);
         }
     }
     /**