npm - @juspay/neurolink - Versions diffs - 9.71.0 → 9.72.0 - Mend

@juspay/neurolink 9.71.0 → 9.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/CHANGELOG.md +6 -0
package/dist/browser/neurolink.min.js +330 -312
package/dist/core/constants.d.ts +1 -0
package/dist/core/constants.js +2 -0
package/dist/core/toolRouting.d.ts +59 -0
package/dist/core/toolRouting.js +232 -0
package/dist/lib/core/constants.d.ts +1 -0
package/dist/lib/core/constants.js +2 -0
package/dist/lib/core/toolRouting.d.ts +59 -0
package/dist/lib/core/toolRouting.js +233 -0
package/dist/lib/neurolink.d.ts +31 -1
package/dist/lib/neurolink.js +188 -1
package/dist/lib/telemetry/attributes.js +3 -1
package/dist/lib/types/config.d.ts +8 -0
package/dist/lib/types/index.d.ts +1 -0
package/dist/lib/types/index.js +1 -0
package/dist/lib/types/toolRouting.d.ts +91 -0
package/dist/lib/types/toolRouting.js +19 -0
package/dist/neurolink.d.ts +31 -1
package/dist/neurolink.js +188 -1
package/dist/telemetry/attributes.js +3 -1
package/dist/types/config.d.ts +8 -0
package/dist/types/index.d.ts +1 -0
package/dist/types/index.js +1 -0
package/dist/types/toolRouting.d.ts +91 -0
package/dist/types/toolRouting.js +18 -0
package/package.json +1 -1

package/dist/neurolink.js CHANGED Viewed

@@ -28,8 +28,9 @@ import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
 import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
 import { ContextBudgetExceededError } from "./context/errors.js";
 import { repairToolPairs } from "./context/toolPairRepair.js";
-import { SYSTEM_LIMITS } from "./core/constants.js";
+import { SYSTEM_LIMITS, DEFAULT_TOOL_ROUTING_TIMEOUT_MS, } from "./core/constants.js";
 import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
+import { buildToolRoutingCatalog, buildRoutingQueryFromHistory, resolveToolRoutingExclusions, } from "./core/toolRouting.js";
 import { AIProviderFactory } from "./core/factory.js";
 import { createToolEventPayload } from "./core/toolEvents.js";
 import { ProviderRegistry } from "./factories/providerRegistry.js";
@@ -436,6 +437,10 @@ export class NeuroLink {
     conversationMemory;
     conversationMemoryNeedsInit = false;
     conversationMemoryConfig;
+    // Pre-call tool routing: instance-level config from the constructor.
+    // The server catalog inside it can be supplied/replaced later via
+    // setToolRoutingServers() for hosts that register tools after construction.
+    toolRoutingConfig;
     // Add orchestration property
     enableOrchestration;
     // Authentication provider for secure access control
@@ -842,6 +847,12 @@ export class NeuroLink {
         if (config?.modelChain) {
             this.fallbackConfig.modelChain = config.modelChain;
         }
+        if (config?.toolRouting) {
+            // Shallow-clone so setToolRoutingServers() mutating this.toolRoutingConfig
+            // can't leak into the caller's config object, which may be shared across
+            // multiple NeuroLink instances.
+            this.toolRoutingConfig = { ...config.toolRouting };
+        }
         logger.setEventEmitter(this.emitter);
         // Read tool cache duration from environment variables, with a default
         const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
@@ -5622,6 +5633,16 @@ Current user's request: ${currentInput}`;
             // Make neurolink.stream the active span so every provider span (generations,
             // tool calls) parents under it — one Langfuse trace per turn, not a forest.
             const streamSpanContext = trace.setSpan(context.active(), streamSpan);
+            // Pre-call tool routing: run inside the stream-span + Langfuse context so
+            // the router's own generation span nests under this turn's trace instead
+            // of starting a separate one. Asks a cheap router LLM which tool servers
+            // the query needs and appends the unpicked servers' tools to
+            // `excludeTools`. Fails open (no exclusions). Routes on the current
+            // prompt enriched with a bounded window of recent conversation turns
+            // (pulled from conversation memory) so contextless follow-ups still
+            // classify correctly. After the workflow short-circuit, so workflow
+            // streams skip it.
+            await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.applyToolRoutingExclusions(options, originalPrompt)));
             // TTS Mode 2 deferred: stream() emits text first, then synthesizes the
             // accumulated response into a single audio chunk at end-of-stream and
             // resolves `streamResult.audio` with the same TTSResult. The resolver is
@@ -5666,6 +5687,172 @@ Current user's request: ${currentInput}`;
             throw error;
         }
     }
+    /**
+     * Pre-call tool routing for stream(): runs the router LLM once per turn and
+     * appends the unpicked servers' registered tool names to `options.excludeTools`
+     * (set on the caller's `options` object) — the per-call denylist enforced by
+     * `baseProvider.applyToolFiltering`. No-op when routing is not enabled,
+     * `options.disableTools` is set, or the server catalog is empty. Fails open (warning
+     * logged, no exclusions) on any error except abort errors, which are rethrown.
+     */
+    async applyToolRoutingExclusions(options, userQuery) {
+        const routingConfig = this.toolRoutingConfig;
+        if (!routingConfig?.enabled || options.disableTools) {
+            return;
+        }
+        const servers = routingConfig.servers ?? [];
+        if (servers.length === 0) {
+            return;
+        }
+        // Whole setup is fail-open: catalog building (getCustomTools /
+        // buildToolRoutingCatalog) and the router call degrade to no exclusions
+        // rather than killing the stream. The one exception is abort errors,
+        // which the catch below rethrows so stream cancellations still propagate.
+        try {
+            const registeredToolNames = Array.from(this.getCustomTools().keys());
+            const catalog = buildToolRoutingCatalog(servers, registeredToolNames);
+            if (catalog.length === 0) {
+                return;
+            }
+            // Fold a bounded window of recent conversation turns into the routing query.
+            // The router runs pre-memory and would otherwise see only this turn's raw
+            // text, so a contextless follow-up ("yes please") gives it nothing to
+            // classify — it fails open and routing narrows nothing. The main model
+            // still receives full history later via conversation memory; this only
+            // enriches the router's view. Fails open to the current query alone.
+            const recentMessages = await this.fetchRecentRoutingHistory(options);
+            const routingQuery = recentMessages.length > 0
+                ? buildRoutingQueryFromHistory(recentMessages, userQuery)
+                : userQuery;
+            // The router call below re-enters the public generate(), whose finally
+            // block resets _disableToolCacheForCurrentRequest to false. That flag is
+            // stream-scoped (set at the top of this turn) and read by the main tool
+            // execution path that runs after routing, so save it before the router
+            // call and restore it afterward to keep the turn's cache setting intact.
+            const cacheDisabledForCurrentRequest = this._disableToolCacheForCurrentRequest;
+            let routedExcludeTools;
+            try {
+                routedExcludeTools = await resolveToolRoutingExclusions({
+                    catalog,
+                    alwaysIncludeServerIds: routingConfig.alwaysIncludeServerIds ?? [],
+                    userQuery: routingQuery,
+                    routerPromptPrefix: routingConfig.routerPromptPrefix,
+                    routerModel: {
+                        provider: routingConfig.routerModel?.provider ??
+                            options.provider,
+                        model: routingConfig.routerModel?.model ?? options.model,
+                        region: routingConfig.routerModel?.region ?? options.region,
+                        temperature: routingConfig.routerModel?.temperature,
+                    },
+                    timeoutMs: routingConfig.timeoutMs ?? DEFAULT_TOOL_ROUTING_TIMEOUT_MS,
+                    // Forward the stream's abort signal so a cancelled stream aborts the router call promptly
+                    // instead of waiting out the routing timeout. NOTE(review): this overwrites any abortSignal already in generateOptions (even with undefined) — confirm intended.
+                    generateFn: (generateOptions) => this.generate({
+                        ...generateOptions,
+                        abortSignal: options.abortSignal,
+                    }),
+                });
+            }
+            finally {
+                this._disableToolCacheForCurrentRequest =
+                    cacheDisabledForCurrentRequest;
+            }
+            // Aborted during the router call — skip applying now-stale exclusions;
+            // the main generation path enforces the abort itself.
+            if (options.abortSignal?.aborted) {
+                return;
+            }
+            if (routedExcludeTools.length > 0) {
+                options.excludeTools = [
+                    ...(options.excludeTools ?? []),
+                    ...routedExcludeTools,
+                ];
+            }
+        }
+        catch (error) {
+            if (isAbortError(error)) {
+                throw error;
+            }
+            logger.warn("[ToolRouting] Routing setup failed, failing open", {
+                error: error instanceof Error ? error.message : String(error),
+            });
+        }
+    }
+    /**
+     * Loads prior conversation turns for the router so a follow-up turn carries
+     * the context it needs to classify intent. Returns `options.conversationMessages`
+     * as-is when non-empty; otherwise requires a non-empty string
+     * `options.context.sessionId`, lazily initialises conversation memory, and reads
+     * it via `getConversationMessages` with summarization disabled to keep the router
+     * cheap. Fails open to an empty list (no sessionId, no memory after init, or any
+     * thrown error) so routing falls back to the current query alone. No windowing is
+     * applied here; presumably buildRoutingQueryFromHistory bounds it — TODO confirm.
+     */
+    async fetchRecentRoutingHistory(options) {
+        try {
+            const requestContext = options.context;
+            // Inline multi-turn callers pass prior turns via options.conversationMessages
+            // (the same field the main model reads) rather than server-side session
+            // memory. Honor it directly so a contextless follow-up still routes with
+            // context even when no sessionId is present.
+            if (options.conversationMessages &&
+                options.conversationMessages.length > 0) {
+                return options.conversationMessages;
+            }
+            const sessionId = requestContext?.sessionId;
+            if (typeof sessionId !== "string" || !sessionId) {
+                return [];
+            }
+            // The pre-call router runs earlier in the stream pipeline than the main
+            // generation path's own memory init (initializeConversationMemoryForGeneration),
+            // so this.conversationMemory is still undefined at router time and the
+            // router would only ever see the current turn. Trigger the same lazy init
+            // the main path uses — it is idempotent, so the later call is a no-op —
+            // so the router can read prior turns. Fails open via the surrounding catch.
+            await this.initializeConversationMemoryForGeneration(`tool-routing-${Date.now()}`, Date.now(), process.hrtime.bigint());
+            const memory = this.conversationMemory;
+            if (!memory) {
+                return [];
+            }
+            // Reuse the SAME reader the main model uses so the router sees identically
+            // curated history: polluted turns dropped, read instrumented under the
+            // neurolink.conversation.getMessages span. enableSummarization=false keeps
+            // routing cheap and free of any summary-LLM side effect. The remaining
+            // tool_call/tool_result turns are dropped at transcript-render time
+            // (buildRoutingQueryFromHistory) to mirror what the main model is sent.
+            const messages = await getConversationMessages(memory, {
+                ...options,
+                enableSummarization: false,
+            });
+            logger.debug("[ToolRouting] Loaded conversation history for router", {
+                sessionId,
+                messageCount: messages.length,
+            });
+            return messages;
+        }
+        catch (error) {
+            logger.debug("[ToolRouting] Failed to load conversation history; routing on current query only", {
+                error: error instanceof Error ? error.message : String(error),
+            });
+            return [];
+        }
+    }
+    /**
+     * Supplies (or replaces) the pre-call tool routing server catalog, for hosts
+     * that only know their tool servers after constructing NeuroLink (e.g. tools
+     * are registered per session/conversation). Routing must still be enabled via
+     * the constructor's `toolRouting.enabled`: with no constructor config a warning
+     * is logged and the servers are stored with `enabled: false`. The array is kept
+     * by reference (not copied), so later caller-side mutations remain visible.
+     */
+    setToolRoutingServers(servers) {
+        if (!this.toolRoutingConfig) {
+            logger.warn("[ToolRouting] setToolRoutingServers called without toolRouting constructor config — servers stored but routing stays disabled");
+            this.toolRoutingConfig = { enabled: false, servers };
+            return;
+        }
+        this.toolRoutingConfig.servers = servers;
+    }
     async validateStreamRequestOptions(options, startTime) {
         await this.validateStreamInput(options);
         this.enforceSessionBudget(options.maxBudgetUsd);

package/dist/telemetry/attributes.js CHANGED Viewed

@@ -156,7 +156,9 @@ export function spanJsonAttribute(value, maxChars = SPAN_ATTRIBUTE_MAX_CHARS) {
         serialized = String(value);
     }
     if (serialized.length > maxChars) {
-        return `${serialized.slice(0, maxChars)}...[truncated ${serialized.length - maxChars} chars]`;
+        const truncationSuffix = `...[truncated ${serialized.length - maxChars} chars]`;
+        const keepLength = Math.max(0, maxChars - truncationSuffix.length);
+        return `${serialized.slice(0, keepLength)}${truncationSuffix}`;
     }
     return serialized;
 }

package/dist/types/config.d.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import type { ConversationMemoryConfig } from "./conversation.js";
 import type { ObservabilityConfig } from "./observability.js";
 import type { AuthProvider, AuthProviderType, AuthProviderConfig, Auth0Config, ClerkConfig, FirebaseConfig, SupabaseConfig, WorkOSConfig, BetterAuthConfig, JWTConfig, OAuth2Config, CognitoConfig, KeycloakConfig, AuthenticatedContext } from "./auth.js";
 import type { NeurolinkCredentials } from "./providers.js";
+import type { ToolRoutingConfig } from "./toolRouting.js";
 /**
  * Main NeuroLink configuration type
  */
@@ -66,6 +67,13 @@ export type NeurolinkConstructorConfig = {
      * provider is preserved across the chain; only the model name changes.
      */
     modelChain?: string[];
+    /**
+     * Pre-call tool routing: a cheap router LLM picks the tool servers
+     * relevant to each stream() turn and the unpicked servers' tools are
+     * dropped from the request via `excludeTools`. Fails open (all tools) on
+     * any router failure. See {@link ToolRoutingConfig}.
+     */
+    toolRouting?: ToolRoutingConfig;
 };
 /**
  * Configuration for MCP enhancement modules wired into generate()/stream() paths.

package/dist/types/index.d.ts CHANGED Viewed

@@ -50,6 +50,7 @@ export * from "./stream.js";
 export * from "./subscription.js";
 export * from "./task.js";
 export * from "./taskClassification.js";
+export * from "./toolRouting.js";
 export * from "./tools.js";
 export * from "./voice.js";
 export * from "./universalProviderOptions.js";

package/dist/types/index.js CHANGED Viewed

@@ -51,6 +51,7 @@ export * from "./stream.js";
 export * from "./subscription.js";
 export * from "./task.js";
 export * from "./taskClassification.js";
+export * from "./toolRouting.js";
 export * from "./tools.js";
 export * from "./voice.js";
 export * from "./universalProviderOptions.js";

package/dist/types/toolRouting.d.ts ADDED Viewed

@@ -0,0 +1,91 @@
+/**
+ * Pre-call tool routing — configuration and catalog types.
+ *
+ * Host applications can register large numbers of custom tools (typically MCP
+ * server tools) whose names are prefixed with their server id
+ * (`${serverId}_${toolName}`). When tool routing is enabled, a cheap router
+ * LLM call runs once per `stream()` turn, picks the servers relevant to the
+ * user query, and the tools of every unpicked server are appended to the
+ * request's `excludeTools` denylist before the main model call.
+ *
+ * Denylist semantics are deliberate: the router only knows the declared
+ * server catalog — a strict subset of the real tool set. Excluding unpicked
+ * servers leaves NeuroLink's built-in direct tools, always-include servers,
+ * and any tools outside the catalog untouched. The whole mechanism fails
+ * open: any router failure resolves to an empty exclusion list (all tools),
+ * identical to routing being disabled.
+ */
+import type { GenerateOptions, GenerateResult } from "./generate.js";
+/** One routable tool server as declared by the host application (an entry of `ToolRoutingConfig.servers`). */
+export type ToolRoutingServerDescriptor = {
+    /**
+     * Server id. Must equal the prefix the host used when registering this server's
+     * tools (`${id}_${toolName}`) — registered tool names are grouped by this prefix.
+     */
+    id: string;
+    /** Routing-grade server description shown to the router LLM. */
+    description: string;
+};
+/**
+ * LLM settings for the router call. `provider`, `model` and `region` omitted here
+ * fall back to the stream call's own values, so the router uses the same model as
+ * the main chat call unless overridden; `temperature` has no stream-call fallback.
+ */
+export type ToolRoutingModelConfig = {
+    provider?: string;
+    model?: string;
+    region?: string;
+    /** Router sampling temperature. Default: 0. */
+    temperature?: number;
+};
+/** Constructor-level configuration for pre-call tool routing. */
+export type ToolRoutingConfig = {
+    /** Master switch. Routing runs only when true, the server catalog is non-empty, and the stream call does not set `disableTools`. */
+    enabled: boolean;
+    /**
+     * Routable server catalog. Hosts that only know their servers after
+     * constructing NeuroLink can supply it later via
+     * `neurolink.setToolRoutingServers()` instead.
+     */
+    servers?: ToolRoutingServerDescriptor[];
+    /**
+     * Server ids whose tools are always kept and never offered to the router
+     * (e.g. utility / reasoning / chart servers every turn may need).
+     */
+    alwaysIncludeServerIds?: string[];
+    /** Hard ceiling for the router LLM call before failing open. Default: 15000. */
+    timeoutMs?: number;
+    /** Router LLM override. Defaults to the stream call's provider/model/region at temperature 0. */
+    routerModel?: ToolRoutingModelConfig;
+    /**
+     * Override for the instruction text placed before the user query in the
+     * router prompt (role + task framing). When omitted, the SDK built-in
+     * default is used. The server catalog, user query, and output rules are
+     * always appended by the SDK regardless of this value.
+     */
+    routerPromptPrefix?: string;
+};
+/** Catalog entry pairing one server descriptor (`id`, `description`) with that server's registered tool names. */
+export type ToolRoutingCatalogEntry = {
+    id: string;
+    description: string;
+    /** Registered tool names belonging to this server, i.e. names of the form `${serverId}_${toolName}`. */
+    toolNames: string[];
+};
+/** Parameters for `resolveToolRoutingExclusions()`. */
+export type ToolRoutingResolutionParams = {
+    /** Full catalog; always-include servers are filtered out internally. */
+    catalog: ToolRoutingCatalogEntry[];
+    /** Server ids never offered to the router. */
+    alwaysIncludeServerIds: string[];
+    /** Text the router classifies: the stream input text, which the caller may already have enriched with recent conversation history. */
+    userQuery: string;
+    /** Instruction text placed before the user query. Defaults to the SDK built-in. */
+    routerPromptPrefix?: string;
+    /** Router LLM settings, already resolved against the stream call's options. */
+    routerModel: ToolRoutingModelConfig;
+    /** Timeout for the router call in milliseconds. */
+    timeoutMs: number;
+    /** Invokes the router LLM — `NeuroLink.generate` bound by the caller. */
+    generateFn: (options: GenerateOptions) => Promise<GenerateResult>;
+};

package/dist/types/toolRouting.js ADDED Viewed

@@ -0,0 +1,18 @@
+/**
+ * Pre-call tool routing — configuration and catalog types.
+ *
+ * Host applications can register large numbers of custom tools (typically MCP
+ * server tools) whose names are prefixed with their server id
+ * (`${serverId}_${toolName}`). When tool routing is enabled, a cheap router
+ * LLM call runs once per `stream()` turn, picks the servers relevant to the
+ * user query, and the tools of every unpicked server are appended to the
+ * request's `excludeTools` denylist before the main model call.
+ *
+ * Denylist semantics are deliberate: the router only knows the declared
+ * server catalog — a strict subset of the real tool set. Excluding unpicked
+ * servers leaves NeuroLink's built-in direct tools, always-include servers,
+ * and any tools outside the catalog untouched. The whole mechanism fails
+ * open: any router failure resolves to an empty exclusion list (all tools),
+ * identical to routing being disabled.
+ */
+export {};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@juspay/neurolink",
-  "version": "9.71.0",
+  "version": "9.72.0",
   "packageManager": "pnpm@10.15.1",
   "description": "Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications with 21+ providers: OpenAI, Anthropic, Google AI Studio, Google Vertex, AWS Bedrock, Azure OpenAI, Mistral, LiteLLM, SageMaker, Hugging Face, Ollama, OpenAI-compatible, OpenRouter, DeepSeek, NVIDIA NIM, LM Studio, llama.cpp, plus voice (OpenAI TTS, ElevenLabs, Deepgram, Azure Speech).",
   "author": {