@juspay/neurolink 9.71.0 → 9.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/neurolink.js CHANGED
@@ -28,8 +28,9 @@ import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
28
28
  import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
29
29
  import { ContextBudgetExceededError } from "./context/errors.js";
30
30
  import { repairToolPairs } from "./context/toolPairRepair.js";
31
- import { SYSTEM_LIMITS } from "./core/constants.js";
31
+ import { SYSTEM_LIMITS, DEFAULT_TOOL_ROUTING_TIMEOUT_MS, } from "./core/constants.js";
32
32
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
33
+ import { buildToolRoutingCatalog, buildRoutingQueryFromHistory, resolveToolRoutingExclusions, } from "./core/toolRouting.js";
33
34
  import { AIProviderFactory } from "./core/factory.js";
34
35
  import { createToolEventPayload } from "./core/toolEvents.js";
35
36
  import { ProviderRegistry } from "./factories/providerRegistry.js";
@@ -436,6 +437,10 @@ export class NeuroLink {
436
437
  conversationMemory;
437
438
  conversationMemoryNeedsInit = false;
438
439
  conversationMemoryConfig;
440
+ // Pre-call tool routing: instance-level config from the constructor.
441
+ // The server catalog inside it can be supplied/replaced later via
442
+ // setToolRoutingServers() for hosts that register tools after construction.
443
+ toolRoutingConfig;
439
444
  // Add orchestration property
440
445
  enableOrchestration;
441
446
  // Authentication provider for secure access control
@@ -842,6 +847,12 @@ export class NeuroLink {
842
847
  if (config?.modelChain) {
843
848
  this.fallbackConfig.modelChain = config.modelChain;
844
849
  }
850
+ if (config?.toolRouting) {
851
+ // Shallow-clone so setToolRoutingServers() mutating this.toolRoutingConfig
852
+ // can't leak into the caller's config object, which may be shared across
853
+ // multiple NeuroLink instances.
854
+ this.toolRoutingConfig = { ...config.toolRouting };
855
+ }
845
856
  logger.setEventEmitter(this.emitter);
846
857
  // Read tool cache duration from environment variables, with a default
847
858
  const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
@@ -5622,6 +5633,16 @@ Current user's request: ${currentInput}`;
5622
5633
  // Make neurolink.stream the active span so every provider span (generations,
5623
5634
  // tool calls) parents under it — one Langfuse trace per turn, not a forest.
5624
5635
  const streamSpanContext = trace.setSpan(context.active(), streamSpan);
5636
+ // Pre-call tool routing: run inside the stream-span + Langfuse context so
5637
+ // the router's own generation span nests under this turn's trace instead
5638
+ // of starting a separate one. Asks a cheap router LLM which tool servers
5639
+ // the query needs and appends the unpicked servers' tools to
5640
+ // `excludeTools`. Fails open (no exclusions). Routes on the current
5641
+ // prompt enriched with a bounded window of recent conversation turns
5642
+ // (pulled from conversation memory) so contextless follow-ups still
5643
+ // classify correctly. After the workflow short-circuit, so workflow
5644
+ // streams skip it.
5645
+ await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.applyToolRoutingExclusions(options, originalPrompt)));
5625
5646
  // TTS Mode 2 deferred: stream() emits text first, then synthesizes the
5626
5647
  // accumulated response into a single audio chunk at end-of-stream and
5627
5648
  // resolves `streamResult.audio` with the same TTSResult. The resolver is
@@ -5666,6 +5687,172 @@ Current user's request: ${currentInput}`;
5666
5687
  throw error;
5667
5688
  }
5668
5689
  }
5690
+ /**
5691
+ * Pre-call tool routing for stream(): runs the router LLM once per turn on `userQuery` (plus recent history)
5692
+ * and appends the unpicked servers' registered tool names to
5693
+ * `options.excludeTools` (reassigned on the passed-in `options`) — the per-call denylist enforced by
5694
+ * `baseProvider.applyToolFiltering`. No-op unless `toolRouting.enabled` is true,
5695
+ * `options.disableTools` is falsy, and a non-empty server catalog has been supplied.
5696
+ * Fails open: non-abort errors are logged and swallowed; abort errors (`isAbortError`) are rethrown.
5697
+ */
5698
+ async applyToolRoutingExclusions(options, userQuery) {
5699
+ const routingConfig = this.toolRoutingConfig;
5700
+ if (!routingConfig?.enabled || options.disableTools) {
5701
+ return;
5702
+ }
5703
+ const servers = routingConfig.servers ?? [];
5704
+ if (servers.length === 0) {
5705
+ return;
5706
+ }
5707
+ // Whole setup is fail-open: catalog building (getCustomTools /
5708
+ // buildToolRoutingCatalog) and the router call degrade to no exclusions
5709
+ // rather than killing the stream, honoring this method's fail-open
5710
+ // contract. Genuine stream cancellations still propagate (rethrown in the catch below).
5711
+ try {
5712
+ const registeredToolNames = Array.from(this.getCustomTools().keys());
5713
+ const catalog = buildToolRoutingCatalog(servers, registeredToolNames);
5714
+ if (catalog.length === 0) {
5715
+ return;
5716
+ }
5717
+ // Fold a bounded window of recent conversation turns into the routing query.
5718
+ // The router runs pre-memory and would otherwise see only this turn's raw
5719
+ // text, so a contextless follow-up ("yes please") gives it nothing to
5720
+ // classify — it fails open and routing narrows nothing. The main model
5721
+ // still receives full history later via conversation memory; this only
5722
+ // enriches the router's view. Fails open to the current query alone.
5723
+ const recentMessages = await this.fetchRecentRoutingHistory(options);
5724
+ const routingQuery = recentMessages.length > 0
5725
+ ? buildRoutingQueryFromHistory(recentMessages, userQuery)
5726
+ : userQuery;
5727
+ // The router call below re-enters the public generate(), whose finally
5728
+ // block resets _disableToolCacheForCurrentRequest to false. That flag is
5729
+ // stream-scoped (set at the top of this turn) and read by the main tool
5730
+ // execution path that runs after routing, so save it before the router
5731
+ // call and restore it afterward to keep the turn's cache setting intact.
5732
+ const cacheDisabledForCurrentRequest = this._disableToolCacheForCurrentRequest;
5733
+ let routedExcludeTools;
5734
+ try {
5735
+ routedExcludeTools = await resolveToolRoutingExclusions({
5736
+ catalog,
5737
+ alwaysIncludeServerIds: routingConfig.alwaysIncludeServerIds ?? [],
5738
+ userQuery: routingQuery,
5739
+ routerPromptPrefix: routingConfig.routerPromptPrefix,
5740
+ routerModel: {
5741
+ provider: routingConfig.routerModel?.provider ??
5742
+ options.provider,
5743
+ model: routingConfig.routerModel?.model ?? options.model,
5744
+ region: routingConfig.routerModel?.region ?? options.region,
5745
+ temperature: routingConfig.routerModel?.temperature,
5746
+ },
5747
+ timeoutMs: routingConfig.timeoutMs ?? DEFAULT_TOOL_ROUTING_TIMEOUT_MS,
5748
+ // Forward the stream's abort signal so a cancelled stream aborts the
5749
+ // router call promptly instead of waiting out the routing timeout.
5750
+ generateFn: (generateOptions) => this.generate({
5751
+ ...generateOptions,
5752
+ abortSignal: options.abortSignal,
5753
+ }),
5754
+ });
5755
+ }
5756
+ finally {
5757
+ this._disableToolCacheForCurrentRequest =
5758
+ cacheDisabledForCurrentRequest;
5759
+ }
5760
+ // Aborted during the router call — skip applying now-stale exclusions;
5761
+ // the main generation path enforces the abort itself.
5762
+ if (options.abortSignal?.aborted) {
5763
+ return;
5764
+ }
5765
+ if (routedExcludeTools.length > 0) {
5766
+ options.excludeTools = [
5767
+ ...(options.excludeTools ?? []),
5768
+ ...routedExcludeTools,
5769
+ ];
5770
+ }
5771
+ }
5772
+ catch (error) {
5773
+ if (isAbortError(error)) {
5774
+ throw error;
5775
+ }
5776
+ logger.warn("[ToolRouting] Routing setup failed, failing open", {
5777
+ error: error instanceof Error ? error.message : String(error),
5778
+ });
5779
+ }
5780
+ }
5781
+ /**
5782
+ * Loads prior conversation turns for the router so a
5783
+ * follow-up turn carries the context it needs to classify intent. Returns
5784
+ * inline `options.conversationMessages` as-is when non-empty; otherwise, when
5785
+ * `options.context.sessionId` is a non-empty string, triggers the lazy memory init and
5786
+ * reads history via `getConversationMessages` with `enableSummarization: false`.
5787
+ * Fails open to an empty list (no sessionId, no memory after init, or any thrown
5788
+ * error, logged at debug) — routing then falls back to the current query alone.
5789
+ * NOTE(review): no window bounding happens here; presumably buildRoutingQueryFromHistory bounds it — confirm.
5790
+ */
5791
+ async fetchRecentRoutingHistory(options) {
5792
+ try {
5793
+ const requestContext = options.context;
5794
+ // Inline multi-turn callers pass prior turns via options.conversationMessages
5795
+ // (the same field the main model reads) rather than server-side session
5796
+ // memory. Honor it directly so a contextless follow-up still routes with
5797
+ // context even when no sessionId is present.
5798
+ if (options.conversationMessages &&
5799
+ options.conversationMessages.length > 0) {
5800
+ return options.conversationMessages;
5801
+ }
5802
+ const sessionId = requestContext?.sessionId;
5803
+ if (typeof sessionId !== "string" || !sessionId) {
5804
+ return [];
5805
+ }
5806
+ // The pre-call router runs earlier in the stream pipeline than the main
5807
+ // generation path's own memory init (initializeConversationMemoryForGeneration),
5808
+ // so this.conversationMemory is still undefined at router time and the
5809
+ // router would only ever see the current turn. Trigger the same lazy init
5810
+ // the main path uses — it is idempotent, so the later call is a no-op —
5811
+ // so the router can read prior turns. Fails open via the surrounding catch.
5812
+ await this.initializeConversationMemoryForGeneration(`tool-routing-${Date.now()}`, Date.now(), process.hrtime.bigint());
5813
+ const memory = this.conversationMemory;
5814
+ if (!memory) {
5815
+ return [];
5816
+ }
5817
+ // Reuse the SAME reader the main model uses so the router sees identically
5818
+ // curated history: polluted turns dropped, read instrumented under the
5819
+ // neurolink.conversation.getMessages span. enableSummarization=false keeps
5820
+ // routing cheap and free of any summary-LLM side effect. The remaining
5821
+ // tool_call/tool_result turns are dropped at transcript-render time
5822
+ // (buildRoutingQueryFromHistory) to mirror what the main model is sent.
5823
+ const messages = await getConversationMessages(memory, {
5824
+ ...options,
5825
+ enableSummarization: false,
5826
+ });
5827
+ logger.debug("[ToolRouting] Loaded conversation history for router", {
5828
+ sessionId,
5829
+ messageCount: messages.length,
5830
+ });
5831
+ return messages;
5832
+ }
5833
+ catch (error) {
5834
+ logger.debug("[ToolRouting] Failed to load conversation history; routing on current query only", {
5835
+ error: error instanceof Error ? error.message : String(error),
5836
+ });
5837
+ return [];
5838
+ }
5839
+ }
5840
+ /**
5841
+ * Supplies (or replaces) the pre-call tool routing server catalog; the `servers` array is stored by reference, not copied.
5842
+ *
5843
+ * For hosts that only know their tool servers after constructing NeuroLink
5844
+ * (e.g. tools are registered per session/conversation). Routing must still
5845
+ * be enabled via the constructor's `toolRouting.enabled` — setting servers
5846
+ * alone does not activate it: with no constructor config they are stored under `enabled: false` and a warning is logged.
5847
+ */
5848
+ setToolRoutingServers(servers) {
5849
+ if (!this.toolRoutingConfig) {
5850
+ logger.warn("[ToolRouting] setToolRoutingServers called without toolRouting constructor config — servers stored but routing stays disabled");
5851
+ this.toolRoutingConfig = { enabled: false, servers };
5852
+ return;
5853
+ }
5854
+ this.toolRoutingConfig.servers = servers;
5855
+ }
5669
5856
  async validateStreamRequestOptions(options, startTime) {
5670
5857
  await this.validateStreamInput(options);
5671
5858
  this.enforceSessionBudget(options.maxBudgetUsd);
@@ -156,7 +156,9 @@ export function spanJsonAttribute(value, maxChars = SPAN_ATTRIBUTE_MAX_CHARS) {
156
156
  serialized = String(value);
157
157
  }
158
158
  if (serialized.length > maxChars) {
159
- return `${serialized.slice(0, maxChars)}...[truncated ${serialized.length - maxChars} chars]`;
159
+ const truncationSuffix = `...[truncated ${serialized.length - maxChars} chars]`;
160
+ const keepLength = Math.max(0, maxChars - truncationSuffix.length);
161
+ return `${serialized.slice(0, keepLength)}${truncationSuffix}`;
160
162
  }
161
163
  return serialized;
162
164
  }
@@ -9,6 +9,7 @@ import type { ConversationMemoryConfig } from "./conversation.js";
9
9
  import type { ObservabilityConfig } from "./observability.js";
10
10
  import type { AuthProvider, AuthProviderType, AuthProviderConfig, Auth0Config, ClerkConfig, FirebaseConfig, SupabaseConfig, WorkOSConfig, BetterAuthConfig, JWTConfig, OAuth2Config, CognitoConfig, KeycloakConfig, AuthenticatedContext } from "./auth.js";
11
11
  import type { NeurolinkCredentials } from "./providers.js";
12
+ import type { ToolRoutingConfig } from "./toolRouting.js";
12
13
  /**
13
14
  * Main NeuroLink configuration type
14
15
  */
@@ -66,6 +67,13 @@ export type NeurolinkConstructorConfig = {
66
67
  * provider is preserved across the chain; only the model name changes.
67
68
  */
68
69
  modelChain?: string[];
70
+ /**
71
+ * Pre-call tool routing: a cheap router LLM picks the tool servers
72
+ * relevant to each stream() turn and the unpicked servers' tools are
73
+ * dropped from the request via `excludeTools`. Fails open (all tools) on
74
+ * any router failure. See {@link ToolRoutingConfig}.
75
+ */
76
+ toolRouting?: ToolRoutingConfig;
69
77
  };
70
78
  /**
71
79
  * Configuration for MCP enhancement modules wired into generate()/stream() paths.
@@ -50,6 +50,7 @@ export * from "./stream.js";
50
50
  export * from "./subscription.js";
51
51
  export * from "./task.js";
52
52
  export * from "./taskClassification.js";
53
+ export * from "./toolRouting.js";
53
54
  export * from "./tools.js";
54
55
  export * from "./voice.js";
55
56
  export * from "./universalProviderOptions.js";
@@ -51,6 +51,7 @@ export * from "./stream.js";
51
51
  export * from "./subscription.js";
52
52
  export * from "./task.js";
53
53
  export * from "./taskClassification.js";
54
+ export * from "./toolRouting.js";
54
55
  export * from "./tools.js";
55
56
  export * from "./voice.js";
56
57
  export * from "./universalProviderOptions.js";
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Pre-call tool routing — configuration and catalog types.
3
+ *
4
+ * Host applications can register large numbers of custom tools (typically MCP
5
+ * server tools) whose names are prefixed with their server id
6
+ * (`${serverId}_${toolName}`). When tool routing is enabled, a cheap router
7
+ * LLM call runs once per `stream()` turn, picks the servers relevant to the
8
+ * user query, and the tools of every unpicked server are appended to the
9
+ * request's `excludeTools` denylist before the main model call.
10
+ *
11
+ * Denylist semantics are deliberate: the router only knows the declared
12
+ * server catalog — a strict subset of the real tool set. Excluding unpicked
13
+ * servers leaves NeuroLink's built-in direct tools, always-include servers,
14
+ * and any tools outside the catalog untouched. The whole mechanism fails
15
+ * open: any router failure resolves to an empty exclusion list (all tools),
16
+ * identical to routing being disabled.
17
+ */
18
+ import type { GenerateOptions, GenerateResult } from "./generate.js";
19
+ /** One routable server as declared by the host application (via `ToolRoutingConfig.servers` or `neurolink.setToolRoutingServers()`). */
20
+ export type ToolRoutingServerDescriptor = {
21
+ /**
22
+ * Server id. Must be the prefix used when the host registered the server's
23
+ * tools (`${id}_${toolName}`) — tool names are grouped by this prefix.
24
+ */
25
+ id: string;
26
+ /** Routing-grade server description shown to the router LLM. */
27
+ description: string;
28
+ };
29
+ /**
30
+ * LLM settings for the router call. `provider`, `model` and `region`, when omitted, fall back to the
31
+ * stream call's own provider/model/region, so the router uses the same model
32
+ * as the main chat call unless explicitly overridden.
33
+ */
34
+ export type ToolRoutingModelConfig = {
35
+ provider?: string;
36
+ model?: string;
37
+ region?: string;
38
+ /** Router sampling temperature; unlike the fields above it has no stream-call fallback. Default: 0. */
39
+ temperature?: number;
40
+ };
41
+ /** Constructor-level configuration for pre-call tool routing. */
42
+ export type ToolRoutingConfig = {
43
+ /** Master switch. Routing runs only when true AND the server catalog is non-empty AND the stream call does not set `disableTools`. */
44
+ enabled: boolean;
45
+ /**
46
+ * Routable server catalog. Hosts that only know their servers after
47
+ * constructing NeuroLink can supply it later via
48
+ * `neurolink.setToolRoutingServers()` instead.
49
+ */
50
+ servers?: ToolRoutingServerDescriptor[];
51
+ /**
52
+ * Server ids whose tools are always kept and never offered to the router
53
+ * (e.g. utility / reasoning / chart servers every turn may need).
54
+ */
55
+ alwaysIncludeServerIds?: string[];
56
+ /** Hard ceiling (milliseconds) for the router LLM call before failing open. Default: 15000. */
57
+ timeoutMs?: number;
58
+ /** Router LLM override. Defaults to the stream call's provider/model/region at temperature 0. */
59
+ routerModel?: ToolRoutingModelConfig;
60
+ /**
61
+ * Override for the instruction text placed before the user query in the
62
+ * router prompt (role + task framing). When omitted, the SDK built-in
63
+ * default is used. The server catalog, user query, and output rules are
64
+ * always appended by the SDK regardless of this value.
65
+ */
66
+ routerPromptPrefix?: string;
67
+ };
68
+ /** Catalog entry pairing a server descriptor (`id`, `description`; see {@link ToolRoutingServerDescriptor}) with its registered tool names. */
69
+ export type ToolRoutingCatalogEntry = {
70
+ id: string;
71
+ description: string;
72
+ /** Registered tool names for this server, i.e. `${serverId}_${toolName}`. */
73
+ toolNames: string[];
74
+ };
75
+ /** Parameters for `resolveToolRoutingExclusions()`. */
76
+ export type ToolRoutingResolutionParams = {
77
+ /** Full catalog; always-include servers are filtered out internally. */
78
+ catalog: ToolRoutingCatalogEntry[];
79
+ /** Server ids never offered to the router. */
80
+ alwaysIncludeServerIds: string[];
81
+ /** Query text for the router. NeuroLink's stream() passes the stream input text, enriched with recent conversation turns when any are available. */
82
+ userQuery: string;
83
+ /** Instruction text placed before the user query. Defaults to the SDK built-in. */
84
+ routerPromptPrefix?: string;
85
+ /** Router LLM settings, already resolved against the stream call's options. */
86
+ routerModel: ToolRoutingModelConfig;
87
+ /** Timeout for the router call in milliseconds. */
88
+ timeoutMs: number;
89
+ /** Invokes the router LLM — `NeuroLink.generate` bound by the caller. */
90
+ generateFn: (options: GenerateOptions) => Promise<GenerateResult>;
91
+ };
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Pre-call tool routing — configuration and catalog types.
3
+ *
4
+ * Host applications can register large numbers of custom tools (typically MCP
5
+ * server tools) whose names are prefixed with their server id
6
+ * (`${serverId}_${toolName}`). When tool routing is enabled, a cheap router
7
+ * LLM call runs once per `stream()` turn, picks the servers relevant to the
8
+ * user query, and the tools of every unpicked server are appended to the
9
+ * request's `excludeTools` denylist before the main model call.
10
+ *
11
+ * Denylist semantics are deliberate: the router only knows the declared
12
+ * server catalog — a strict subset of the real tool set. Excluding unpicked
13
+ * servers leaves NeuroLink's built-in direct tools, always-include servers,
14
+ * and any tools outside the catalog untouched. The whole mechanism fails
15
+ * open: any router failure resolves to an empty exclusion list (all tools),
16
+ * identical to routing being disabled.
17
+ */
18
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "9.71.0",
3
+ "version": "9.72.0",
4
4
  "packageManager": "pnpm@10.15.1",
5
5
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications with 21+ providers: OpenAI, Anthropic, Google AI Studio, Google Vertex, AWS Bedrock, Azure OpenAI, Mistral, LiteLLM, SageMaker, Hugging Face, Ollama, OpenAI-compatible, OpenRouter, DeepSeek, NVIDIA NIM, LM Studio, llama.cpp, plus voice (OpenAI TTS, ElevenLabs, Deepgram, Azure Speech).",
6
6
  "author": {