npm - @juspay/neurolink - Versions diffs - 9.71.0 → 9.73.0 - Mend

@juspay/neurolink 9.71.0 → 9.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75)

package/CHANGELOG.md +12 -0
package/dist/browser/neurolink.min.js +330 -312
package/dist/core/constants.d.ts +1 -0
package/dist/core/constants.js +2 -0
package/dist/core/toolRouting.d.ts +59 -0
package/dist/core/toolRouting.js +232 -0
package/dist/lib/core/constants.d.ts +1 -0
package/dist/lib/core/constants.js +2 -0
package/dist/lib/core/toolRouting.d.ts +59 -0
package/dist/lib/core/toolRouting.js +233 -0
package/dist/lib/neurolink.d.ts +31 -1
package/dist/lib/neurolink.js +188 -1
package/dist/lib/telemetry/attributes.js +3 -1
package/dist/lib/types/config.d.ts +8 -0
package/dist/lib/types/index.d.ts +1 -0
package/dist/lib/types/index.js +1 -0
package/dist/lib/types/livekit.d.ts +134 -0
package/dist/lib/types/toolRouting.d.ts +91 -0
package/dist/lib/types/toolRouting.js +19 -0
package/dist/lib/voice/livekit/brain.js +1 -1
package/dist/lib/voice/livekit/config.d.ts +12 -1
package/dist/lib/voice/livekit/config.js +54 -0
package/dist/lib/voice/livekit/eventBridge.js +4 -4
package/dist/lib/voice/livekit/index.d.ts +9 -2
package/dist/lib/voice/livekit/index.js +9 -2
package/dist/lib/voice/livekit/realtimeEventBridge.d.ts +14 -0
package/dist/lib/voice/livekit/realtimeEventBridge.js +161 -0
package/dist/lib/voice/livekit/realtimeMcpTools.d.ts +31 -0
package/dist/lib/voice/livekit/realtimeMcpTools.js +194 -0
package/dist/lib/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
package/dist/lib/voice/livekit/realtimeVoiceAgent.js +362 -0
package/dist/lib/voice/livekit/roomContext.d.ts +23 -0
package/dist/lib/voice/livekit/roomContext.js +57 -0
package/dist/lib/voice/livekit/roomDispatch.d.ts +24 -0
package/dist/lib/voice/livekit/roomDispatch.js +31 -0
package/dist/lib/voice/livekit/schemaSanitizer.d.ts +26 -0
package/dist/lib/voice/livekit/schemaSanitizer.js +144 -0
package/dist/lib/voice/livekit/vertexAuth.d.ts +30 -0
package/dist/lib/voice/livekit/vertexAuth.js +73 -0
package/dist/lib/voice/livekit/voiceAgent.js +47 -37
package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +2 -0
package/dist/lib/voice/livekit/voiceAgentWorker.js +64 -0
package/dist/neurolink.d.ts +31 -1
package/dist/neurolink.js +188 -1
package/dist/telemetry/attributes.js +3 -1
package/dist/types/config.d.ts +8 -0
package/dist/types/index.d.ts +1 -0
package/dist/types/index.js +1 -0
package/dist/types/livekit.d.ts +134 -0
package/dist/types/toolRouting.d.ts +91 -0
package/dist/types/toolRouting.js +18 -0
package/dist/voice/livekit/brain.js +1 -1
package/dist/voice/livekit/config.d.ts +12 -1
package/dist/voice/livekit/config.js +54 -0
package/dist/voice/livekit/eventBridge.js +4 -4
package/dist/voice/livekit/index.d.ts +9 -2
package/dist/voice/livekit/index.js +9 -2
package/dist/voice/livekit/realtimeEventBridge.d.ts +14 -0
package/dist/voice/livekit/realtimeEventBridge.js +160 -0
package/dist/voice/livekit/realtimeMcpTools.d.ts +31 -0
package/dist/voice/livekit/realtimeMcpTools.js +193 -0
package/dist/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
package/dist/voice/livekit/realtimeVoiceAgent.js +361 -0
package/dist/voice/livekit/roomContext.d.ts +23 -0
package/dist/voice/livekit/roomContext.js +56 -0
package/dist/voice/livekit/roomDispatch.d.ts +24 -0
package/dist/voice/livekit/roomDispatch.js +30 -0
package/dist/voice/livekit/schemaSanitizer.d.ts +26 -0
package/dist/voice/livekit/schemaSanitizer.js +143 -0
package/dist/voice/livekit/vertexAuth.d.ts +30 -0
package/dist/voice/livekit/vertexAuth.js +72 -0
package/dist/voice/livekit/voiceAgent.js +47 -37
package/dist/voice/livekit/voiceAgentWorker.d.ts +2 -0
package/dist/voice/livekit/voiceAgentWorker.js +64 -0
package/package.json +2 -1

package/dist/lib/voice/livekit/vertexAuth.d.ts ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Vertex authentication helpers for the realtime voice agent.
+ *
+ * The Gemini Live WebSocket authenticates to Vertex via Application Default
+ * Credentials (ADC). These helpers materialise ADC from the split
+ * `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
+ * any Gemini Developer API key from the environment so `@google/genai` uses
+ * Vertex/ADC auth (not an API key) for the realtime WebSocket.
+ *
+ * See docs/features/livekit-voice-agent.md.
+ */
+/**
+ * Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
+ *
+ * The google realtime plugin authenticates Vertex via ADC (it does not accept
+ * inline credentials), so this writes a temp service-account JSON and points
+ * `GOOGLE_APPLICATION_CREDENTIALS` at it — unless ADC is already configured.
+ * No-op when that variable is set or when `GOOGLE_AUTH_CLIENT_EMAIL` /
+ * `GOOGLE_AUTH_PRIVATE_KEY` is missing (auth then relies on ambient ADC).
+ */
+export declare function ensureVertexAdc(): void;
+/**
+ * Force pure Vertex/ADC auth for the Gemini Live WebSocket.
+ *
+ * `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
+ * WebSocket auth even when `vertexai: true` and project/location are set, which
+ * Vertex rejects at the handshake (WS close 1006). The realtime worker only ever talks to
+ * Vertex, so `GOOGLE_API_KEY`, `GOOGLE_AI_API_KEY` and `GEMINI_API_KEY` are removed (this process only).
+ */
+export declare function clearGeminiApiKeyEnv(): void;

package/dist/lib/voice/livekit/vertexAuth.js ADDED Viewed

@@ -0,0 +1,73 @@
+/**
+ * Vertex authentication helpers for the realtime voice agent.
+ *
+ * The Gemini Live WebSocket authenticates to Vertex via Application Default
+ * Credentials (ADC). These helpers materialise ADC from the split
+ * `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
+ * any Gemini Developer API key from the environment so `@google/genai` uses
+ * Vertex/ADC auth (not an API key) for the realtime WebSocket.
+ *
+ * See docs/features/livekit-voice-agent.md.
+ */
+import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { logger } from "../../utils/logger.js";
+/**
+ * Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
+ *
+ * The google realtime plugin authenticates Vertex via ADC (it does not accept
+ * inline credentials), so this writes a temp service-account JSON and points
+ * `GOOGLE_APPLICATION_CREDENTIALS` at it — unless ADC is already configured.
+ * No-op when that variable is set or when `GOOGLE_AUTH_CLIENT_EMAIL` /
+ * `GOOGLE_AUTH_PRIVATE_KEY` is missing (auth then relies on ambient ADC).
+ */
+export function ensureVertexAdc() {
+    if (process.env.GOOGLE_APPLICATION_CREDENTIALS) { // A credentials path is already set; leave it untouched.
+        return;
+    }
+    const clientEmail = process.env.GOOGLE_AUTH_CLIENT_EMAIL;
+    const rawPrivateKey = process.env.GOOGLE_AUTH_PRIVATE_KEY;
+    if (!clientEmail || !rawPrivateKey) { // Both fields are required to build the credentials object below.
+        logger.warn("[RealtimeVoiceAgent] No GOOGLE_APPLICATION_CREDENTIALS and no GOOGLE_AUTH_* fields — Vertex auth will rely on ambient ADC.");
+        return;
+    }
+    const credentials = {
+        type: process.env.GOOGLE_AUTH_TYPE ?? "service_account",
+        project_id: process.env.GOOGLE_AUTH_BREEZE_PROJECT_ID ??
+            process.env.GOOGLE_CLOUD_PROJECT_ID, // Fallback project id; stays undefined when neither variable is set.
+        private_key_id: process.env.GOOGLE_AUTH_PRIVATE_KEY_ID,
+        private_key: rawPrivateKey.replace(/\\n/g, "\n"), // Turn literal backslash-n sequences into real newlines.
+        client_email: clientEmail,
+        token_uri: process.env.GOOGLE_AUTH_TOKEN_URI ??
+            "https://oauth2.googleapis.com/token",
+    };
+    const credentialsDir = mkdtempSync(path.join(os.tmpdir(), "vertex-adc-")); // Fresh, uniquely named directory under the OS temp dir.
+    const credentialsPath = path.join(credentialsDir, "adc.json");
+    writeFileSync(credentialsPath, JSON.stringify(credentials), {
+        mode: 0o600, // Owner read/write only — the file contains the private key.
+        flag: "wx", // Fail instead of overwriting if the path already exists.
+    });
+    process.on("exit", () => { // Remove the temp directory (and the key file) when the process emits `exit`.
+        rmSync(credentialsDir, { recursive: true, force: true });
+    });
+    process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsPath;
+    logger.info(`[RealtimeVoiceAgent] Vertex ADC written to ${credentialsPath} (project ${credentials.project_id}).`);
+}
+/**
+ * Force pure Vertex/ADC auth for the Gemini Live WebSocket.
+ *
+ * `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
+ * WebSocket auth even when `vertexai: true` and project/location are set, which
+ * Vertex rejects at the handshake (WS close 1006). The realtime worker only ever talks to
+ * Vertex, so `GOOGLE_API_KEY`, `GOOGLE_AI_API_KEY` and `GEMINI_API_KEY` are removed (this process only).
+ */
+export function clearGeminiApiKeyEnv() {
+    for (const key of ["GOOGLE_API_KEY", "GOOGLE_AI_API_KEY", "GEMINI_API_KEY"]) {
+        if (process.env[key]) { // Unset and empty-string values are skipped (nothing deleted, nothing logged).
+            delete process.env[key];
+            logger.info(`[RealtimeVoiceAgent] cleared ${key} so genai uses Vertex/ADC auth (not API key) for the Live WS.`);
+        }
+    }
+}
+//# sourceMappingURL=vertexAuth.js.map

package/dist/lib/voice/livekit/voiceAgent.js CHANGED Viewed

@@ -192,9 +192,20 @@ export function defineVoiceAgent(config) {
     async function entry(ctx) {
         const entryStartedAt = Date.now();
         await ctx.connect();
-        logger.debug(`[LiveKitVoiceAgent] Joined room "${ctx.room.name}" in ${Date.now() - entryStartedAt}ms`);
-        // When the user actually stopped speaking (VAD), used to measure how long
-        // the agent waited after speech before committing the turn to the LLM.
+        logger.debug("voice.agent.roomJoined", {
+            room: ctx.room.name,
+            ms: Date.now() - entryStartedAt,
+        });
+        const { RoomEvent } = await import("@livekit/rtc-node");
+        ctx.room.on(RoomEvent.ParticipantDisconnected, () => {
+            if (ctx.room.remoteParticipants.size === 0) {
+                logger.info("voice.agent.participantLeft", {
+                    room: ctx.room.name,
+                    action: "shutdown",
+                });
+                ctx.shutdown("participant left");
+            }
+        });
         let userStoppedSpeakingAt;
         const neurolink = await config.createNeuroLink();
         const brain = createVoiceBrain({
@@ -245,11 +256,6 @@ export function defineVoiceAgent(config) {
                 final: false,
             });
         }
-        /**
-         * Lock the user bubble at turn-end and reset the buffer for the next turn.
-         * `replacesPrevious` tells the client this committed turn absorbed a prior
-         * interrupted turn, so it should remove the orphaned previous user bubble.
-         */
         function commitUserTranscript(finalText, replacesPrevious = false) {
             if (transcriptEmitter !== undefined) {
                 transcriptEmitter.emit("voice:user-transcript", {
@@ -274,7 +280,9 @@ export function defineVoiceAgent(config) {
                 pendingPrefix = "";
                 commitUserTranscript(promptText, hadPrefix);
                 if (userStoppedSpeakingAt !== undefined) {
-                    logger.debug(`[LiveKitVoiceAgent] Endpointing waited ${Date.now() - userStoppedSpeakingAt}ms before sending turn to LLM`);
+                    logger.debug("voice.agent.endpointingWaited", {
+                        ms: Date.now() - userStoppedSpeakingAt,
+                    });
                 }
                 return brainTurnStream(brain, promptText, conversationId, () => {
                     // Interrupted before producing any reply → carry this turn's text
@@ -299,7 +307,7 @@ export function defineVoiceAgent(config) {
         };
         if (eouTurnDetector !== undefined) {
             turnHandling.turnDetection = eouTurnDetector;
-            logger.info("[LiveKitVoiceAgent] Semantic end-of-utterance turn detection enabled (English)");
+            logger.info("voice.agent.eouEnabled", { language: "english" });
         }
         else if (config.turn?.mode) {
             turnHandling.turnDetection = config.turn.mode;
@@ -319,20 +327,11 @@ export function defineVoiceAgent(config) {
             tts,
             llm: new PlaceholderLLM(),
             turnHandling,
-            // Do NOT speculatively call the LLM on preflight transcripts before the
-            // turn ends — with NeuroLink as the brain each call is a real LLM request,
-            // and it makes the agent feel like it responds while you're still talking.
             preemptiveGeneration: false,
         });
         const agent = new NeuroLinkVoiceAgent({
             instructions: config.systemPrompt ?? "",
         });
-        // Inactivity watchdog: shut the per-call Job down after a stretch with no
-        // user or agent activity (mirrors Clairvoyance). On timeout `ctx.shutdown`
-        // runs the shutdown callbacks (disposing the bridge) and the Job process
-        // exits — freeing its RAM and the EOU model — while the browser observes a
-        // room disconnect. Reset on every interaction below. Configure via
-        // VOICE_INACTIVITY_TIMEOUT_MS (default 10 min); <= 0 disables the watchdog.
         const inactivityTimeoutMs = Number(process.env.VOICE_INACTIVITY_TIMEOUT_MS ?? 600_000);
         const inactivityEnabled = Number.isFinite(inactivityTimeoutMs) && inactivityTimeoutMs > 0;
         let inactivityTimer;
@@ -350,7 +349,11 @@ export function defineVoiceAgent(config) {
             clearInactivityTimer();
             inactivityTimer = setTimeout(() => {
                 inactivityFired = true;
-                logger.info(`[LiveKitVoiceAgent] Inactivity timeout (${Math.round(inactivityTimeoutMs / 1000)}s) reached — shutting down job for room "${ctx.room.name}"`);
+                logger.info("voice.agent.inactivityTimeout", {
+                    room: ctx.room.name,
+                    timeoutMs: inactivityTimeoutMs,
+                    action: "shutdown",
+                });
                 ctx.shutdown("inactivity timeout");
             }, inactivityTimeoutMs);
             // The watchdog must not, by itself, keep the event loop alive.
@@ -359,46 +362,53 @@ export function defineVoiceAgent(config) {
         ctx.addShutdownCallback(async () => {
             clearInactivityTimer();
         });
-        // Track when the user actually stops speaking (VAD) so endpointing latency
-        // can be measured, and reset the inactivity watchdog on user activity.
+        if (process.env.LK_REALTIME_CONNECT_MODE === "true") {
+            ctx.addShutdownCallback(async () => {
+                const parentPid = process.ppid;
+                setTimeout(() => {
+                    try {
+                        if (typeof parentPid === "number" && parentPid > 1) {
+                            process.kill(parentPid, "SIGTERM");
+                        }
+                    }
+                    catch {
+                        // Parent already gone — fall through to the hard exit below.
+                    }
+                    process.exit(0);
+                }, 500).unref?.();
+            });
+        }
         session.on(voice.AgentSessionEventTypes.UserStateChanged, (ev) => {
             noteActivity();
             if (ev.oldState === "speaking" && ev.newState !== "speaking") {
                 userStoppedSpeakingAt = Date.now();
             }
         });
-        // Reset the inactivity watchdog on any agent speech/processing and on every
-        // committed conversation item (user turn or agent reply), so the timeout
-        // only fires during a genuine lull in the conversation.
         session.on(voice.AgentSessionEventTypes.AgentStateChanged, () => {
             noteActivity();
         });
         session.on(voice.AgentSessionEventTypes.ConversationItemAdded, () => {
             noteActivity();
         });
-        // Forward user STT transcripts to the data-channel bridge as a single
-        // live-updating bubble. `UserInputTranscribed` fires `isFinal: true` per
-        // finalized SEGMENT (several per turn), so we never forward those as the
-        // turn-final; `emitUserTranscriptSegment` accumulates them into the per-turn
-        // buffer and emits `final: false`. The lone `final: true` is sent from
-        // `llmNode` at the real turn boundary.
         if (transcriptEventsEnabled) {
             session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (ev) => {
                 emitUserTranscriptSegment(ev.transcript, ev.isFinal);
             });
         }
-        logger.info("[LiveKitVoiceAgent] Session starting", {
+        logger.info("voice.agent.sessionStarting", {
             room: ctx.room.name,
             provider,
             model,
         });
         await session.start({ agent, room: ctx.room });
-        // Start the inactivity countdown now that the session is live; every
-        // interaction handler above re-arms it.
+        if (config.greeting !== undefined && config.greeting.trim().length > 0) {
+            const greetingStream = brainTurnStream(brain, config.greeting, conversationId);
+            session.say(greetingStream, {
+                addToChatCtx: true,
+                allowInterruptions: true,
+            });
+        }
         noteActivity();
-        // Data-channel event bridge: forward NeuroLink events (text, tool calls,
-        // results, HITL prompts, status) to the browser, and accept HITL responses
-        // back. Only when enabled and the instance exposes its event emitter.
         if (config.events?.enabled === true && neurolink.getEventEmitter) {
             const bridge = await attachEventBridge({
                 room: ctx.room,

package/dist/lib/voice/livekit/voiceAgentWorker.d.ts CHANGED Viewed

@@ -11,6 +11,7 @@
  * See docs/features/livekit-voice-agent.md.
  */
 import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
+export declare function installVoiceWorkerProcessGuards(metricsIntervalMs?: number): void;
 /**
  * Launch the LiveKit voice agent worker.
  *
@@ -25,3 +26,4 @@ import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
  * ```
  */
 export declare function startVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
+export declare function startRealtimeVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;

package/dist/lib/voice/livekit/voiceAgentWorker.js CHANGED Viewed

@@ -11,8 +11,58 @@
  * See docs/features/livekit-voice-agent.md.
  */
 import { resolveEouTurnDetection, resolveLiveKitServerConfig, } from "./config.js";
+import { logger } from "../../utils/logger.js";
 const DEFAULT_AGENT_NAME = "neurolink-voice";
 const EOU_METHOD_MULTILINGUAL = "lk_end_of_utterance_multilingual";
+const IS_JOB_CHILD = process.argv.some((arg) => arg.includes("job_proc")); // True when any CLI argument contains "job_proc" (presumably the per-job child process — confirm).
+const PROC_ROLE = IS_JOB_CHILD ? "job(child)" : "worker(parent)"; // Role label included in every log entry written by the process guards.
+let processGuardsInstalled = false; // Set to true by installVoiceWorkerProcessGuards so the guards are installed only once.
+export function installVoiceWorkerProcessGuards(metricsIntervalMs = Number(process.env.VOICE_METRICS_INTERVAL_MS ?? 10000)) { // Installs error/signal logging handlers plus a periodic memory log; the interval defaults to VOICE_METRICS_INTERVAL_MS, or 10000 ms when unset.
+    if (processGuardsInstalled) { // Idempotent: only the first call installs anything.
+        return;
+    }
+    processGuardsInstalled = true;
+    const procInfo = { // Common fields spread into every log entry below.
+        role: PROC_ROLE,
+        pid: process.pid,
+        ppid: process.ppid,
+    };
+    process.on("uncaughtException", (error) => { // Log the exception; an exit is scheduled only in a job child.
+        logger.error("voiceWorker.uncaughtException", {
+            ...procInfo,
+            error: error?.stack ?? String(error),
+        });
+        if (IS_JOB_CHILD) {
+            setTimeout(() => process.exit(1), 1000).unref?.(); // Exit with code 1 after 1 s; unref'd so this timer alone does not keep the process alive.
+        }
+    });
+    process.on("unhandledRejection", (reason) => { // Log only — no exit is scheduled for rejections.
+        logger.error("voiceWorker.unhandledRejection", {
+            ...procInfo,
+            error: reason instanceof Error ? reason.stack : String(reason),
+        });
+    });
+    for (const signal of ["SIGTERM", "SIGINT", "SIGHUP"]) {
+        process.on(signal, () => {
+            logger.warn("voiceWorker.signal", { ...procInfo, signal });
+            setTimeout(() => process.exit(0), 1500); // Exit with code 0 1.5 s after the signal (presumably a grace period for cleanup — confirm).
+        });
+    }
+    if (Number.isFinite(metricsIntervalMs) && metricsIntervalMs > 0) { // A non-finite or non-positive interval disables the memory log.
+        const mb = (bytes) => Math.round((bytes / 1024 / 1024) * 10) / 10; // Bytes to MiB, rounded to one decimal place.
+        const timer = setInterval(() => {
+            const usage = process.memoryUsage();
+            logger.debug("voiceWorker.mem", {
+                ...procInfo,
+                rssMb: mb(usage.rss),
+                heapUsedMb: mb(usage.heapUsed),
+                heapTotalMb: mb(usage.heapTotal),
+                externalMb: mb(usage.external),
+            });
+        }, metricsIntervalMs);
+        timer.unref?.(); // The metrics timer must not keep the process alive on its own.
+    }
+}
 /**
  * Register the English EOU inference runner in the worker process.
  *
@@ -55,4 +105,18 @@ export async function startVoiceAgentWorker(options) {
         apiSecret: server.apiSecret,
     }));
 }
+export async function startRealtimeVoiceAgentWorker(options) { // Realtime entry point: installs the process guards, adjusts process env, then delegates to startVoiceAgentWorker.
+    installVoiceWorkerProcessGuards();
+    if (process.env.LIVEKIT_EOU_TURN_DETECTION) { // Remove the EOU turn-detection setting from this process before launching.
+        delete process.env.LIVEKIT_EOU_TURN_DETECTION;
+        logger.info("realtime.worker.eouDisabled", {
+            reason: "s2s-in-model-turn-detection",
+        });
+    }
+    if (process.argv.includes("connect")) { // An exact "connect" CLI argument switches on connect mode.
+        process.env.LK_REALTIME_CONNECT_MODE = "true"; // voiceAgent.js checks this flag to register its parent-SIGTERM shutdown callback.
+        logger.info("realtime.worker.connectMode", { enabled: true });
+    }
+    await startVoiceAgentWorker(options);
+}
 //# sourceMappingURL=voiceAgentWorker.js.map

package/dist/neurolink.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * Enhanced AI provider system with natural MCP tool access.
  * Uses real MCP infrastructure for tool discovery and execution.
  */
-import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext } from "./types/index.js";
+import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext, ToolRoutingServerDescriptor } from "./types/index.js";
 import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
 import type { RedisConversationMemoryManager } from "./core/redisConversationMemoryManager.js";
 import { ExternalServerManager } from "./mcp/externalServerManager.js";
@@ -100,6 +100,7 @@ export declare class NeuroLink {
     conversationMemory?: ConversationMemoryManager | RedisConversationMemoryManager | null;
     private conversationMemoryNeedsInit;
     private conversationMemoryConfig?;
+    private toolRoutingConfig?;
     private enableOrchestration;
     private authProvider?;
     private pendingAuthConfig?;
@@ -803,6 +804,35 @@ export declare class NeuroLink {
      */
     private streamWithIterationFallback;
     private executeStreamRequest;
+    /**
+     * Pre-call tool routing for stream(): runs the router LLM once per turn
+     * and appends the unpicked servers' registered tool names to
+     * `options.excludeTools` — the per-call denylist enforced by
+     * `baseProvider.applyToolFiltering`. No-op unless `toolRouting.enabled`
+     * is true and a non-empty server catalog has been supplied. Never throws
+     * (the resolver fails open to an empty exclusion list).
+     */
+    private applyToolRoutingExclusions;
+    /**
+     * Loads a bounded window of prior conversation turns for the router so a
+     * follow-up turn carries the context it needs to classify intent. Reads this
+     * turn's conversation memory (keyed by `context.sessionId`) with
+     * summarization disabled to keep the router cheap. Fails open to an empty
+     * list — routing then falls back to the current query alone (prior
+     * behaviour). On the first turn of a conversation memory may not be
+     * initialised yet; that also yields an empty list, which is fine since the
+     * opening message already carries its own context.
+     */
+    private fetchRecentRoutingHistory;
+    /**
+     * Supplies (or replaces) the pre-call tool routing server catalog.
+     *
+     * For hosts that only know their tool servers after constructing NeuroLink
+     * (e.g. tools are registered per session/conversation). Routing must still
+     * be enabled via the constructor's `toolRouting.enabled` — setting servers
+     * alone does not activate it.
+     */
+    setToolRoutingServers(servers: ToolRoutingServerDescriptor[]): void;
     private validateStreamRequestOptions;
     private maybeHandleWorkflowStreamRequest;
     private runStandardStreamRequest;

package/dist/neurolink.js CHANGED Viewed

@@ -28,8 +28,9 @@ import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
 import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
 import { ContextBudgetExceededError } from "./context/errors.js";
 import { repairToolPairs } from "./context/toolPairRepair.js";
-import { SYSTEM_LIMITS } from "./core/constants.js";
+import { SYSTEM_LIMITS, DEFAULT_TOOL_ROUTING_TIMEOUT_MS, } from "./core/constants.js";
 import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
+import { buildToolRoutingCatalog, buildRoutingQueryFromHistory, resolveToolRoutingExclusions, } from "./core/toolRouting.js";
 import { AIProviderFactory } from "./core/factory.js";
 import { createToolEventPayload } from "./core/toolEvents.js";
 import { ProviderRegistry } from "./factories/providerRegistry.js";
@@ -436,6 +437,10 @@ export class NeuroLink {
     conversationMemory;
     conversationMemoryNeedsInit = false;
     conversationMemoryConfig;
+    // Pre-call tool routing: instance-level config from the constructor.
+    // The server catalog inside it can be supplied/replaced later via
+    // setToolRoutingServers() for hosts that register tools after construction.
+    toolRoutingConfig;
     // Add orchestration property
     enableOrchestration;
     // Authentication provider for secure access control
@@ -842,6 +847,12 @@ export class NeuroLink {
         if (config?.modelChain) {
             this.fallbackConfig.modelChain = config.modelChain;
         }
+        if (config?.toolRouting) {
+            // Shallow-clone so setToolRoutingServers() mutating this.toolRoutingConfig
+            // can't leak into the caller's config object, which may be shared across
+            // multiple NeuroLink instances.
+            this.toolRoutingConfig = { ...config.toolRouting };
+        }
         logger.setEventEmitter(this.emitter);
         // Read tool cache duration from environment variables, with a default
         const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
@@ -5622,6 +5633,16 @@ Current user's request: ${currentInput}`;
             // Make neurolink.stream the active span so every provider span (generations,
             // tool calls) parents under it — one Langfuse trace per turn, not a forest.
             const streamSpanContext = trace.setSpan(context.active(), streamSpan);
+            // Pre-call tool routing: run inside the stream-span + Langfuse context so
+            // the router's own generation span nests under this turn's trace instead
+            // of starting a separate one. Asks a cheap router LLM which tool servers
+            // the query needs and appends the unpicked servers' tools to
+            // `excludeTools`. Fails open (no exclusions). Routes on the current
+            // prompt enriched with a bounded window of recent conversation turns
+            // (pulled from conversation memory) so contextless follow-ups still
+            // classify correctly. After the workflow short-circuit, so workflow
+            // streams skip it.
+            await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.applyToolRoutingExclusions(options, originalPrompt)));
             // TTS Mode 2 deferred: stream() emits text first, then synthesizes the
             // accumulated response into a single audio chunk at end-of-stream and
             // resolves `streamResult.audio` with the same TTSResult. The resolver is
@@ -5666,6 +5687,172 @@ Current user's request: ${currentInput}`;
             throw error;
         }
     }
+    /**
+     * Pre-call tool routing for stream(): runs the router LLM once per turn
+     * and appends the unpicked servers' registered tool names to
+     * `options.excludeTools` — the per-call denylist enforced by
+     * `baseProvider.applyToolFiltering`.
+     *
+     * No-op when `toolRouting.enabled` is not truthy, when
+     * `options.disableTools` is set, when no servers are configured, or when
+     * the built catalog is empty. Routing failures fail open: they are logged
+     * via `logger.warn` and leave `options.excludeTools` untouched. The only
+     * errors that escape are those `isAbortError` recognises, so a stream
+     * cancellation still propagates to the caller.
+     *
+     * @param options - Stream options for this turn. Mutated in place:
+     *   `excludeTools` is replaced with a new array holding the previous
+     *   entries followed by the routed exclusions (only when the router
+     *   returned at least one exclusion and the stream was not aborted).
+     * @param userQuery - Text of the current turn. Used as the routing query,
+     *   combined with recent history by `buildRoutingQueryFromHistory` when
+     *   any history is available.
+     * @returns Promise that resolves with no value.
+     * @throws Rethrows any error for which `isAbortError(error)` is true.
+     */
+    async applyToolRoutingExclusions(options, userQuery) {
+        const routingConfig = this.toolRoutingConfig;
+        if (!routingConfig?.enabled || options.disableTools) {
+            return;
+        }
+        const servers = routingConfig.servers ?? [];
+        if (servers.length === 0) {
+            return;
+        }
+        // Whole setup is fail-open: catalog building (getCustomTools /
+        // buildToolRoutingCatalog) and the router call degrade to no exclusions
+        // rather than killing the stream, honoring this method's fail-open
+        // contract. Genuine stream cancellations (abort errors) still propagate.
+        try {
+            const registeredToolNames = Array.from(this.getCustomTools().keys());
+            const catalog = buildToolRoutingCatalog(servers, registeredToolNames);
+            if (catalog.length === 0) {
+                return;
+            }
+            // Fold a bounded window of recent conversation turns into the routing query.
+            // The router runs pre-memory and would otherwise see only this turn's raw
+            // text, so a contextless follow-up ("yes please") gives it nothing to
+            // classify — it fails open and routing narrows nothing. The main model
+            // still receives full history later via conversation memory; this only
+            // enriches the router's view. Fails open to the current query alone.
+            const recentMessages = await this.fetchRecentRoutingHistory(options);
+            const routingQuery = recentMessages.length > 0
+                ? buildRoutingQueryFromHistory(recentMessages, userQuery)
+                : userQuery;
+            // The router call below re-enters the public generate(), whose finally
+            // block resets _disableToolCacheForCurrentRequest to false. That flag is
+            // stream-scoped (set at the top of this turn) and read by the main tool
+            // execution path that runs after routing, so save it before the router
+            // call and restore it afterward to keep the turn's cache setting intact.
+            const cacheDisabledForCurrentRequest = this._disableToolCacheForCurrentRequest;
+            let routedExcludeTools;
+            try {
+                routedExcludeTools = await resolveToolRoutingExclusions({
+                    catalog,
+                    alwaysIncludeServerIds: routingConfig.alwaysIncludeServerIds ?? [],
+                    userQuery: routingQuery,
+                    routerPromptPrefix: routingConfig.routerPromptPrefix,
+                    routerModel: {
+                        provider: routingConfig.routerModel?.provider ??
+                            options.provider,
+                        model: routingConfig.routerModel?.model ?? options.model,
+                        region: routingConfig.routerModel?.region ?? options.region,
+                        temperature: routingConfig.routerModel?.temperature,
+                    },
+                    timeoutMs: routingConfig.timeoutMs ?? DEFAULT_TOOL_ROUTING_TIMEOUT_MS,
+                    // Forward the stream's abort signal so a cancelled stream aborts the
+                    // router call promptly instead of waiting out the routing timeout.
+                    generateFn: (generateOptions) => this.generate({
+                        ...generateOptions,
+                        abortSignal: options.abortSignal,
+                    }),
+                });
+            }
+            finally {
+                this._disableToolCacheForCurrentRequest =
+                    cacheDisabledForCurrentRequest;
+            }
+            // Aborted during the router call — skip applying now-stale exclusions;
+            // the main generation path enforces the abort itself.
+            if (options.abortSignal?.aborted) {
+                return;
+            }
+            if (routedExcludeTools.length > 0) {
+                options.excludeTools = [
+                    ...(options.excludeTools ?? []),
+                    ...routedExcludeTools,
+                ];
+            }
+        }
+        catch (error) {
+            if (isAbortError(error)) {
+                throw error;
+            }
+            logger.warn("[ToolRouting] Routing setup failed, failing open", {
+                error: error instanceof Error ? error.message : String(error),
+            });
+        }
+    }
+    /**
+     * Loads prior conversation turns for the router so a follow-up turn carries
+     * the context it needs to classify intent.
+     *
+     * Sources, in order of precedence:
+     *  1. `options.conversationMessages`, returned as-is when non-empty (no
+     *     sessionId required).
+     *  2. Conversation memory for `options.context.sessionId`, read through
+     *     `getConversationMessages` with `enableSummarization: false` to keep
+     *     the router cheap. Memory is lazily initialised first via
+     *     `initializeConversationMemoryForGeneration`.
+     *
+     * Fails open to an empty list — when there is no non-empty string
+     * sessionId, when `this.conversationMemory` is still unset after the init
+     * call, or when anything in here throws (logged at debug level) — and
+     * routing then falls back to the current query alone.
+     *
+     * NOTE(review): no size limit is applied in this method; the "bounded
+     * window" is presumably enforced downstream (e.g. by
+     * buildRoutingQueryFromHistory) — confirm.
+     *
+     * @param options - Stream options for this turn; `conversationMessages`
+     *   and `context.sessionId` are read, and the whole object is forwarded to
+     *   `getConversationMessages`.
+     * @returns Promise resolving to the array of prior messages (possibly
+     *   empty). Never rejects: every error is caught and mapped to `[]`.
+     */
+    async fetchRecentRoutingHistory(options) {
+        try {
+            const requestContext = options.context;
+            // Inline multi-turn callers pass prior turns via options.conversationMessages
+            // (the same field the main model reads) rather than server-side session
+            // memory. Honor it directly so a contextless follow-up still routes with
+            // context even when no sessionId is present.
+            if (options.conversationMessages &&
+                options.conversationMessages.length > 0) {
+                return options.conversationMessages;
+            }
+            const sessionId = requestContext?.sessionId;
+            if (typeof sessionId !== "string" || !sessionId) {
+                return [];
+            }
+            // The pre-call router runs earlier in the stream pipeline than the main
+            // generation path's own memory init (initializeConversationMemoryForGeneration),
+            // so this.conversationMemory is still undefined at router time and the
+            // router would only ever see the current turn. Trigger the same lazy init
+            // the main path uses — it is idempotent, so the later call is a no-op —
+            // so the router can read prior turns. Fails open via the surrounding catch.
+            await this.initializeConversationMemoryForGeneration(`tool-routing-${Date.now()}`, Date.now(), process.hrtime.bigint());
+            const memory = this.conversationMemory;
+            if (!memory) {
+                return [];
+            }
+            // Reuse the SAME reader the main model uses so the router sees identically
+            // curated history: polluted turns dropped, read instrumented under the
+            // neurolink.conversation.getMessages span. enableSummarization=false keeps
+            // routing cheap and free of any summary-LLM side effect. The remaining
+            // tool_call/tool_result turns are dropped at transcript-render time
+            // (buildRoutingQueryFromHistory) to mirror what the main model is sent.
+            const messages = await getConversationMessages(memory, {
+                ...options,
+                enableSummarization: false,
+            });
+            logger.debug("[ToolRouting] Loaded conversation history for router", {
+                sessionId,
+                messageCount: messages.length,
+            });
+            return messages;
+        }
+        catch (error) {
+            logger.debug("[ToolRouting] Failed to load conversation history; routing on current query only", {
+                error: error instanceof Error ? error.message : String(error),
+            });
+            return [];
+        }
+    }
+    /**
+     * Supplies (or replaces) the pre-call tool routing server catalog.
+     *
+     * For hosts that only know their tool servers after constructing NeuroLink
+     * (e.g. tools are registered per session/conversation). Routing must still
+     * be enabled via the constructor's `toolRouting.enabled` — setting servers
+     * alone does not activate it.
+     */
+    setToolRoutingServers(servers) {
+        if (!this.toolRoutingConfig) {
+            logger.warn("[ToolRouting] setToolRoutingServers called without toolRouting constructor config — servers stored but routing stays disabled");
+            this.toolRoutingConfig = { enabled: false, servers };
+            return;
+        }
+        this.toolRoutingConfig.servers = servers;
+    }
     async validateStreamRequestOptions(options, startTime) {
         await this.validateStreamInput(options);
         this.enforceSessionBudget(options.maxBudgetUsd);

package/dist/telemetry/attributes.js CHANGED Viewed

@@ -156,7 +156,9 @@ export function spanJsonAttribute(value, maxChars = SPAN_ATTRIBUTE_MAX_CHARS) {
         serialized = String(value);
     }
     if (serialized.length > maxChars) {
-        return `${serialized.slice(0, maxChars)}...[truncated ${serialized.length - maxChars} chars]`;
+        const truncationSuffix = `...[truncated ${serialized.length - maxChars} chars]`;
+        const keepLength = Math.max(0, maxChars - truncationSuffix.length);
+        return `${serialized.slice(0, keepLength)}${truncationSuffix}`;
     }
     return serialized;
 }