@juspay/neurolink 9.71.0 → 9.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +330 -312
  3. package/dist/core/constants.d.ts +1 -0
  4. package/dist/core/constants.js +2 -0
  5. package/dist/core/toolRouting.d.ts +59 -0
  6. package/dist/core/toolRouting.js +232 -0
  7. package/dist/lib/core/constants.d.ts +1 -0
  8. package/dist/lib/core/constants.js +2 -0
  9. package/dist/lib/core/toolRouting.d.ts +59 -0
  10. package/dist/lib/core/toolRouting.js +233 -0
  11. package/dist/lib/neurolink.d.ts +31 -1
  12. package/dist/lib/neurolink.js +188 -1
  13. package/dist/lib/telemetry/attributes.js +3 -1
  14. package/dist/lib/types/config.d.ts +8 -0
  15. package/dist/lib/types/index.d.ts +1 -0
  16. package/dist/lib/types/index.js +1 -0
  17. package/dist/lib/types/livekit.d.ts +134 -0
  18. package/dist/lib/types/toolRouting.d.ts +91 -0
  19. package/dist/lib/types/toolRouting.js +19 -0
  20. package/dist/lib/voice/livekit/brain.js +1 -1
  21. package/dist/lib/voice/livekit/config.d.ts +12 -1
  22. package/dist/lib/voice/livekit/config.js +54 -0
  23. package/dist/lib/voice/livekit/eventBridge.js +4 -4
  24. package/dist/lib/voice/livekit/index.d.ts +9 -2
  25. package/dist/lib/voice/livekit/index.js +9 -2
  26. package/dist/lib/voice/livekit/realtimeEventBridge.d.ts +14 -0
  27. package/dist/lib/voice/livekit/realtimeEventBridge.js +161 -0
  28. package/dist/lib/voice/livekit/realtimeMcpTools.d.ts +31 -0
  29. package/dist/lib/voice/livekit/realtimeMcpTools.js +194 -0
  30. package/dist/lib/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
  31. package/dist/lib/voice/livekit/realtimeVoiceAgent.js +362 -0
  32. package/dist/lib/voice/livekit/roomContext.d.ts +23 -0
  33. package/dist/lib/voice/livekit/roomContext.js +57 -0
  34. package/dist/lib/voice/livekit/roomDispatch.d.ts +24 -0
  35. package/dist/lib/voice/livekit/roomDispatch.js +31 -0
  36. package/dist/lib/voice/livekit/schemaSanitizer.d.ts +26 -0
  37. package/dist/lib/voice/livekit/schemaSanitizer.js +144 -0
  38. package/dist/lib/voice/livekit/vertexAuth.d.ts +30 -0
  39. package/dist/lib/voice/livekit/vertexAuth.js +73 -0
  40. package/dist/lib/voice/livekit/voiceAgent.js +47 -37
  41. package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +2 -0
  42. package/dist/lib/voice/livekit/voiceAgentWorker.js +64 -0
  43. package/dist/neurolink.d.ts +31 -1
  44. package/dist/neurolink.js +188 -1
  45. package/dist/telemetry/attributes.js +3 -1
  46. package/dist/types/config.d.ts +8 -0
  47. package/dist/types/index.d.ts +1 -0
  48. package/dist/types/index.js +1 -0
  49. package/dist/types/livekit.d.ts +134 -0
  50. package/dist/types/toolRouting.d.ts +91 -0
  51. package/dist/types/toolRouting.js +18 -0
  52. package/dist/voice/livekit/brain.js +1 -1
  53. package/dist/voice/livekit/config.d.ts +12 -1
  54. package/dist/voice/livekit/config.js +54 -0
  55. package/dist/voice/livekit/eventBridge.js +4 -4
  56. package/dist/voice/livekit/index.d.ts +9 -2
  57. package/dist/voice/livekit/index.js +9 -2
  58. package/dist/voice/livekit/realtimeEventBridge.d.ts +14 -0
  59. package/dist/voice/livekit/realtimeEventBridge.js +160 -0
  60. package/dist/voice/livekit/realtimeMcpTools.d.ts +31 -0
  61. package/dist/voice/livekit/realtimeMcpTools.js +193 -0
  62. package/dist/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
  63. package/dist/voice/livekit/realtimeVoiceAgent.js +361 -0
  64. package/dist/voice/livekit/roomContext.d.ts +23 -0
  65. package/dist/voice/livekit/roomContext.js +56 -0
  66. package/dist/voice/livekit/roomDispatch.d.ts +24 -0
  67. package/dist/voice/livekit/roomDispatch.js +30 -0
  68. package/dist/voice/livekit/schemaSanitizer.d.ts +26 -0
  69. package/dist/voice/livekit/schemaSanitizer.js +143 -0
  70. package/dist/voice/livekit/vertexAuth.d.ts +30 -0
  71. package/dist/voice/livekit/vertexAuth.js +72 -0
  72. package/dist/voice/livekit/voiceAgent.js +47 -37
  73. package/dist/voice/livekit/voiceAgentWorker.d.ts +2 -0
  74. package/dist/voice/livekit/voiceAgentWorker.js +64 -0
  75. package/package.json +2 -1
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Vertex authentication helpers for the realtime voice agent.
3
+ *
4
+ * The Gemini Live WebSocket authenticates to Vertex via Application Default
5
+ * Credentials (ADC). These helpers materialise ADC from the split
6
+ * `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
7
+ * any Gemini Developer API key from the environment so `@google/genai` uses
8
+ * Vertex/ADC auth (not an API key) for the realtime WebSocket.
9
+ *
10
+ * See docs/features/livekit-voice-agent.md.
11
+ */
12
+ /**
13
+ * Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
14
+ *
15
+ * The google realtime plugin authenticates Vertex via ADC (it does not accept
16
+ * inline credentials), so this writes a temp service-account JSON and points
17
+ * `GOOGLE_APPLICATION_CREDENTIALS` at it — unless ADC is already configured.
18
+ * No-op when `GOOGLE_APPLICATION_CREDENTIALS` is set or the `GOOGLE_AUTH_*`
19
+ * fields are absent (auth then relies on ambient ADC).
20
+ */
21
+ export declare function ensureVertexAdc(): void;
22
+ /**
23
+ * Force pure Vertex/ADC auth for the Gemini Live WebSocket.
24
+ *
25
+ * `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
26
+ * WebSocket auth even when `vertexai: true` and project/location are set, which
27
+ * Vertex rejects at the handshake (WS close 1006). The realtime worker only
28
+ * ever talks to Vertex, so remove these keys (only affects this process).
29
+ */
30
+ export declare function clearGeminiApiKeyEnv(): void;
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Vertex authentication helpers for the realtime voice agent.
3
+ *
4
+ * The Gemini Live WebSocket authenticates to Vertex via Application Default
5
+ * Credentials (ADC). These helpers materialise ADC from the split
6
+ * `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
7
+ * any Gemini Developer API key from the environment so `@google/genai` uses
8
+ * Vertex/ADC auth (not an API key) for the realtime WebSocket.
9
+ *
10
+ * See docs/features/livekit-voice-agent.md.
11
+ */
12
+ import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
13
+ import os from "node:os";
14
+ import path from "node:path";
15
+ import { logger } from "../../utils/logger.js";
16
/**
 * Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
 *
 * The google realtime plugin authenticates Vertex via ADC (it does not accept
 * inline credentials), so this writes a temp service-account JSON and points
 * `GOOGLE_APPLICATION_CREDENTIALS` at it — unless ADC is already configured.
 * No-op when `GOOGLE_APPLICATION_CREDENTIALS` is set or the `GOOGLE_AUTH_*`
 * fields are absent (auth then relies on ambient ADC).
 *
 * Optional fields that are set but empty (for example `GOOGLE_AUTH_TYPE=""`)
 * are treated the same as unset ones, matching the truthiness check already
 * applied to the required client email / private key.
 *
 * Side effects: creates a temp directory containing `adc.json` (mode 0o600),
 * registers a process `exit` handler that removes that directory, and sets
 * `process.env.GOOGLE_APPLICATION_CREDENTIALS`.
 *
 * @returns {void}
 * @throws {Error} When the temp directory or credentials file cannot be
 *   created. A directory that was already created is removed before the
 *   error is rethrown.
 */
export function ensureVertexAdc() {
    if (process.env.GOOGLE_APPLICATION_CREDENTIALS) {
        return;
    }
    const clientEmail = process.env.GOOGLE_AUTH_CLIENT_EMAIL;
    const rawPrivateKey = process.env.GOOGLE_AUTH_PRIVATE_KEY;
    if (!clientEmail || !rawPrivateKey) {
        logger.warn("[RealtimeVoiceAgent] No GOOGLE_APPLICATION_CREDENTIALS and no GOOGLE_AUTH_* fields — Vertex auth will rely on ambient ADC.");
        return;
    }
    const credentials = {
        // `||` (not `??`): an empty env value must fall back to the default,
        // otherwise an empty string ends up in the credentials JSON.
        type: process.env.GOOGLE_AUTH_TYPE || "service_account",
        // `undefined` is dropped by JSON.stringify, so no empty project_id is written.
        project_id: process.env.GOOGLE_AUTH_BREEZE_PROJECT_ID ||
            process.env.GOOGLE_CLOUD_PROJECT_ID ||
            undefined,
        private_key_id: process.env.GOOGLE_AUTH_PRIVATE_KEY_ID || undefined,
        // Env files usually carry the PEM with literal "\n" sequences.
        private_key: rawPrivateKey.replace(/\\n/g, "\n"),
        client_email: clientEmail,
        token_uri: process.env.GOOGLE_AUTH_TOKEN_URI ||
            "https://oauth2.googleapis.com/token",
    };
    const credentialsDir = mkdtempSync(path.join(os.tmpdir(), "vertex-adc-"));
    const credentialsPath = path.join(credentialsDir, "adc.json");
    try {
        writeFileSync(credentialsPath, JSON.stringify(credentials), {
            mode: 0o600,
            flag: "wx",
        });
    }
    catch (error) {
        // The exit cleanup is not registered yet; do not leave the temp dir behind.
        rmSync(credentialsDir, { recursive: true, force: true });
        throw error;
    }
    process.on("exit", () => {
        rmSync(credentialsDir, { recursive: true, force: true });
    });
    process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsPath;
    logger.info(`[RealtimeVoiceAgent] Vertex ADC written to ${credentialsPath} (project ${credentials.project_id}).`);
}
57
/**
 * Remove Gemini Developer API keys from this process's environment so the
 * Gemini Live WebSocket authenticates with Vertex/ADC only.
 *
 * `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
 * WebSocket auth even when `vertexai: true` and project/location are set, which
 * Vertex rejects at the handshake (WS close 1006). The realtime worker only
 * ever talks to Vertex, so the keys are dropped here; only `process.env` of the
 * current process is touched. Each removed variable is reported via
 * `logger.info`; variables that are unset or empty are skipped silently.
 *
 * @returns {void}
 */
export function clearGeminiApiKeyEnv() {
    const apiKeyVars = ["GOOGLE_API_KEY", "GOOGLE_AI_API_KEY", "GEMINI_API_KEY"];
    apiKeyVars
        .filter((name) => Boolean(process.env[name]))
        .forEach((name) => {
            delete process.env[name];
            logger.info(`[RealtimeVoiceAgent] cleared ${name} so genai uses Vertex/ADC auth (not API key) for the Live WS.`);
        });
}
73
+ //# sourceMappingURL=vertexAuth.js.map
@@ -192,9 +192,20 @@ export function defineVoiceAgent(config) {
192
192
  async function entry(ctx) {
193
193
  const entryStartedAt = Date.now();
194
194
  await ctx.connect();
195
- logger.debug(`[LiveKitVoiceAgent] Joined room "${ctx.room.name}" in ${Date.now() - entryStartedAt}ms`);
196
- // When the user actually stopped speaking (VAD), used to measure how long
197
- // the agent waited after speech before committing the turn to the LLM.
195
+ logger.debug("voice.agent.roomJoined", {
196
+ room: ctx.room.name,
197
+ ms: Date.now() - entryStartedAt,
198
+ });
199
+ const { RoomEvent } = await import("@livekit/rtc-node");
200
+ ctx.room.on(RoomEvent.ParticipantDisconnected, () => {
201
+ if (ctx.room.remoteParticipants.size === 0) {
202
+ logger.info("voice.agent.participantLeft", {
203
+ room: ctx.room.name,
204
+ action: "shutdown",
205
+ });
206
+ ctx.shutdown("participant left");
207
+ }
208
+ });
198
209
  let userStoppedSpeakingAt;
199
210
  const neurolink = await config.createNeuroLink();
200
211
  const brain = createVoiceBrain({
@@ -245,11 +256,6 @@ export function defineVoiceAgent(config) {
245
256
  final: false,
246
257
  });
247
258
  }
248
- /**
249
- * Lock the user bubble at turn-end and reset the buffer for the next turn.
250
- * `replacesPrevious` tells the client this committed turn absorbed a prior
251
- * interrupted turn, so it should remove the orphaned previous user bubble.
252
- */
253
259
  function commitUserTranscript(finalText, replacesPrevious = false) {
254
260
  if (transcriptEmitter !== undefined) {
255
261
  transcriptEmitter.emit("voice:user-transcript", {
@@ -274,7 +280,9 @@ export function defineVoiceAgent(config) {
274
280
  pendingPrefix = "";
275
281
  commitUserTranscript(promptText, hadPrefix);
276
282
  if (userStoppedSpeakingAt !== undefined) {
277
- logger.debug(`[LiveKitVoiceAgent] Endpointing waited ${Date.now() - userStoppedSpeakingAt}ms before sending turn to LLM`);
283
+ logger.debug("voice.agent.endpointingWaited", {
284
+ ms: Date.now() - userStoppedSpeakingAt,
285
+ });
278
286
  }
279
287
  return brainTurnStream(brain, promptText, conversationId, () => {
280
288
  // Interrupted before producing any reply → carry this turn's text
@@ -299,7 +307,7 @@ export function defineVoiceAgent(config) {
299
307
  };
300
308
  if (eouTurnDetector !== undefined) {
301
309
  turnHandling.turnDetection = eouTurnDetector;
302
- logger.info("[LiveKitVoiceAgent] Semantic end-of-utterance turn detection enabled (English)");
310
+ logger.info("voice.agent.eouEnabled", { language: "english" });
303
311
  }
304
312
  else if (config.turn?.mode) {
305
313
  turnHandling.turnDetection = config.turn.mode;
@@ -319,20 +327,11 @@ export function defineVoiceAgent(config) {
319
327
  tts,
320
328
  llm: new PlaceholderLLM(),
321
329
  turnHandling,
322
- // Do NOT speculatively call the LLM on preflight transcripts before the
323
- // turn ends — with NeuroLink as the brain each call is a real LLM request,
324
- // and it makes the agent feel like it responds while you're still talking.
325
330
  preemptiveGeneration: false,
326
331
  });
327
332
  const agent = new NeuroLinkVoiceAgent({
328
333
  instructions: config.systemPrompt ?? "",
329
334
  });
330
- // Inactivity watchdog: shut the per-call Job down after a stretch with no
331
- // user or agent activity (mirrors Clairvoyance). On timeout `ctx.shutdown`
332
- // runs the shutdown callbacks (disposing the bridge) and the Job process
333
- // exits — freeing its RAM and the EOU model — while the browser observes a
334
- // room disconnect. Reset on every interaction below. Configure via
335
- // VOICE_INACTIVITY_TIMEOUT_MS (default 10 min); <= 0 disables the watchdog.
336
335
  const inactivityTimeoutMs = Number(process.env.VOICE_INACTIVITY_TIMEOUT_MS ?? 600_000);
337
336
  const inactivityEnabled = Number.isFinite(inactivityTimeoutMs) && inactivityTimeoutMs > 0;
338
337
  let inactivityTimer;
@@ -350,7 +349,11 @@ export function defineVoiceAgent(config) {
350
349
  clearInactivityTimer();
351
350
  inactivityTimer = setTimeout(() => {
352
351
  inactivityFired = true;
353
- logger.info(`[LiveKitVoiceAgent] Inactivity timeout (${Math.round(inactivityTimeoutMs / 1000)}s) reached — shutting down job for room "${ctx.room.name}"`);
352
+ logger.info("voice.agent.inactivityTimeout", {
353
+ room: ctx.room.name,
354
+ timeoutMs: inactivityTimeoutMs,
355
+ action: "shutdown",
356
+ });
354
357
  ctx.shutdown("inactivity timeout");
355
358
  }, inactivityTimeoutMs);
356
359
  // The watchdog must not, by itself, keep the event loop alive.
@@ -359,46 +362,53 @@ export function defineVoiceAgent(config) {
359
362
  ctx.addShutdownCallback(async () => {
360
363
  clearInactivityTimer();
361
364
  });
362
- // Track when the user actually stops speaking (VAD) so endpointing latency
363
- // can be measured, and reset the inactivity watchdog on user activity.
365
+ if (process.env.LK_REALTIME_CONNECT_MODE === "true") {
366
+ ctx.addShutdownCallback(async () => {
367
+ const parentPid = process.ppid;
368
+ setTimeout(() => {
369
+ try {
370
+ if (typeof parentPid === "number" && parentPid > 1) {
371
+ process.kill(parentPid, "SIGTERM");
372
+ }
373
+ }
374
+ catch {
375
+ // Parent already gone — fall through to the hard exit below.
376
+ }
377
+ process.exit(0);
378
+ }, 500).unref?.();
379
+ });
380
+ }
364
381
  session.on(voice.AgentSessionEventTypes.UserStateChanged, (ev) => {
365
382
  noteActivity();
366
383
  if (ev.oldState === "speaking" && ev.newState !== "speaking") {
367
384
  userStoppedSpeakingAt = Date.now();
368
385
  }
369
386
  });
370
- // Reset the inactivity watchdog on any agent speech/processing and on every
371
- // committed conversation item (user turn or agent reply), so the timeout
372
- // only fires during a genuine lull in the conversation.
373
387
  session.on(voice.AgentSessionEventTypes.AgentStateChanged, () => {
374
388
  noteActivity();
375
389
  });
376
390
  session.on(voice.AgentSessionEventTypes.ConversationItemAdded, () => {
377
391
  noteActivity();
378
392
  });
379
- // Forward user STT transcripts to the data-channel bridge as a single
380
- // live-updating bubble. `UserInputTranscribed` fires `isFinal: true` per
381
- // finalized SEGMENT (several per turn), so we never forward those as the
382
- // turn-final; `emitUserTranscriptSegment` accumulates them into the per-turn
383
- // buffer and emits `final: false`. The lone `final: true` is sent from
384
- // `llmNode` at the real turn boundary.
385
393
  if (transcriptEventsEnabled) {
386
394
  session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (ev) => {
387
395
  emitUserTranscriptSegment(ev.transcript, ev.isFinal);
388
396
  });
389
397
  }
390
- logger.info("[LiveKitVoiceAgent] Session starting", {
398
+ logger.info("voice.agent.sessionStarting", {
391
399
  room: ctx.room.name,
392
400
  provider,
393
401
  model,
394
402
  });
395
403
  await session.start({ agent, room: ctx.room });
396
- // Start the inactivity countdown now that the session is live; every
397
- // interaction handler above re-arms it.
404
+ if (config.greeting !== undefined && config.greeting.trim().length > 0) {
405
+ const greetingStream = brainTurnStream(brain, config.greeting, conversationId);
406
+ session.say(greetingStream, {
407
+ addToChatCtx: true,
408
+ allowInterruptions: true,
409
+ });
410
+ }
398
411
  noteActivity();
399
- // Data-channel event bridge: forward NeuroLink events (text, tool calls,
400
- // results, HITL prompts, status) to the browser, and accept HITL responses
401
- // back. Only when enabled and the instance exposes its event emitter.
402
412
  if (config.events?.enabled === true && neurolink.getEventEmitter) {
403
413
  const bridge = await attachEventBridge({
404
414
  room: ctx.room,
@@ -11,6 +11,7 @@
11
11
  * See docs/features/livekit-voice-agent.md.
12
12
  */
13
13
  import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
14
+ export declare function installVoiceWorkerProcessGuards(metricsIntervalMs?: number): void;
14
15
  /**
15
16
  * Launch the LiveKit voice agent worker.
16
17
  *
@@ -25,3 +26,4 @@ import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
25
26
  * ```
26
27
  */
27
28
  export declare function startVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
29
+ export declare function startRealtimeVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
@@ -11,8 +11,58 @@
11
11
  * See docs/features/livekit-voice-agent.md.
12
12
  */
13
13
  import { resolveEouTurnDetection, resolveLiveKitServerConfig, } from "./config.js";
14
+ import { logger } from "../../utils/logger.js";
14
15
  const DEFAULT_AGENT_NAME = "neurolink-voice";
15
16
  const EOU_METHOD_MULTILINGUAL = "lk_end_of_utterance_multilingual";
17
// Any argv entry containing "job_proc" marks this process as a job child.
// NOTE(review): presumably that is how the agents framework launches per-job
// subprocesses — confirm against the LiveKit agents runtime.
const IS_JOB_CHILD = process.argv.some((arg) => arg.includes("job_proc"));
// Role label attached to every guard log line.
const PROC_ROLE = IS_JOB_CHILD ? "job(child)" : "worker(parent)";
// Keeps installVoiceWorkerProcessGuards idempotent within one process.
let processGuardsInstalled = false;
/**
 * Render a thrown or rejected value as a loggable string: its `stack` when it
 * has one, otherwise its string form. Never throws, so it is safe to call
 * from inside the process-level error handlers.
 */
function describeThrown(value) {
    try {
        return value?.stack ?? String(value);
    }
    catch {
        // String() throws for e.g. null-prototype objects or a throwing toString().
        return Object.prototype.toString.call(value);
    }
}
/**
 * Install process-level diagnostics for the voice worker. Idempotent: only the
 * first call in a process has any effect.
 *
 * Installs:
 * - an `uncaughtException` handler that logs the error and, in a job child
 *   only, exits with code 1 after 1s;
 * - an `unhandledRejection` handler that logs the reason;
 * - `SIGTERM` / `SIGINT` / `SIGHUP` handlers that log the signal and exit with
 *   code 0 after 1.5s;
 * - a periodic memory-usage debug log (unref'd, so it does not keep the event
 *   loop alive) when `metricsIntervalMs` is a finite number greater than 0.
 *
 * @param {number} [metricsIntervalMs] Interval for the memory log in ms.
 *   Defaults to `VOICE_METRICS_INTERVAL_MS` or 10000; a non-finite or
 *   non-positive value disables the memory log.
 * @returns {void}
 */
export function installVoiceWorkerProcessGuards(metricsIntervalMs = Number(process.env.VOICE_METRICS_INTERVAL_MS ?? 10000)) {
    if (processGuardsInstalled) {
        return;
    }
    processGuardsInstalled = true;
    const procInfo = {
        role: PROC_ROLE,
        pid: process.pid,
        ppid: process.ppid,
    };
    process.on("uncaughtException", (error) => {
        logger.error("voiceWorker.uncaughtException", {
            ...procInfo,
            error: describeThrown(error),
        });
        if (IS_JOB_CHILD) {
            // Short delay before exiting; unref'd so the timer alone never
            // keeps the process alive.
            setTimeout(() => process.exit(1), 1000).unref?.();
        }
    });
    process.on("unhandledRejection", (reason) => {
        logger.error("voiceWorker.unhandledRejection", {
            ...procInfo,
            // Same serialisation as uncaughtException: an Error without a
            // stack must not be logged as `undefined`.
            error: describeThrown(reason),
        });
    });
    for (const signal of ["SIGTERM", "SIGINT", "SIGHUP"]) {
        process.on(signal, () => {
            logger.warn("voiceWorker.signal", { ...procInfo, signal });
            setTimeout(() => process.exit(0), 1500);
        });
    }
    if (Number.isFinite(metricsIntervalMs) && metricsIntervalMs > 0) {
        // Bytes -> megabytes, rounded to one decimal place.
        const mb = (bytes) => Math.round((bytes / 1024 / 1024) * 10) / 10;
        const timer = setInterval(() => {
            const usage = process.memoryUsage();
            logger.debug("voiceWorker.mem", {
                ...procInfo,
                rssMb: mb(usage.rss),
                heapUsedMb: mb(usage.heapUsed),
                heapTotalMb: mb(usage.heapTotal),
                externalMb: mb(usage.external),
            });
        }, metricsIntervalMs);
        timer.unref?.();
    }
}
16
66
  /**
17
67
  * Register the English EOU inference runner in the worker process.
18
68
  *
@@ -55,4 +105,18 @@ export async function startVoiceAgentWorker(options) {
55
105
  apiSecret: server.apiSecret,
56
106
  }));
57
107
  }
108
/**
 * Launch the voice agent worker for the realtime agent.
 *
 * Before delegating to `startVoiceAgentWorker` this:
 * 1. installs the process guards (`installVoiceWorkerProcessGuards`);
 * 2. removes `LIVEKIT_EOU_TURN_DETECTION` from the environment when it is set,
 *    logging the reason as "s2s-in-model-turn-detection";
 * 3. sets `LK_REALTIME_CONNECT_MODE=true` when the CLI arguments contain the
 *    literal `connect`.
 *
 * @param options Worker launch options, passed through unchanged to
 *   `startVoiceAgentWorker`.
 * @returns {Promise<void>} Settles when `startVoiceAgentWorker` settles.
 */
export async function startRealtimeVoiceAgentWorker(options) {
    installVoiceWorkerProcessGuards();
    const { env, argv } = process;
    if (env.LIVEKIT_EOU_TURN_DETECTION) {
        delete env.LIVEKIT_EOU_TURN_DETECTION;
        const details = { reason: "s2s-in-model-turn-detection" };
        logger.info("realtime.worker.eouDisabled", details);
    }
    const connectRequested = argv.includes("connect");
    if (connectRequested) {
        env.LK_REALTIME_CONNECT_MODE = "true";
        logger.info("realtime.worker.connectMode", { enabled: true });
    }
    await startVoiceAgentWorker(options);
}
58
122
  //# sourceMappingURL=voiceAgentWorker.js.map
@@ -5,7 +5,7 @@
5
5
  * Enhanced AI provider system with natural MCP tool access.
6
6
  * Uses real MCP infrastructure for tool discovery and execution.
7
7
  */
8
- import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext } from "./types/index.js";
8
+ import type { CompactionConfig, CompactionResult, SpanData, ObservabilityConfig, MetricsSummary, MCPToolAnnotations, TraceView, AuthenticatedContext, AuthProvider, JsonObject, NeuroLinkEvents, TypedEventEmitter, MCPEnhancementsConfig, NeuroLinkAuthConfig, NeurolinkConstructorConfig, ChatMessage, ExternalMCPOperationResult, ExternalMCPServerInstance, ExternalMCPToolInfo, GenerateOptions, GenerateResult, ProviderStatus, TextGenerationOptions, TextGenerationResult, MCPExecutableTool, MCPServerInfo, MCPStatus, StreamOptions, StreamResult, ToolExecutionContext, ToolExecutionSummary, ToolInfo, ToolRegistrationOptions, BatchOperationResult, StreamGenerationEndContext, ToolRoutingServerDescriptor } from "./types/index.js";
9
9
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
10
10
  import type { RedisConversationMemoryManager } from "./core/redisConversationMemoryManager.js";
11
11
  import { ExternalServerManager } from "./mcp/externalServerManager.js";
@@ -100,6 +100,7 @@ export declare class NeuroLink {
100
100
  conversationMemory?: ConversationMemoryManager | RedisConversationMemoryManager | null;
101
101
  private conversationMemoryNeedsInit;
102
102
  private conversationMemoryConfig?;
103
+ private toolRoutingConfig?;
103
104
  private enableOrchestration;
104
105
  private authProvider?;
105
106
  private pendingAuthConfig?;
@@ -803,6 +804,35 @@ export declare class NeuroLink {
803
804
  */
804
805
  private streamWithIterationFallback;
805
806
  private executeStreamRequest;
807
+ /**
808
+ * Pre-call tool routing for stream(): runs the router LLM once per turn
809
+ * and appends the unpicked servers' registered tool names to
810
+ * `options.excludeTools` — the per-call denylist enforced by
811
+ * `baseProvider.applyToolFiltering`. No-op unless `toolRouting.enabled`
812
+ * is true and a non-empty server catalog has been supplied. Never throws
813
+ * (the resolver fails open to an empty exclusion list).
814
+ */
815
+ private applyToolRoutingExclusions;
816
+ /**
817
+ * Loads a bounded window of prior conversation turns for the router so a
818
+ * follow-up turn carries the context it needs to classify intent. Reads this
819
+ * turn's conversation memory (keyed by `context.sessionId`) with
820
+ * summarization disabled to keep the router cheap. Fails open to an empty
821
+ * list — routing then falls back to the current query alone (prior
822
+ * behaviour). On the first turn of a conversation memory may not be
823
+ * initialised yet; that also yields an empty list, which is fine since the
824
+ * opening message already carries its own context.
825
+ */
826
+ private fetchRecentRoutingHistory;
827
+ /**
828
+ * Supplies (or replaces) the pre-call tool routing server catalog.
829
+ *
830
+ * For hosts that only know their tool servers after constructing NeuroLink
831
+ * (e.g. tools are registered per session/conversation). Routing must still
832
+ * be enabled via the constructor's `toolRouting.enabled` — setting servers
833
+ * alone does not activate it.
834
+ */
835
+ setToolRoutingServers(servers: ToolRoutingServerDescriptor[]): void;
806
836
  private validateStreamRequestOptions;
807
837
  private maybeHandleWorkflowStreamRequest;
808
838
  private runStandardStreamRequest;
package/dist/neurolink.js CHANGED
@@ -28,8 +28,9 @@ import { emergencyContentTruncation } from "./context/emergencyTruncation.js";
28
28
  import { getContextOverflowProvider, isContextOverflowError, parseProviderOverflowDetails, } from "./context/errorDetection.js";
29
29
  import { ContextBudgetExceededError } from "./context/errors.js";
30
30
  import { repairToolPairs } from "./context/toolPairRepair.js";
31
- import { SYSTEM_LIMITS } from "./core/constants.js";
31
+ import { SYSTEM_LIMITS, DEFAULT_TOOL_ROUTING_TIMEOUT_MS, } from "./core/constants.js";
32
32
  import { ConversationMemoryManager } from "./core/conversationMemoryManager.js";
33
+ import { buildToolRoutingCatalog, buildRoutingQueryFromHistory, resolveToolRoutingExclusions, } from "./core/toolRouting.js";
33
34
  import { AIProviderFactory } from "./core/factory.js";
34
35
  import { createToolEventPayload } from "./core/toolEvents.js";
35
36
  import { ProviderRegistry } from "./factories/providerRegistry.js";
@@ -436,6 +437,10 @@ export class NeuroLink {
436
437
  conversationMemory;
437
438
  conversationMemoryNeedsInit = false;
438
439
  conversationMemoryConfig;
440
+ // Pre-call tool routing: instance-level config from the constructor.
441
+ // The server catalog inside it can be supplied/replaced later via
442
+ // setToolRoutingServers() for hosts that register tools after construction.
443
+ toolRoutingConfig;
439
444
  // Add orchestration property
440
445
  enableOrchestration;
441
446
  // Authentication provider for secure access control
@@ -842,6 +847,12 @@ export class NeuroLink {
842
847
  if (config?.modelChain) {
843
848
  this.fallbackConfig.modelChain = config.modelChain;
844
849
  }
850
+ if (config?.toolRouting) {
851
+ // Shallow-clone so setToolRoutingServers() mutating this.toolRoutingConfig
852
+ // can't leak into the caller's config object, which may be shared across
853
+ // multiple NeuroLink instances.
854
+ this.toolRoutingConfig = { ...config.toolRouting };
855
+ }
845
856
  logger.setEventEmitter(this.emitter);
846
857
  // Read tool cache duration from environment variables, with a default
847
858
  const cacheDurationEnv = process.env.NEUROLINK_TOOL_CACHE_DURATION;
@@ -5622,6 +5633,16 @@ Current user's request: ${currentInput}`;
5622
5633
  // Make neurolink.stream the active span so every provider span (generations,
5623
5634
  // tool calls) parents under it — one Langfuse trace per turn, not a forest.
5624
5635
  const streamSpanContext = trace.setSpan(context.active(), streamSpan);
5636
+ // Pre-call tool routing: run inside the stream-span + Langfuse context so
5637
+ // the router's own generation span nests under this turn's trace instead
5638
+ // of starting a separate one. Asks a cheap router LLM which tool servers
5639
+ // the query needs and appends the unpicked servers' tools to
5640
+ // `excludeTools`. Fails open (no exclusions). Routes on the current
5641
+ // prompt enriched with a bounded window of recent conversation turns
5642
+ // (pulled from conversation memory) so contextless follow-ups still
5643
+ // classify correctly. After the workflow short-circuit, so workflow
5644
+ // streams skip it.
5645
+ await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.applyToolRoutingExclusions(options, originalPrompt)));
5625
5646
  // TTS Mode 2 deferred: stream() emits text first, then synthesizes the
5626
5647
  // accumulated response into a single audio chunk at end-of-stream and
5627
5648
  // resolves `streamResult.audio` with the same TTSResult. The resolver is
@@ -5666,6 +5687,172 @@ Current user's request: ${currentInput}`;
5666
5687
  throw error;
5667
5688
  }
5668
5689
  }
5690
+ /**
5691
+ * Pre-call tool routing for stream(): runs the router LLM once per turn
5692
+ * and appends the unpicked servers' registered tool names to
5693
+ * `options.excludeTools` — the per-call denylist enforced by
5694
+ * `baseProvider.applyToolFiltering`. No-op unless `toolRouting.enabled`
5695
+ * is true and a non-empty server catalog has been supplied. Never throws
5696
+ * (the resolver fails open to an empty exclusion list).
5697
+ */
5698
+ async applyToolRoutingExclusions(options, userQuery) {
5699
+ const routingConfig = this.toolRoutingConfig;
5700
+ if (!routingConfig?.enabled || options.disableTools) {
5701
+ return;
5702
+ }
5703
+ const servers = routingConfig.servers ?? [];
5704
+ if (servers.length === 0) {
5705
+ return;
5706
+ }
5707
+ // Whole setup is fail-open: catalog building (getCustomTools /
5708
+ // buildToolRoutingCatalog) and the router call degrade to no exclusions
5709
+ // rather than killing the stream, honoring this method's "never throws"
5710
+ // contract. Genuine stream cancellations still propagate.
5711
+ try {
5712
+ const registeredToolNames = Array.from(this.getCustomTools().keys());
5713
+ const catalog = buildToolRoutingCatalog(servers, registeredToolNames);
5714
+ if (catalog.length === 0) {
5715
+ return;
5716
+ }
5717
+ // Fold a bounded window of recent conversation turns into the routing query.
5718
+ // The router runs pre-memory and would otherwise see only this turn's raw
5719
+ // text, so a contextless follow-up ("yes please") gives it nothing to
5720
+ // classify — it fails open and routing narrows nothing. The main model
5721
+ // still receives full history later via conversation memory; this only
5722
+ // enriches the router's view. Fails open to the current query alone.
5723
+ const recentMessages = await this.fetchRecentRoutingHistory(options);
5724
+ const routingQuery = recentMessages.length > 0
5725
+ ? buildRoutingQueryFromHistory(recentMessages, userQuery)
5726
+ : userQuery;
5727
+ // The router call below re-enters the public generate(), whose finally
5728
+ // block resets _disableToolCacheForCurrentRequest to false. That flag is
5729
+ // stream-scoped (set at the top of this turn) and read by the main tool
5730
+ // execution path that runs after routing, so save it before the router
5731
+ // call and restore it afterward to keep the turn's cache setting intact.
5732
+ const cacheDisabledForCurrentRequest = this._disableToolCacheForCurrentRequest;
5733
+ let routedExcludeTools;
5734
+ try {
5735
+ routedExcludeTools = await resolveToolRoutingExclusions({
5736
+ catalog,
5737
+ alwaysIncludeServerIds: routingConfig.alwaysIncludeServerIds ?? [],
5738
+ userQuery: routingQuery,
5739
+ routerPromptPrefix: routingConfig.routerPromptPrefix,
5740
+ routerModel: {
5741
+ provider: routingConfig.routerModel?.provider ??
5742
+ options.provider,
5743
+ model: routingConfig.routerModel?.model ?? options.model,
5744
+ region: routingConfig.routerModel?.region ?? options.region,
5745
+ temperature: routingConfig.routerModel?.temperature,
5746
+ },
5747
+ timeoutMs: routingConfig.timeoutMs ?? DEFAULT_TOOL_ROUTING_TIMEOUT_MS,
5748
+ // Forward the stream's abort signal so a cancelled stream aborts the
5749
+ // router call promptly instead of waiting out the routing timeout.
5750
+ generateFn: (generateOptions) => this.generate({
5751
+ ...generateOptions,
5752
+ abortSignal: options.abortSignal,
5753
+ }),
5754
+ });
5755
+ }
5756
+ finally {
5757
+ this._disableToolCacheForCurrentRequest =
5758
+ cacheDisabledForCurrentRequest;
5759
+ }
5760
+ // Aborted during the router call — skip applying now-stale exclusions;
5761
+ // the main generation path enforces the abort itself.
5762
+ if (options.abortSignal?.aborted) {
5763
+ return;
5764
+ }
5765
+ if (routedExcludeTools.length > 0) {
5766
+ options.excludeTools = [
5767
+ ...(options.excludeTools ?? []),
5768
+ ...routedExcludeTools,
5769
+ ];
5770
+ }
5771
+ }
5772
+ catch (error) {
5773
+ if (isAbortError(error)) {
5774
+ throw error;
5775
+ }
5776
+ logger.warn("[ToolRouting] Routing setup failed, failing open", {
5777
+ error: error instanceof Error ? error.message : String(error),
5778
+ });
5779
+ }
5780
+ }
5781
+ /**
5782
+ * Loads a bounded window of prior conversation turns for the router so a
5783
+ * follow-up turn carries the context it needs to classify intent. Reads this
5784
+ * turn's conversation memory (keyed by `context.sessionId`) with
5785
+ * summarization disabled to keep the router cheap. Fails open to an empty
5786
+ * list — routing then falls back to the current query alone (prior
5787
+ * behaviour). On the first turn of a conversation memory may not be
5788
+ * initialised yet; that also yields an empty list, which is fine since the
5789
+ * opening message already carries its own context.
5790
+ */
5791
+ async fetchRecentRoutingHistory(options) {
5792
+ try {
5793
+ const requestContext = options.context;
5794
+ // Inline multi-turn callers pass prior turns via options.conversationMessages
5795
+ // (the same field the main model reads) rather than server-side session
5796
+ // memory. Honor it directly so a contextless follow-up still routes with
5797
+ // context even when no sessionId is present.
5798
+ if (options.conversationMessages &&
5799
+ options.conversationMessages.length > 0) {
5800
+ return options.conversationMessages;
5801
+ }
5802
+ const sessionId = requestContext?.sessionId;
5803
+ if (typeof sessionId !== "string" || !sessionId) {
5804
+ return [];
5805
+ }
5806
+ // The pre-call router runs earlier in the stream pipeline than the main
5807
+ // generation path's own memory init (initializeConversationMemoryForGeneration),
5808
+ // so this.conversationMemory is still undefined at router time and the
5809
+ // router would only ever see the current turn. Trigger the same lazy init
5810
+ // the main path uses — it is idempotent, so the later call is a no-op —
5811
+ // so the router can read prior turns. Fails open via the surrounding catch.
5812
+ await this.initializeConversationMemoryForGeneration(`tool-routing-${Date.now()}`, Date.now(), process.hrtime.bigint());
5813
+ const memory = this.conversationMemory;
5814
+ if (!memory) {
5815
+ return [];
5816
+ }
5817
+ // Reuse the SAME reader the main model uses so the router sees identically
5818
+ // curated history: polluted turns dropped, read instrumented under the
5819
+ // neurolink.conversation.getMessages span. enableSummarization=false keeps
5820
+ // routing cheap and free of any summary-LLM side effect. The remaining
5821
+ // tool_call/tool_result turns are dropped at transcript-render time
5822
+ // (buildRoutingQueryFromHistory) to mirror what the main model is sent.
5823
+ const messages = await getConversationMessages(memory, {
5824
+ ...options,
5825
+ enableSummarization: false,
5826
+ });
5827
+ logger.debug("[ToolRouting] Loaded conversation history for router", {
5828
+ sessionId,
5829
+ messageCount: messages.length,
5830
+ });
5831
+ return messages;
5832
+ }
5833
+ catch (error) {
5834
+ logger.debug("[ToolRouting] Failed to load conversation history; routing on current query only", {
5835
+ error: error instanceof Error ? error.message : String(error),
5836
+ });
5837
+ return [];
5838
+ }
5839
+ }
5840
+ /**
5841
+ * Supplies (or replaces) the pre-call tool routing server catalog.
5842
+ *
5843
+ * For hosts that only know their tool servers after constructing NeuroLink
5844
+ * (e.g. tools are registered per session/conversation). Routing must still
5845
+ * be enabled via the constructor's `toolRouting.enabled` — setting servers
5846
+ * alone does not activate it.
5847
+ */
5848
+ setToolRoutingServers(servers) {
5849
+ if (!this.toolRoutingConfig) {
5850
+ logger.warn("[ToolRouting] setToolRoutingServers called without toolRouting constructor config — servers stored but routing stays disabled");
5851
+ this.toolRoutingConfig = { enabled: false, servers };
5852
+ return;
5853
+ }
5854
+ this.toolRoutingConfig.servers = servers;
5855
+ }
5669
5856
  async validateStreamRequestOptions(options, startTime) {
5670
5857
  await this.validateStreamInput(options);
5671
5858
  this.enforceSessionBudget(options.maxBudgetUsd);
@@ -156,7 +156,9 @@ export function spanJsonAttribute(value, maxChars = SPAN_ATTRIBUTE_MAX_CHARS) {
156
156
  serialized = String(value);
157
157
  }
158
158
  if (serialized.length > maxChars) {
159
- return `${serialized.slice(0, maxChars)}...[truncated ${serialized.length - maxChars} chars]`;
159
+ const truncationSuffix = `...[truncated ${serialized.length - maxChars} chars]`;
160
+ const keepLength = Math.max(0, maxChars - truncationSuffix.length);
161
+ return `${serialized.slice(0, keepLength)}${truncationSuffix}`;
160
162
  }
161
163
  return serialized;
162
164
  }