@juspay/neurolink 9.69.3 → 9.70.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +355 -347
  3. package/dist/core/modules/GenerationHandler.js +75 -23
  4. package/dist/core/modules/structuredOutputPolicy.d.ts +28 -0
  5. package/dist/core/modules/structuredOutputPolicy.js +50 -0
  6. package/dist/lib/core/modules/GenerationHandler.js +75 -23
  7. package/dist/lib/core/modules/structuredOutputPolicy.d.ts +28 -0
  8. package/dist/lib/core/modules/structuredOutputPolicy.js +51 -0
  9. package/dist/lib/neurolink.js +58 -0
  10. package/dist/lib/providers/anthropic.js +34 -7
  11. package/dist/lib/providers/googleVertex.js +17 -2
  12. package/dist/lib/types/generate.d.ts +47 -19
  13. package/dist/lib/types/index.d.ts +1 -0
  14. package/dist/lib/types/index.js +1 -0
  15. package/dist/lib/types/livekit.d.ts +369 -0
  16. package/dist/lib/types/livekit.js +13 -0
  17. package/dist/lib/types/utilities.d.ts +16 -0
  18. package/dist/lib/utils/json/coerce.d.ts +10 -0
  19. package/dist/lib/utils/json/coerce.js +141 -0
  20. package/dist/lib/utils/json/extract.d.ts +10 -0
  21. package/dist/lib/utils/json/extract.js +61 -11
  22. package/dist/lib/utils/tokenLimits.d.ts +20 -0
  23. package/dist/lib/utils/tokenLimits.js +55 -0
  24. package/dist/lib/voice/livekit/brain.d.ts +21 -0
  25. package/dist/lib/voice/livekit/brain.js +75 -0
  26. package/dist/lib/voice/livekit/config.d.ts +41 -0
  27. package/dist/lib/voice/livekit/config.js +80 -0
  28. package/dist/lib/voice/livekit/eventBridge.d.ts +27 -0
  29. package/dist/lib/voice/livekit/eventBridge.js +360 -0
  30. package/dist/lib/voice/livekit/index.d.ts +15 -0
  31. package/dist/lib/voice/livekit/index.js +16 -0
  32. package/dist/lib/voice/livekit/tokens.d.ts +19 -0
  33. package/dist/lib/voice/livekit/tokens.js +51 -0
  34. package/dist/lib/voice/livekit/voiceAgent.d.ts +32 -0
  35. package/dist/lib/voice/livekit/voiceAgent.js +415 -0
  36. package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +27 -0
  37. package/dist/lib/voice/livekit/voiceAgentWorker.js +58 -0
  38. package/dist/neurolink.js +58 -0
  39. package/dist/providers/anthropic.js +34 -7
  40. package/dist/providers/googleVertex.js +17 -2
  41. package/dist/types/generate.d.ts +47 -19
  42. package/dist/types/index.d.ts +1 -0
  43. package/dist/types/index.js +1 -0
  44. package/dist/types/livekit.d.ts +369 -0
  45. package/dist/types/livekit.js +12 -0
  46. package/dist/types/utilities.d.ts +16 -0
  47. package/dist/utils/json/coerce.d.ts +10 -0
  48. package/dist/utils/json/coerce.js +140 -0
  49. package/dist/utils/json/extract.d.ts +10 -0
  50. package/dist/utils/json/extract.js +61 -11
  51. package/dist/utils/tokenLimits.d.ts +20 -0
  52. package/dist/utils/tokenLimits.js +55 -0
  53. package/dist/voice/livekit/brain.d.ts +21 -0
  54. package/dist/voice/livekit/brain.js +74 -0
  55. package/dist/voice/livekit/config.d.ts +41 -0
  56. package/dist/voice/livekit/config.js +79 -0
  57. package/dist/voice/livekit/eventBridge.d.ts +27 -0
  58. package/dist/voice/livekit/eventBridge.js +359 -0
  59. package/dist/voice/livekit/index.d.ts +15 -0
  60. package/dist/voice/livekit/index.js +15 -0
  61. package/dist/voice/livekit/tokens.d.ts +19 -0
  62. package/dist/voice/livekit/tokens.js +50 -0
  63. package/dist/voice/livekit/voiceAgent.d.ts +32 -0
  64. package/dist/voice/livekit/voiceAgent.js +414 -0
  65. package/dist/voice/livekit/voiceAgentWorker.d.ts +27 -0
  66. package/dist/voice/livekit/voiceAgentWorker.js +57 -0
  67. package/package.json +23 -6
@@ -0,0 +1,415 @@
1
+ /**
2
+ * LiveKit Agents agent definition.
3
+ *
4
+ * `defineVoiceAgent` returns the agent object placed as the default export of a
5
+ * worker entry file. The framework runs it as a Job (one per call, in its own
6
+ * process): it connects to the room, builds the NeuroLink brain via the
7
+ * supplied factory, wires Silero VAD + STT/TTS plugins, and overrides `llmNode`
8
+ * so every turn is generated by `neurolink.stream()`.
9
+ *
10
+ * `@livekit/agents` and the plugins are optional dependencies, imported
11
+ * dynamically so the core package does not require them unless the LiveKit
12
+ * voice agent is used. Type-only imports are erased at build time and add no
13
+ * runtime dependency.
14
+ *
15
+ * See docs/features/livekit-voice-agent.md.
16
+ */
17
+ import { ReadableStream } from "node:stream/web";
18
+ import { logger } from "../../utils/logger.js";
19
+ import { createVoiceBrain } from "./brain.js";
20
+ import { resolveBrainDefaults, resolveEouTurnDetection } from "./config.js";
21
+ import { attachEventBridge } from "./eventBridge.js";
22
+ const DEFAULT_CONVERSATION_PREFIX = "voice";
23
+ // Turn-end timing defaults (approach A: silence tuning). Longer silence +
24
+ // endpointing floor so natural mid-sentence pauses ("...and so, [pause] um")
25
+ // don't end the turn early and split one utterance into two. Overridable via
26
+ // config (vad.minSilenceDuration / turn.minEndpointingDelay).
27
+ const DEFAULT_MIN_SILENCE_DURATION = 1.0; // seconds (Silero VAD)
28
+ const DEFAULT_MIN_ENDPOINTING_DELAY = 1000; // ms (framework endpointing floor)
29
/**
 * Return the text of the newest user message in a chat context.
 *
 * Scans `chatCtx.items` from the last entry backwards and stops at the first
 * entry whose `type` is "message" and whose `role` is "user". Relies only on
 * those two discriminant fields — no type assertions.
 *
 * @param chatCtx - Chat context exposing an `items` array.
 * @returns The matching item's `textContent`, or `undefined` when the context
 *   holds no user message.
 */
function latestUserText(chatCtx) {
    const history = chatCtx.items;
    let cursor = history.length;
    while (cursor > 0) {
        cursor -= 1;
        const entry = history[cursor];
        const isUserMessage = entry.type === "message" && entry.role === "user";
        if (isUserMessage) {
            return entry.textContent;
        }
    }
    return undefined;
}
43
/**
 * Build a text stream for a single turn from the brain, abortable on cancel.
 *
 * The brain's async generator is pulled lazily, one chunk per `pull()`. When
 * the consumer cancels the stream (barge-in):
 *   1. the abort signal handed to `brain.streamReply` is aborted,
 *   2. `onAbortedBeforeOutput` is invoked if no chunk had been produced yet,
 *   3. the generator is finished via `iterator.return()` so that its
 *      `finally` blocks run even when it is parked at a `yield` (aborting the
 *      signal alone never resumes a suspended generator).
 *
 * @param brain - Object exposing `streamReply({ transcript, conversationId, signal })`
 *   which returns an async iterable of text chunks.
 * @param transcript - User text for this turn.
 * @param conversationId - Conversation identifier forwarded to the brain.
 * @param onAbortedBeforeOutput - Optional callback fired on cancel when no
 *   chunk was produced before the cancellation.
 * @returns A `ReadableStream` of the brain's chunks.
 */
function brainTurnStream(brain, transcript, conversationId, onAbortedBeforeOutput) {
    const controller = new AbortController();
    const generator = brain.streamReply({
        transcript,
        conversationId,
        signal: controller.signal,
    });
    const iterator = generator[Symbol.asyncIterator]();
    let producedOutput = false;
    let cancelled = false;
    return new ReadableStream({
        async pull(streamController) {
            let next;
            try {
                next = await iterator.next();
            }
            catch (err) {
                if (cancelled) {
                    // Rejection caused by our own abort; the stream is already
                    // closed, so there is no consumer left to report it to.
                    return;
                }
                throw err;
            }
            if (cancelled) {
                // cancel() closed the stream while this pull was in flight;
                // close()/enqueue() on a closed controller would throw.
                return;
            }
            if (next.done === true) {
                streamController.close();
                return;
            }
            producedOutput = true;
            streamController.enqueue(next.value);
        },
        cancel() {
            cancelled = true;
            controller.abort();
            if (!producedOutput) {
                onAbortedBeforeOutput?.();
            }
            // Finish the generator so its cleanup runs. Deliberately not
            // awaited/returned: cancel() must not block on the brain's teardown.
            Promise.resolve()
                .then(() => iterator.return?.())
                .catch((err) => {
                logger.debug("[LiveKitVoiceAgent] Brain stream teardown failed after cancel", { error: err instanceof Error ? err.message : String(err) });
            });
        },
    });
}
74
/**
 * Build the English semantic end-of-utterance (EOU) turn detector.
 *
 * The detector sits on top of VAD: VAD reports acoustic silence, and this
 * model then judges whether the user's turn is semantically finished, so a
 * natural mid-sentence pause does not split one utterance into two. It is
 * opt-in via `LIVEKIT_EOU_TURN_DETECTION`. The inference runner itself is
 * registered in the worker process (see `voiceAgentWorker.ts`); only the
 * model handle is constructed here.
 *
 * @returns The `EnglishModel` instance, or `undefined` when EOU detection is
 *   disabled.
 */
async function loadEouTurnDetector() {
    const eouSettings = resolveEouTurnDetection();
    const threshold = eouSettings.unlikelyThreshold;
    if (eouSettings.enabled) {
        // Optional dependency: only loaded when the feature is switched on.
        const plugin = await import("@livekit/agents-plugin-livekit");
        return new plugin.turnDetector.EnglishModel(threshold);
    }
    return undefined;
}
92
/**
 * Load the Silero VAD instance used by the session.
 *
 * Any option missing from `config` falls back to a stricter-than-default
 * value so that background noise is not mistaken for speech: a higher
 * activation threshold and a minimum speech duration reject short, quiet
 * noise bursts.
 *
 * @param config - Optional overrides: `activationThreshold`,
 *   `minSpeechDuration`, `minSilenceDuration`.
 * @returns The result of `VAD.load(...)` from the Silero plugin.
 */
async function loadVad(config) {
    // Optional dependency, imported on demand.
    const { VAD } = await import("@livekit/agents-plugin-silero");
    const vadOptions = {
        activationThreshold: config?.activationThreshold ?? 0.6,
        minSpeechDuration: config?.minSpeechDuration ?? 0.2,
        minSilenceDuration: config?.minSilenceDuration ?? DEFAULT_MIN_SILENCE_DURATION,
    };
    return VAD.load(vadOptions);
}
107
/**
 * Create the speech-to-text plugin instance described by `config`.
 *
 * An option is forwarded to the plugin only when it is defined: handing the
 * plugin an explicit `undefined` would overwrite its own default (for example
 * its default model) and break it.
 *
 * @param config - STT settings; `provider` selects the plugin ("soniox" or
 *   "deepgram"), the remaining fields are provider options.
 * @returns A new STT instance from the selected plugin.
 * @throws {Error} When `config.provider` is not a supported provider.
 */
async function buildStt(config) {
    switch (config.provider) {
        case "soniox": {
            const { STT } = await import("@livekit/agents-plugin-soniox");
            const sonioxOptions = {
                ...(config.model !== undefined ? { model: config.model } : {}),
                // Soft hint only: Soniox biases toward this language but can
                // still auto-detect another (e.g. the user switching to Telugu
                // mid-call). Do NOT set `languageHintsStrict` — forcing the
                // hinted language makes the realtime STT stall/error on
                // other-language audio and the session never recovers (no
                // further transcripts, so no audio at all).
                ...(config.language !== undefined
                    ? { languageHints: [config.language] }
                    : {}),
                ...(config.maxEndpointDelayMs !== undefined
                    ? { maxEndpointDelayMs: config.maxEndpointDelayMs }
                    : {}),
            };
            return new STT(sonioxOptions);
        }
        case "deepgram": {
            const { STT } = await import("@livekit/agents-plugin-deepgram");
            const deepgramOptions = config.language !== undefined ? { language: config.language } : {};
            return new STT(deepgramOptions);
        }
        default:
            throw new Error(`Unsupported LiveKit STT provider "${config.provider}" (supported: soniox, deepgram)`);
    }
}
143
/**
 * Create the text-to-speech plugin instance described by `config`.
 *
 * An option is forwarded to the plugin only when it is defined: handing the
 * plugin an explicit `undefined` would overwrite its own default voice/model
 * and break it. Note the per-plugin option names: Cartesia takes
 * `voice`/`model`, ElevenLabs takes `voiceId`/`modelID`.
 *
 * @param config - TTS settings; `provider` selects the plugin ("cartesia" or
 *   "elevenlabs"), `voice` and `model` are optional.
 * @returns A new TTS instance from the selected plugin.
 * @throws {Error} When `config.provider` is not a supported provider.
 */
async function buildTts(config) {
    switch (config.provider) {
        case "cartesia": {
            const { TTS } = await import("@livekit/agents-plugin-cartesia");
            const cartesiaOptions = {
                ...(config.voice !== undefined ? { voice: config.voice } : {}),
                ...(config.model !== undefined ? { model: config.model } : {}),
            };
            return new TTS(cartesiaOptions);
        }
        case "elevenlabs": {
            const { TTS } = await import("@livekit/agents-plugin-elevenlabs");
            const elevenLabsOptions = {
                ...(config.voice !== undefined ? { voiceId: config.voice } : {}),
                ...(config.model !== undefined ? { modelID: config.model } : {}),
            };
            return new TTS(elevenLabsOptions);
        }
        default:
            throw new Error(`Unsupported LiveKit TTS provider "${config.provider}" (supported: cartesia, elevenlabs)`);
    }
}
174
/**
 * Define a LiveKit voice agent backed by NeuroLink.
 *
 * Place the result as the default export of the worker entry file:
 *
 * ```ts
 * export default defineVoiceAgent({
 *   createNeuroLink: async () => buildConfiguredNeuroLink(),
 *   stt: { provider: "deepgram" },
 *   tts: { provider: "elevenlabs" },
 * });
 * ```
 *
 * The returned `entry(ctx)` connects to the room, builds the brain from
 * `config.createNeuroLink()`, loads VAD/STT/TTS (and the optional EOU turn
 * detector) in parallel, starts an `AgentSession` whose `llmNode` streams every
 * turn from the brain, arms an inactivity watchdog, and — when
 * `config.events.enabled` is true and the NeuroLink instance exposes
 * `getEventEmitter` — attaches the data-channel event bridge.
 *
 * @param config - Voice agent configuration. Fields read here:
 *   `createNeuroLink`, `stt`, `tts`, and optionally `provider`, `model`,
 *   `conversationIdPrefix`, `systemPrompt`, `temperature`, `maxTokens`,
 *   `userId`, `vad`, `turn`, `interruption`, `events`.
 * @returns An object `{ entry }` where `entry(ctx)` is the per-job entry point.
 */
export function defineVoiceAgent(config) {
    // Largest delay setTimeout honours; Node coerces anything larger to 1 ms.
    const MAX_TIMER_DELAY_MS = 2_147_483_647;
    const defaults = resolveBrainDefaults();
    const provider = config.provider ?? defaults.provider;
    const model = config.model ?? defaults.model;
    const conversationPrefix = config.conversationIdPrefix ?? DEFAULT_CONVERSATION_PREFIX;
    async function entry(ctx) {
        const entryStartedAt = Date.now();
        await ctx.connect();
        logger.debug(`[LiveKitVoiceAgent] Joined room "${ctx.room.name}" in ${Date.now() - entryStartedAt}ms`);
        // When the user actually stopped speaking (VAD), used to measure how long
        // the agent waited after speech before committing the turn to the LLM.
        let userStoppedSpeakingAt;
        const neurolink = await config.createNeuroLink();
        const brain = createVoiceBrain({
            neurolink,
            provider,
            model,
            systemPrompt: config.systemPrompt,
            temperature: config.temperature,
            maxTokens: config.maxTokens,
            userId: config.userId,
        });
        const conversationId = `${conversationPrefix}-${ctx.room.name ?? ctx.job.id}`;
        const { voice, llm } = await import("@livekit/agents");
        const [vad, stt, tts, eouTurnDetector] = await Promise.all([
            loadVad(config.vad),
            buildStt(config.stt),
            buildTts(config.tts),
            loadEouTurnDetector(),
        ]);
        const transcriptEventsEnabled = config.events?.enabled === true &&
            typeof neurolink.getEventEmitter === "function";
        const transcriptEmitter = transcriptEventsEnabled
            ? neurolink.getEventEmitter?.()
            : undefined;
        // Finalized STT segments accumulated for the turn currently in progress.
        let userTranscriptBuffer = "";
        // Text of a turn that was interrupted before any reply was produced; it
        // is merged into the next turn's prompt.
        let pendingPrefix = "";
        /**
         * Emit the live (non-final) user bubble for one STT segment.
         *
         * A finalized segment is appended to the per-turn buffer; an interim
         * segment is only shown after the buffer. Either way the event carries
         * `final: false` — the turn-final event is sent by `commitUserTranscript`.
         * Empty segments are ignored when joining so they cannot introduce a
         * stray trailing space, and a missing transcript is treated as empty.
         */
        function emitUserTranscriptSegment(segmentText, isFinal) {
            if (transcriptEmitter === undefined) {
                return;
            }
            const trimmed = (segmentText ?? "").trim();
            const combined = [userTranscriptBuffer, trimmed]
                .filter((part) => part.length > 0)
                .join(" ");
            if (isFinal) {
                userTranscriptBuffer = combined;
            }
            transcriptEmitter.emit("voice:user-transcript", {
                text: combined,
                final: false,
            });
        }
        /**
         * Lock the user bubble at turn-end and reset the buffer for the next turn.
         * `replacesPrevious` tells the client this committed turn absorbed a prior
         * interrupted turn, so it should remove the orphaned previous user bubble.
         */
        function commitUserTranscript(finalText, replacesPrevious = false) {
            if (transcriptEmitter !== undefined) {
                transcriptEmitter.emit("voice:user-transcript", {
                    text: finalText,
                    final: true,
                    replacesPrevious,
                });
            }
            userTranscriptBuffer = "";
        }
        class NeuroLinkVoiceAgent extends voice.Agent {
            // Every turn is generated by the NeuroLink brain instead of a
            // framework LLM; tool context and model settings are unused.
            async llmNode(chatCtx, _toolCtx, _modelSettings) {
                const transcript = latestUserText(chatCtx);
                if (transcript === undefined || transcript.trim().length === 0) {
                    userTranscriptBuffer = "";
                    return null;
                }
                const hadPrefix = pendingPrefix.length > 0;
                const promptText = hadPrefix
                    ? `${pendingPrefix} ${transcript}`
                    : transcript;
                pendingPrefix = "";
                commitUserTranscript(promptText, hadPrefix);
                if (userStoppedSpeakingAt !== undefined) {
                    logger.debug(`[LiveKitVoiceAgent] Endpointing waited ${Date.now() - userStoppedSpeakingAt}ms before sending turn to LLM`);
                }
                return brainTurnStream(brain, promptText, conversationId, () => {
                    // Interrupted before producing any reply → carry this turn's text
                    // forward; the next turn merges it (prompt + UI).
                    pendingPrefix = promptText;
                });
            }
        }
        class PlaceholderLLM extends llm.LLM {
            label() {
                return "neurolink-placeholder";
            }
            chat() {
                throw new Error("PlaceholderLLM.chat must not be called — llmNode overrides generation");
            }
        }
        const turnHandling = {
            interruption: {
                minWords: config.interruption?.minWords ?? 2,
                minDuration: config.interruption?.minDuration ?? 600,
            },
        };
        if (eouTurnDetector !== undefined) {
            turnHandling.turnDetection = eouTurnDetector;
            logger.info("[LiveKitVoiceAgent] Semantic end-of-utterance turn detection enabled (English)");
        }
        else if (config.turn?.mode) {
            turnHandling.turnDetection = config.turn.mode;
        }
        // `minDelay` always has a value (config or default), so the endpointing
        // block is always present; `maxDelay` is only set when configured.
        const endpointing = {
            minDelay: config.turn?.minEndpointingDelay ?? DEFAULT_MIN_ENDPOINTING_DELAY,
        };
        if (config.turn?.maxEndpointingDelay !== undefined) {
            endpointing.maxDelay = config.turn.maxEndpointingDelay;
        }
        turnHandling.endpointing = endpointing;
        const session = new voice.AgentSession({
            vad,
            stt,
            tts,
            llm: new PlaceholderLLM(),
            turnHandling,
            // Do NOT speculatively call the LLM on preflight transcripts before the
            // turn ends — with NeuroLink as the brain each call is a real LLM request,
            // and it makes the agent feel like it responds while you're still talking.
            preemptiveGeneration: false,
        });
        const agent = new NeuroLinkVoiceAgent({
            instructions: config.systemPrompt ?? "",
        });
        // Inactivity watchdog: shut the per-call Job down after a stretch with no
        // user or agent activity (mirrors Clairvoyance). On timeout `ctx.shutdown`
        // runs the shutdown callbacks (disposing the bridge) and the Job process
        // exits — freeing its RAM and the EOU model — while the browser observes a
        // room disconnect. Reset on every interaction below. Configure via
        // VOICE_INACTIVITY_TIMEOUT_MS (default 10 min); <= 0 disables the watchdog.
        const rawInactivityTimeout = process.env.VOICE_INACTIVITY_TIMEOUT_MS;
        const inactivityTimeoutMs = Number(rawInactivityTimeout ?? 600_000);
        if (Number.isNaN(inactivityTimeoutMs)) {
            // A typo must not silently leave idle job processes running forever.
            logger.warn(`[LiveKitVoiceAgent] VOICE_INACTIVITY_TIMEOUT_MS="${rawInactivityTimeout}" is not a number — inactivity watchdog disabled`);
        }
        const inactivityEnabled = Number.isFinite(inactivityTimeoutMs) && inactivityTimeoutMs > 0;
        // setTimeout treats delays above 2^31-1 ms as 1 ms, which would shut the
        // job down almost immediately; clamp to the largest delay it honours.
        const inactivityDelayMs = Math.min(inactivityTimeoutMs, MAX_TIMER_DELAY_MS);
        let inactivityTimer;
        let inactivityFired = false;
        function clearInactivityTimer() {
            if (inactivityTimer !== undefined) {
                clearTimeout(inactivityTimer);
                inactivityTimer = undefined;
            }
        }
        // (Re)arm the watchdog; a no-op once disabled or already fired.
        function noteActivity() {
            if (!inactivityEnabled || inactivityFired) {
                return;
            }
            clearInactivityTimer();
            inactivityTimer = setTimeout(() => {
                inactivityFired = true;
                logger.info(`[LiveKitVoiceAgent] Inactivity timeout (${Math.round(inactivityDelayMs / 1000)}s) reached — shutting down job for room "${ctx.room.name}"`);
                ctx.shutdown("inactivity timeout");
            }, inactivityDelayMs);
            // The watchdog must not, by itself, keep the event loop alive.
            inactivityTimer.unref?.();
        }
        ctx.addShutdownCallback(async () => {
            clearInactivityTimer();
        });
        // Track when the user actually stops speaking (VAD) so endpointing latency
        // can be measured, and reset the inactivity watchdog on user activity.
        session.on(voice.AgentSessionEventTypes.UserStateChanged, (ev) => {
            noteActivity();
            if (ev.oldState === "speaking" && ev.newState !== "speaking") {
                userStoppedSpeakingAt = Date.now();
            }
        });
        // Reset the inactivity watchdog on any agent speech/processing and on every
        // committed conversation item (user turn or agent reply), so the timeout
        // only fires during a genuine lull in the conversation.
        session.on(voice.AgentSessionEventTypes.AgentStateChanged, () => {
            noteActivity();
        });
        session.on(voice.AgentSessionEventTypes.ConversationItemAdded, () => {
            noteActivity();
        });
        // Forward user STT transcripts to the data-channel bridge as a single
        // live-updating bubble. `UserInputTranscribed` fires `isFinal: true` per
        // finalized SEGMENT (several per turn), so we never forward those as the
        // turn-final; `emitUserTranscriptSegment` accumulates them into the per-turn
        // buffer and emits `final: false`. The lone `final: true` is sent from
        // `llmNode` at the real turn boundary.
        if (transcriptEventsEnabled) {
            session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (ev) => {
                emitUserTranscriptSegment(ev.transcript, ev.isFinal);
            });
        }
        logger.info("[LiveKitVoiceAgent] Session starting", {
            room: ctx.room.name,
            provider,
            model,
        });
        await session.start({ agent, room: ctx.room });
        // Start the inactivity countdown now that the session is live; every
        // interaction handler above re-arms it.
        noteActivity();
        // Data-channel event bridge: forward NeuroLink events (text, tool calls,
        // results, HITL prompts, status) to the browser, and accept HITL responses
        // back. Only when enabled and the instance exposes its event emitter
        // (same condition as the transcript forwarding above).
        if (transcriptEventsEnabled) {
            const bridge = await attachEventBridge({
                room: ctx.room,
                emitter: neurolink.getEventEmitter(),
                options: config.events,
            });
            ctx.addShutdownCallback(async () => {
                bridge.dispose();
            });
        }
    }
    return { entry };
}
415
+ //# sourceMappingURL=voiceAgent.js.map
@@ -0,0 +1,27 @@
1
+ /**
2
+ * LiveKit Agents worker launcher.
3
+ *
4
+ * Registers a worker with the LiveKit server (Cloud or self-hosted) for the
5
+ * given agent entry file. LiveKit dispatches one Job per room, each running in
6
+ * its own process, which provides worker-per-call isolation and horizontal
7
+ * scaling. Connection settings are resolved from the environment.
8
+ *
9
+ * `@livekit/agents` is an optional dependency, imported dynamically.
10
+ *
11
+ * See docs/features/livekit-voice-agent.md.
12
+ */
13
+ import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
14
+ /**
15
+ * Launch the LiveKit voice agent worker.
16
+ *
17
+ * Call from a small runner script; `agentFile` must point to the file whose
18
+ * default export is the result of `defineVoiceAgent`.
19
+ *
20
+ * ```ts
21
+ * await startVoiceAgentWorker({
22
+ * agentFile: new URL("./voice-agent-entry.js", import.meta.url).pathname,
23
+ * agentName: "neurolink-voice",
24
+ * });
25
+ * ```
26
+ */
27
+ export declare function startVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
@@ -0,0 +1,58 @@
1
+ /**
2
+ * LiveKit Agents worker launcher.
3
+ *
4
+ * Registers a worker with the LiveKit server (Cloud or self-hosted) for the
5
+ * given agent entry file. LiveKit dispatches one Job per room, each running in
6
+ * its own process, which provides worker-per-call isolation and horizontal
7
+ * scaling. Connection settings are resolved from the environment.
8
+ *
9
+ * `@livekit/agents` is an optional dependency, imported dynamically.
10
+ *
11
+ * See docs/features/livekit-voice-agent.md.
12
+ */
13
+ import { resolveEouTurnDetection, resolveLiveKitServerConfig, } from "./config.js";
14
+ const DEFAULT_AGENT_NAME = "neurolink-voice";
15
+ const EOU_METHOD_MULTILINGUAL = "lk_end_of_utterance_multilingual";
16
/**
 * Register the English EOU inference runner in the worker process.
 *
 * This has to happen before `cli.runApp`: the worker only spawns the shared
 * inference executor when `InferenceRunner.registeredRunners` is non-empty at
 * startup, and it hands that registry to the executor process. Importing the
 * plugin registers both the English and the multilingual runner, so the
 * multilingual entry is removed afterwards to keep only the English model
 * loaded.
 *
 * @returns A promise that resolves once the registry has been adjusted.
 */
async function registerEouTurnDetectorRunner() {
    const agents = await import("@livekit/agents");
    // Side-effect import: loading the plugin's turn-detector module triggers
    // registerRunner().
    await import("@livekit/agents-plugin-livekit");
    const runnerRegistry = agents.InferenceRunner.registeredRunners;
    delete runnerRegistry[EOU_METHOD_MULTILINGUAL];
}
31
/**
 * Launch the LiveKit voice agent worker.
 *
 * Intended to be called from a small runner script. `agentFile` must be the
 * filesystem path of the module whose default export is the result of
 * `defineVoiceAgent`. Server URL and credentials come from
 * `resolveLiveKitServerConfig()`; when EOU turn detection is enabled, its
 * inference runner is registered before the worker app starts.
 *
 * ```ts
 * import { fileURLToPath } from "node:url";
 *
 * await startVoiceAgentWorker({
 *   agentFile: fileURLToPath(new URL("./voice-agent-entry.js", import.meta.url)),
 *   agentName: "neurolink-voice",
 * });
 * ```
 *
 * @param options - Launch options: `agentFile` (required) and `agentName`
 *   (optional, defaults to "neurolink-voice").
 * @returns A promise that resolves after `cli.runApp` has been invoked.
 */
export async function startVoiceAgentWorker(options) {
    const { url, apiKey, apiSecret } = resolveLiveKitServerConfig();
    const agents = await import("@livekit/agents");
    const eouEnabled = resolveEouTurnDetection().enabled;
    if (eouEnabled) {
        // Must be done before runApp so the executor sees the runner registry.
        await registerEouTurnDetectorRunner();
    }
    const workerOptions = new agents.WorkerOptions({
        agent: options.agentFile,
        agentName: options.agentName ?? DEFAULT_AGENT_NAME,
        wsURL: url,
        apiKey,
        apiSecret,
    });
    agents.cli.runApp(workerOptions);
}
58
+ //# sourceMappingURL=voiceAgentWorker.js.map
package/dist/neurolink.js CHANGED
@@ -66,6 +66,7 @@ import { CircuitBreaker, ERROR_CODES, ErrorFactory, isAbortError, isRetriableErr
66
66
  import { hasLifecycleErrorFired, markLifecycleErrorFired, } from "./utils/lifecycleCallbacks.js";
67
67
  import { resolveLifecycleTimeoutMs } from "./utils/lifecycleTimeout.js";
68
68
  import { cloneOptionsForCallIsolation } from "./utils/cloneOptions.js";
69
+ import { coerceJsonToSchema } from "./utils/json/coerce.js";
69
70
  // Factory processing imports
70
71
  import { createCleanStreamOptions, enhanceTextGenerationOptions, processFactoryOptions, processStreamingFactoryOptions, validateFactoryConfig, } from "./utils/factoryProcessing.js";
71
72
  import { logger, mcpLogger } from "./utils/logger.js";
@@ -3345,6 +3346,60 @@ Current user's request: ${currentInput}`;
3345
3346
  }
3346
3347
  finalizeGenerateRequestResult(params) {
3347
3348
  const { generateSpan, options, textOptions, textResult, factoryResult, originalPrompt, startTime, } = params;
3349
+ // Provider-agnostic JSON coercion for schema requests. Structured-output
3350
+ // enforcement makes valid JSON the overwhelming case; for every other
3351
+ // provider path — including generate() overrides (Vertex, Anthropic,
3352
+ // Bedrock, Google AI Studio) — object/array roots are recovered here via
3353
+ // balanced-scan + jsonrepair and scalar JSON roots via plain JSON.parse,
3354
+ // with the parsed value exposed as `structuredData`. If nothing
3355
+ // JSON-shaped is recoverable (pure prose), the raw text is returned,
3356
+ // `structuredData` stays undefined, and a WARN makes the case observable.
3357
+ // Runs BEFORE the end-of-generation emits below so event consumers see
3358
+ // the same coerced content/structuredData the caller receives.
3359
+ if (textOptions.schema &&
3360
+ textResult.structuredData === undefined &&
3361
+ typeof textResult.content === "string") {
3362
+ const coerced = coerceJsonToSchema(textResult.content, textOptions.schema);
3363
+ if (coerced) {
3364
+ textResult.content = coerced.content;
3365
+ textResult.structuredData = coerced.structuredData;
3366
+ if (coerced.repaired) {
3367
+ textResult.jsonRepaired = true;
3368
+ }
3369
+ if (coerced.truncated) {
3370
+ textResult.jsonTruncated = true;
3371
+ }
3372
+ }
3373
+ else {
3374
+ try {
3375
+ const scalar = JSON.parse(textResult.content);
3376
+ if (scalar !== null && scalar !== undefined) {
3377
+ textResult.structuredData = scalar;
3378
+ }
3379
+ }
3380
+ catch {
3381
+ logger.warn("[NeuroLink] schema requested but no JSON could be recovered from model output; returning raw text", { provider: textResult.provider, model: textResult.model });
3382
+ }
3383
+ }
3384
+ }
3385
+ // Surface truncation when a schema was requested: either the provider
3386
+ // reported finishReason="length" or the recovered JSON came from an
3387
+ // unclosed span. Either way `structuredData` may be incomplete — log at
3388
+ // WARN level so it is observable in production (not just debug logs).
3389
+ if (textOptions.schema) {
3390
+ if (textResult.finishReason === "length") {
3391
+ textResult.jsonTruncated = true;
3392
+ }
3393
+ if (textResult.jsonTruncated) {
3394
+ logger.warn("[NeuroLink] Structured output may be truncated (finishReason=length or unclosed JSON); " +
3395
+ "increase maxTokens to fit the full response.", {
3396
+ provider: textResult.provider,
3397
+ model: textResult.model,
3398
+ finishReason: textResult.finishReason,
3399
+ outputTokens: textResult.usage?.output,
3400
+ });
3401
+ }
3402
+ }
3348
3403
  // Skip the top-level `generation:end` emission when the provider already
3349
3404
  // emitted it from its native generate path (Vertex / Google AI Studio).
3350
3405
  // Without this guard, native-path providers would surface TWO events
@@ -3378,7 +3433,10 @@ Current user's request: ${currentInput}`;
3378
3433
  this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
3379
3434
  const generateResult = {
3380
3435
  content: textResult.content,
3436
+ structuredData: textResult.structuredData,
3381
3437
  finishReason: textResult.finishReason,
3438
+ jsonRepaired: textResult.jsonRepaired,
3439
+ jsonTruncated: textResult.jsonTruncated,
3382
3440
  provider: textResult.provider,
3383
3441
  model: textResult.model,
3384
3442
  usage: textResult.usage
@@ -21,6 +21,7 @@ import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
21
21
  import { NoOutputGeneratedError } from "../utils/generationErrors.js";
22
22
  import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
23
23
  import { convertZodToJsonSchema } from "../utils/schemaConversion.js";
24
+ import { resolveClaudeMaxTokens } from "../utils/tokenLimits.js";
24
25
  import { createChunkQueue, createDeferredAnalytics, stringifyToolInput, } from "./openaiChatCompletionsClient.js";
25
26
  /**
26
27
  * Beta headers for Claude Code integration.
@@ -493,10 +494,19 @@ const mapAnthropicStopReason = (raw) => {
493
494
  return "stop";
494
495
  }
495
496
  };
496
- // Anthropic's Messages API requires max_tokens on every request. The previous
497
- // @ai-sdk/anthropic implementation defaulted it to 4096 when the caller did
498
- // not specify maxTokens preserve that wire behavior.
499
- const ANTHROPIC_DEFAULT_MAX_TOKENS = 4096;
497
+ // Anthropic's Messages API requires max_tokens on every request. When the
498
+ // caller omits it, default to the model's real output ceiling via
499
+ // resolveClaudeMaxTokens (e.g. 64K for Sonnet 4.x) instead of the legacy 4096,
500
+ // which silently truncated large structured responses mid-JSON.
501
+ //
502
+ // Client-level request timeout. The Anthropic SDK throws "Streaming is required
503
+ // for long requests" from a NON-streaming `messages.create` when `max_tokens`
504
+ // is large AND no client-level timeout is configured (it can't estimate a safe
505
+ // timeout). Setting an explicit client timeout — equal to the SDK's own default
506
+ // for the non-throwing path — suppresses that pre-flight throw so large
507
+ // max_tokens (our model-ceiling default) works. Per-request duration is still
508
+ // bounded by the abort signal NeuroLink composes for each call.
509
+ const ANTHROPIC_CLIENT_TIMEOUT_MS = 600_000;
500
510
  /**
501
511
  * Anthropic Provider v2 - BaseProvider Implementation
502
512
  * Enhanced with OAuth support, subscription tiers, and beta headers for Claude Code integration.
@@ -602,6 +612,7 @@ export class AnthropicProvider extends BaseProvider {
602
612
  apiKey: "oauth-authenticated", // Placeholder, actual auth is in fetch wrapper
603
613
  // Note: No headers passed - fetch wrapper sets oauth-2025-04-20 beta header
604
614
  fetch: oauthFetch,
615
+ timeout: ANTHROPIC_CLIENT_TIMEOUT_MS,
605
616
  });
606
617
  logger.debug("[AnthropicProvider] Anthropic SDK client created with OAuth fetch wrapper");
607
618
  logger.debug("Anthropic Provider initialized with OAuth", {
@@ -647,6 +658,7 @@ export class AnthropicProvider extends BaseProvider {
647
658
  defaultHeaders: headers,
648
659
  ...(normalizedBaseURL && { baseURL: normalizedBaseURL }),
649
660
  fetch: createProxyFetch(),
661
+ timeout: ANTHROPIC_CLIENT_TIMEOUT_MS,
650
662
  });
651
663
  logger.debug("Anthropic Provider initialized with API key", {
652
664
  modelName: this.modelName,
@@ -1122,7 +1134,7 @@ export class AnthropicProvider extends BaseProvider {
1122
1134
  const params = {
1123
1135
  model: modelId,
1124
1136
  messages,
1125
- max_tokens: options.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
1137
+ max_tokens: resolveClaudeMaxTokens(modelId, options.maxOutputTokens),
1126
1138
  ...(system ? { system } : {}),
1127
1139
  ...(options.temperature !== undefined && options.temperature !== null
1128
1140
  ? { temperature: options.temperature }
@@ -1137,7 +1149,22 @@ export class AnthropicProvider extends BaseProvider {
1137
1149
  ...(toolChoice ? { tool_choice: toolChoice } : {}),
1138
1150
  ...(thinking ? { thinking } : {}),
1139
1151
  };
1140
- const timeoutController = createTimeoutController(getTimeoutForOptions(options), providerName, "generate");
1152
+ // The 60s anthropic generate default was tuned for the old ~4096
1153
+ // max_tokens. Now that the default ceiling is the model's real max,
1154
+ // a large structured response needs more wall-clock to be produced —
1155
+ // otherwise the inner controller aborts mid-generation (the AI-SDK
1156
+ // doGenerate layer doesn't see the caller's `timeout`). Raise the
1157
+ // floor to 5 min when a large output budget is in play — but only
1158
+ // when the caller did NOT set an explicit timeout: an explicit value
1159
+ // is a contract and must never be silently extended. The abort
1160
+ // signal stays the real bound.
1161
+ const callerTimeout = options
1162
+ .timeout;
1163
+ const callerSpecifiedTimeout = callerTimeout !== undefined && callerTimeout !== null;
1164
+ const generateTimeoutMs = params.max_tokens > 8192 && !callerSpecifiedTimeout
1165
+ ? Math.max(getTimeoutForOptions(options), 300_000)
1166
+ : getTimeoutForOptions(options);
1167
+ const timeoutController = createTimeoutController(generateTimeoutMs, providerName, "generate");
1141
1168
  let response;
1142
1169
  try {
1143
1170
  response = await client.messages.create(params, {
@@ -1356,7 +1383,7 @@ export class AnthropicProvider extends BaseProvider {
1356
1383
  const params = {
1357
1384
  model: modelId,
1358
1385
  messages: conversation,
1359
- max_tokens: options.maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
1386
+ max_tokens: resolveClaudeMaxTokens(modelId, options.maxTokens),
1360
1387
  stream: true,
1361
1388
  ...(payload.system ? { system: payload.system } : {}),
1362
1389
  ...(options.temperature !== undefined && options.temperature !== null