@juspay/neurolink 9.69.3 → 9.70.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/dist/browser/neurolink.min.js +295 -295
  3. package/dist/lib/types/index.d.ts +1 -0
  4. package/dist/lib/types/index.js +1 -0
  5. package/dist/lib/types/livekit.d.ts +369 -0
  6. package/dist/lib/types/livekit.js +13 -0
  7. package/dist/lib/voice/livekit/brain.d.ts +21 -0
  8. package/dist/lib/voice/livekit/brain.js +75 -0
  9. package/dist/lib/voice/livekit/config.d.ts +41 -0
  10. package/dist/lib/voice/livekit/config.js +80 -0
  11. package/dist/lib/voice/livekit/eventBridge.d.ts +27 -0
  12. package/dist/lib/voice/livekit/eventBridge.js +360 -0
  13. package/dist/lib/voice/livekit/index.d.ts +15 -0
  14. package/dist/lib/voice/livekit/index.js +16 -0
  15. package/dist/lib/voice/livekit/tokens.d.ts +19 -0
  16. package/dist/lib/voice/livekit/tokens.js +51 -0
  17. package/dist/lib/voice/livekit/voiceAgent.d.ts +32 -0
  18. package/dist/lib/voice/livekit/voiceAgent.js +415 -0
  19. package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +27 -0
  20. package/dist/lib/voice/livekit/voiceAgentWorker.js +58 -0
  21. package/dist/types/index.d.ts +1 -0
  22. package/dist/types/index.js +1 -0
  23. package/dist/types/livekit.d.ts +369 -0
  24. package/dist/types/livekit.js +12 -0
  25. package/dist/voice/livekit/brain.d.ts +21 -0
  26. package/dist/voice/livekit/brain.js +74 -0
  27. package/dist/voice/livekit/config.d.ts +41 -0
  28. package/dist/voice/livekit/config.js +79 -0
  29. package/dist/voice/livekit/eventBridge.d.ts +27 -0
  30. package/dist/voice/livekit/eventBridge.js +359 -0
  31. package/dist/voice/livekit/index.d.ts +15 -0
  32. package/dist/voice/livekit/index.js +15 -0
  33. package/dist/voice/livekit/tokens.d.ts +19 -0
  34. package/dist/voice/livekit/tokens.js +50 -0
  35. package/dist/voice/livekit/voiceAgent.d.ts +32 -0
  36. package/dist/voice/livekit/voiceAgent.js +414 -0
  37. package/dist/voice/livekit/voiceAgentWorker.d.ts +27 -0
  38. package/dist/voice/livekit/voiceAgentWorker.js +57 -0
  39. package/package.json +20 -6
@@ -0,0 +1,414 @@
1
+ /**
2
+ * LiveKit Agents agent definition.
3
+ *
4
+ * `defineVoiceAgent` returns the agent object placed as the default export of a
5
+ * worker entry file. The framework runs it as a Job (one per call, in its own
6
+ * process): it connects to the room, builds the NeuroLink brain via the
7
+ * supplied factory, wires Silero VAD + STT/TTS plugins, and overrides `llmNode`
8
+ * so every turn is generated by `neurolink.stream()`.
9
+ *
10
+ * `@livekit/agents` and the plugins are optional dependencies, imported
11
+ * dynamically so the core package does not require them unless the LiveKit
12
+ * voice agent is used. Type-only imports are erased at build time and add no
13
+ * runtime dependency.
14
+ *
15
+ * See docs/features/livekit-voice-agent.md.
16
+ */
17
+ import { ReadableStream } from "node:stream/web";
18
+ import { logger } from "../../utils/logger.js";
19
+ import { createVoiceBrain } from "./brain.js";
20
+ import { resolveBrainDefaults, resolveEouTurnDetection } from "./config.js";
21
+ import { attachEventBridge } from "./eventBridge.js";
22
+ const DEFAULT_CONVERSATION_PREFIX = "voice";
23
+ // Turn-end timing defaults (approach A: silence tuning). Longer silence +
24
+ // endpointing floor so natural mid-sentence pauses ("...and so, [pause] um")
25
+ // don't end the turn early and split one utterance into two. Overridable via
26
+ // config (vad.minSilenceDuration / turn.minEndpointingDelay).
27
+ const DEFAULT_MIN_SILENCE_DURATION = 1.0; // seconds (Silero VAD)
28
+ const DEFAULT_MIN_ENDPOINTING_DELAY = 1000; // ms (framework endpointing floor)
29
+ /**
30
+ * Find the most recent user utterance in a chat context.
31
+ * Uses the `type === "message"` discriminant — no type assertions.
32
+ */
33
+ function latestUserText(chatCtx) {
34
+ const items = chatCtx.items;
35
+ for (let i = items.length - 1; i >= 0; i -= 1) {
36
+ const item = items[i];
37
+ if (item.type === "message" && item.role === "user") {
38
+ return item.textContent;
39
+ }
40
+ }
41
+ return undefined;
42
+ }
43
+ /**
44
+ * Build a text stream for a single turn from the brain, abortable on cancel.
45
+ * When the framework cancels the stream (barge-in), the brain's turn is aborted; if no chunk had been enqueued yet, `onAbortedBeforeOutput` is also invoked.
46
+ */
47
+ function brainTurnStream(brain, transcript, conversationId, onAbortedBeforeOutput) {
48
+ const controller = new AbortController();
49
+ const generator = brain.streamReply({
50
+ transcript,
51
+ conversationId,
52
+ signal: controller.signal,
53
+ });
54
+ const iterator = generator[Symbol.asyncIterator]();
55
+ let producedOutput = false;
56
+ return new ReadableStream({
57
+ async pull(streamController) {
58
+ const next = await iterator.next();
59
+ if (next.done === true) {
60
+ streamController.close();
61
+ return;
62
+ }
63
+ producedOutput = true;
64
+ streamController.enqueue(next.value);
65
+ },
66
+ cancel() {
67
+ controller.abort();
68
+ if (!producedOutput) {
69
+ onAbortedBeforeOutput?.();
70
+ }
71
+ },
72
+ });
73
+ }
74
+ /**
75
+ * Construct the English semantic end-of-utterance (EOU) turn detector, or `undefined` if disabled.
76
+ *
77
+ * Layered on top of VAD: VAD detects acoustic silence, then this model decides
78
+ * whether the user's turn is semantically complete, so natural mid-sentence
79
+ * pauses don't split one utterance. Opt-in via `LIVEKIT_EOU_TURN_DETECTION`.
80
+ * The runner is registered in the worker process (see `voiceAgentWorker.ts`);
81
+ * here we only construct the model handle, which dispatches inference to the
82
+ * shared executor via the running job context.
83
+ */
84
+ async function loadEouTurnDetector() {
85
+ const { enabled, unlikelyThreshold } = resolveEouTurnDetection();
86
+ if (!enabled) {
87
+ return undefined;
88
+ }
89
+ const { turnDetector } = await import("@livekit/agents-plugin-livekit");
90
+ return new turnDetector.EnglishModel(unlikelyThreshold);
91
+ }
92
+ /**
93
+ * Construct the Silero VAD instance for the session.
94
+ *
95
+ * Stricter-than-default thresholds so background noise isn't treated as speech
96
+ * (a higher activation threshold and a minimum speech duration reject short,
97
+ * quiet noise bursts).
98
+ */
99
+ async function loadVad(config) {
100
+ const silero = await import("@livekit/agents-plugin-silero");
101
+ return silero.VAD.load({
102
+ activationThreshold: config?.activationThreshold ?? 0.6,
103
+ minSpeechDuration: config?.minSpeechDuration ?? 0.2,
104
+ minSilenceDuration: config?.minSilenceDuration ?? DEFAULT_MIN_SILENCE_DURATION,
105
+ });
106
+ }
107
+ /**
108
+ * Construct the STT plugin instance from configuration.
109
+ *
110
+ * Only defined options are passed — passing `undefined` would override the
111
+ * plugin's own defaults (e.g. its default model) with `undefined` and break it.
112
+ */
113
+ async function buildStt(config) {
114
+ if (config.provider === "soniox") {
115
+ const soniox = await import("@livekit/agents-plugin-soniox");
116
+ const opts = {};
117
+ if (config.model !== undefined) {
118
+ opts.model = config.model;
119
+ }
120
+ if (config.language !== undefined) {
121
+ // Soft hint only: Soniox biases toward this language but can still
122
+ // auto-detect another (e.g. the user switching to Telugu mid-call).
123
+ // Do NOT set `languageHintsStrict` — forcing the hinted language makes
124
+ // the realtime STT stall/error on other-language audio and the session
125
+ // never recovers (no further transcripts, so no audio at all).
126
+ opts.languageHints = [config.language];
127
+ }
128
+ if (config.maxEndpointDelayMs !== undefined) {
129
+ opts.maxEndpointDelayMs = config.maxEndpointDelayMs;
130
+ }
131
+ return new soniox.STT(opts);
132
+ }
133
+ if (config.provider === "deepgram") {
134
+ const deepgram = await import("@livekit/agents-plugin-deepgram");
135
+ const opts = {};
136
+ if (config.language !== undefined) {
137
+ opts.language = config.language;
138
+ }
139
+ return new deepgram.STT(opts);
140
+ }
141
+ throw new Error(`Unsupported LiveKit STT provider "${config.provider}" (supported: soniox, deepgram)`);
142
+ }
143
+ /**
144
+ * Construct the TTS plugin instance from configuration.
145
+ *
146
+ * Only defined options are passed — passing `undefined` would override the
147
+ * plugin's own defaults (default voice/model) with `undefined` and break it.
148
+ */
149
+ async function buildTts(config) {
150
+ if (config.provider === "cartesia") {
151
+ const cartesia = await import("@livekit/agents-plugin-cartesia");
152
+ const opts = {};
153
+ if (config.voice !== undefined) {
154
+ opts.voice = config.voice;
155
+ }
156
+ if (config.model !== undefined) {
157
+ opts.model = config.model;
158
+ }
159
+ return new cartesia.TTS(opts);
160
+ }
161
+ if (config.provider === "elevenlabs") {
162
+ const elevenlabs = await import("@livekit/agents-plugin-elevenlabs");
163
+ const opts = {};
164
+ if (config.voice !== undefined) {
165
+ opts.voiceId = config.voice;
166
+ }
167
+ if (config.model !== undefined) {
168
+ opts.modelID = config.model;
169
+ }
170
+ return new elevenlabs.TTS(opts);
171
+ }
172
+ throw new Error(`Unsupported LiveKit TTS provider "${config.provider}" (supported: cartesia, elevenlabs)`);
173
+ }
174
+ /**
175
+ * Define a LiveKit voice agent backed by NeuroLink.
176
+ *
177
+ * Place the result as the default export of the worker entry file:
178
+ *
179
+ * ```ts
180
+ * export default defineVoiceAgent({
181
+ * createNeuroLink: async () => buildConfiguredNeuroLink(),
182
+ * stt: { provider: "deepgram" },
183
+ * tts: { provider: "elevenlabs" },
184
+ * });
185
+ * ```
186
+ */
187
+ export function defineVoiceAgent(config) {
188
+ const defaults = resolveBrainDefaults();
189
+ const provider = config.provider ?? defaults.provider;
190
+ const model = config.model ?? defaults.model;
191
+ const conversationPrefix = config.conversationIdPrefix ?? DEFAULT_CONVERSATION_PREFIX;
192
+ async function entry(ctx) {
193
+ const entryStartedAt = Date.now();
194
+ await ctx.connect();
195
+ logger.debug(`[LiveKitVoiceAgent] Joined room "${ctx.room.name}" in ${Date.now() - entryStartedAt}ms`);
196
+ // When the user actually stopped speaking (VAD), used to measure how long
197
+ // the agent waited after speech before committing the turn to the LLM.
198
+ let userStoppedSpeakingAt;
199
+ const neurolink = await config.createNeuroLink();
200
+ const brain = createVoiceBrain({
201
+ neurolink,
202
+ provider,
203
+ model,
204
+ systemPrompt: config.systemPrompt,
205
+ temperature: config.temperature,
206
+ maxTokens: config.maxTokens,
207
+ userId: config.userId,
208
+ });
209
+ const conversationId = `${conversationPrefix}-${ctx.room.name ?? ctx.job.id}`;
210
+ const { voice, llm } = await import("@livekit/agents");
211
+ const [vad, stt, tts, eouTurnDetector] = await Promise.all([
212
+ loadVad(config.vad),
213
+ buildStt(config.stt),
214
+ buildTts(config.tts),
215
+ loadEouTurnDetector(),
216
+ ]);
217
+ const transcriptEventsEnabled = config.events?.enabled === true &&
218
+ typeof neurolink.getEventEmitter === "function";
219
+ const transcriptEmitter = transcriptEventsEnabled
220
+ ? neurolink.getEventEmitter?.()
221
+ : undefined;
222
+ let userTranscriptBuffer = "";
223
+ let pendingPrefix = "";
224
+ function emitUserTranscriptSegment(segmentText, isFinal) {
225
+ if (transcriptEmitter === undefined) {
226
+ return;
227
+ }
228
+ const trimmed = segmentText.trim();
229
+ if (isFinal) {
230
+ userTranscriptBuffer =
231
+ userTranscriptBuffer.length > 0
232
+ ? `${userTranscriptBuffer} ${trimmed}`
233
+ : trimmed;
234
+ transcriptEmitter.emit("voice:user-transcript", {
235
+ text: userTranscriptBuffer,
236
+ final: false,
237
+ });
238
+ return;
239
+ }
240
+ const live = userTranscriptBuffer.length > 0
241
+ ? `${userTranscriptBuffer} ${trimmed}`
242
+ : trimmed;
243
+ transcriptEmitter.emit("voice:user-transcript", {
244
+ text: live,
245
+ final: false,
246
+ });
247
+ }
248
+ /**
249
+ * Lock the user bubble at turn-end and reset the buffer for the next turn.
250
+ * `replacesPrevious` tells the client this committed turn absorbed a prior
251
+ * interrupted turn, so it should remove the orphaned previous user bubble.
252
+ */
253
+ function commitUserTranscript(finalText, replacesPrevious = false) {
254
+ if (transcriptEmitter !== undefined) {
255
+ transcriptEmitter.emit("voice:user-transcript", {
256
+ text: finalText,
257
+ final: true,
258
+ replacesPrevious,
259
+ });
260
+ }
261
+ userTranscriptBuffer = "";
262
+ }
263
+ class NeuroLinkVoiceAgent extends voice.Agent {
264
+ async llmNode(chatCtx, _toolCtx, _modelSettings) {
265
+ const transcript = latestUserText(chatCtx);
266
+ if (transcript === undefined || transcript.trim().length === 0) {
267
+ userTranscriptBuffer = "";
268
+ return null;
269
+ }
270
+ const hadPrefix = pendingPrefix.length > 0;
271
+ const promptText = hadPrefix
272
+ ? `${pendingPrefix} ${transcript}`
273
+ : transcript;
274
+ pendingPrefix = "";
275
+ commitUserTranscript(promptText, hadPrefix);
276
+ if (userStoppedSpeakingAt !== undefined) {
277
+ logger.debug(`[LiveKitVoiceAgent] Endpointing waited ${Date.now() - userStoppedSpeakingAt}ms before sending turn to LLM`);
278
+ }
279
+ return brainTurnStream(brain, promptText, conversationId, () => {
280
+ // Interrupted before producing any reply → carry this turn's text
281
+ // forward; the next turn merges it (prompt + UI).
282
+ pendingPrefix = promptText;
283
+ });
284
+ }
285
+ }
286
+ class PlaceholderLLM extends llm.LLM {
287
+ label() {
288
+ return "neurolink-placeholder";
289
+ }
290
+ chat() {
291
+ throw new Error("PlaceholderLLM.chat must not be called — llmNode overrides generation");
292
+ }
293
+ }
294
+ const turnHandling = {
295
+ interruption: {
296
+ minWords: config.interruption?.minWords ?? 2,
297
+ minDuration: config.interruption?.minDuration ?? 600,
298
+ },
299
+ };
300
+ if (eouTurnDetector !== undefined) {
301
+ turnHandling.turnDetection = eouTurnDetector;
302
+ logger.info("[LiveKitVoiceAgent] Semantic end-of-utterance turn detection enabled (English)");
303
+ }
304
+ else if (config.turn?.mode) {
305
+ turnHandling.turnDetection = config.turn.mode;
306
+ }
307
+ const endpointing = {};
308
+ endpointing.minDelay =
309
+ config.turn?.minEndpointingDelay ?? DEFAULT_MIN_ENDPOINTING_DELAY;
310
+ if (config.turn?.maxEndpointingDelay !== undefined) {
311
+ endpointing.maxDelay = config.turn.maxEndpointingDelay;
312
+ }
313
+ if (Object.keys(endpointing).length > 0) {
314
+ turnHandling.endpointing = endpointing;
315
+ }
316
+ const session = new voice.AgentSession({
317
+ vad,
318
+ stt,
319
+ tts,
320
+ llm: new PlaceholderLLM(),
321
+ turnHandling,
322
+ // Do NOT speculatively call the LLM on preflight transcripts before the
323
+ // turn ends — with NeuroLink as the brain each call is a real LLM request,
324
+ // and it makes the agent feel like it responds while you're still talking.
325
+ preemptiveGeneration: false,
326
+ });
327
+ const agent = new NeuroLinkVoiceAgent({
328
+ instructions: config.systemPrompt ?? "",
329
+ });
330
+ // Inactivity watchdog: shut the per-call Job down after a stretch with no
331
+ // user or agent activity (mirrors Clairvoyance). On timeout `ctx.shutdown`
332
+ // runs the shutdown callbacks (disposing the bridge) and the Job process
333
+ // exits — freeing its RAM and the EOU model — while the browser observes a
334
+ // room disconnect. Reset on every interaction below. Configure via
335
+ // VOICE_INACTIVITY_TIMEOUT_MS in milliseconds (default 10 min); a value <= 0 or a non-numeric value disables the watchdog.
336
+ const inactivityTimeoutMs = Number(process.env.VOICE_INACTIVITY_TIMEOUT_MS ?? 600_000);
337
+ const inactivityEnabled = Number.isFinite(inactivityTimeoutMs) && inactivityTimeoutMs > 0;
338
+ let inactivityTimer;
339
+ let inactivityFired = false;
340
+ function clearInactivityTimer() {
341
+ if (inactivityTimer !== undefined) {
342
+ clearTimeout(inactivityTimer);
343
+ inactivityTimer = undefined;
344
+ }
345
+ }
346
+ function noteActivity() {
347
+ if (!inactivityEnabled || inactivityFired) {
348
+ return;
349
+ }
350
+ clearInactivityTimer();
351
+ inactivityTimer = setTimeout(() => {
352
+ inactivityFired = true;
353
+ logger.info(`[LiveKitVoiceAgent] Inactivity timeout (${Math.round(inactivityTimeoutMs / 1000)}s) reached — shutting down job for room "${ctx.room.name}"`);
354
+ ctx.shutdown("inactivity timeout");
355
+ }, inactivityTimeoutMs);
356
+ // The watchdog must not, by itself, keep the event loop alive.
357
+ inactivityTimer.unref?.();
358
+ }
359
+ ctx.addShutdownCallback(async () => {
360
+ clearInactivityTimer();
361
+ });
362
+ // Track when the user actually stops speaking (VAD) so endpointing latency
363
+ // can be measured, and reset the inactivity watchdog on user activity.
364
+ session.on(voice.AgentSessionEventTypes.UserStateChanged, (ev) => {
365
+ noteActivity();
366
+ if (ev.oldState === "speaking" && ev.newState !== "speaking") {
367
+ userStoppedSpeakingAt = Date.now();
368
+ }
369
+ });
370
+ // Reset the inactivity watchdog on any agent speech/processing and on every
371
+ // committed conversation item (user turn or agent reply), so the timeout
372
+ // only fires during a genuine lull in the conversation.
373
+ session.on(voice.AgentSessionEventTypes.AgentStateChanged, () => {
374
+ noteActivity();
375
+ });
376
+ session.on(voice.AgentSessionEventTypes.ConversationItemAdded, () => {
377
+ noteActivity();
378
+ });
379
+ // Forward user STT transcripts to the data-channel bridge as a single
380
+ // live-updating bubble. `UserInputTranscribed` fires `isFinal: true` per
381
+ // finalized SEGMENT (several per turn), so we never forward those as the
382
+ // turn-final; `emitUserTranscriptSegment` accumulates them into the per-turn
383
+ // buffer and emits `final: false`. The lone `final: true` is sent from
384
+ // `llmNode` at the real turn boundary.
385
+ if (transcriptEventsEnabled) {
386
+ session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (ev) => {
387
+ emitUserTranscriptSegment(ev.transcript, ev.isFinal);
388
+ });
389
+ }
390
+ logger.info("[LiveKitVoiceAgent] Session starting", {
391
+ room: ctx.room.name,
392
+ provider,
393
+ model,
394
+ });
395
+ await session.start({ agent, room: ctx.room });
396
+ // Start the inactivity countdown now that the session is live; every
397
+ // interaction handler above re-arms it.
398
+ noteActivity();
399
+ // Data-channel event bridge: forward NeuroLink events (text, tool calls,
400
+ // results, HITL prompts, status) to the browser, and accept HITL responses
401
+ // back. Only when enabled and the instance exposes its event emitter.
402
+ if (config.events?.enabled === true && neurolink.getEventEmitter) {
403
+ const bridge = await attachEventBridge({
404
+ room: ctx.room,
405
+ emitter: neurolink.getEventEmitter(),
406
+ options: config.events,
407
+ });
408
+ ctx.addShutdownCallback(async () => {
409
+ bridge.dispose();
410
+ });
411
+ }
412
+ }
413
+ return { entry };
414
+ }
@@ -0,0 +1,27 @@
1
+ /**
2
+ * LiveKit Agents worker launcher.
3
+ *
4
+ * Registers a worker with the LiveKit server (Cloud or self-hosted) for the
5
+ * given agent entry file. LiveKit dispatches one Job per room, each running in
6
+ * its own process, which provides worker-per-call isolation and horizontal
7
+ * scaling. Connection settings are resolved from the environment.
8
+ *
9
+ * `@livekit/agents` is an optional dependency, imported dynamically.
10
+ *
11
+ * See docs/features/livekit-voice-agent.md.
12
+ */
13
+ import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
14
+ /**
15
+ * Launch the LiveKit voice agent worker.
16
+ *
17
+ * Call from a small runner script; `agentFile` must point to the file whose
18
+ * default export is the result of `defineVoiceAgent`.
19
+ *
20
+ * ```ts
21
+ * await startVoiceAgentWorker({
22
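+ * agentFile: fileURLToPath(new URL("./voice-agent-entry.js", import.meta.url)), // fileURLToPath from "node:url"; `.pathname` stays percent-encoded and is malformed on Windows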
23
+ * agentName: "neurolink-voice",
24
+ * });
25
+ * ```
26
+ */
27
+ export declare function startVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
@@ -0,0 +1,57 @@
1
+ /**
2
+ * LiveKit Agents worker launcher.
3
+ *
4
+ * Registers a worker with the LiveKit server (Cloud or self-hosted) for the
5
+ * given agent entry file. LiveKit dispatches one Job per room, each running in
6
+ * its own process, which provides worker-per-call isolation and horizontal
7
+ * scaling. Connection settings are resolved from the environment.
8
+ *
9
+ * `@livekit/agents` is an optional dependency, imported dynamically.
10
+ *
11
+ * See docs/features/livekit-voice-agent.md.
12
+ */
13
+ import { resolveEouTurnDetection, resolveLiveKitServerConfig, } from "./config.js";
14
+ const DEFAULT_AGENT_NAME = "neurolink-voice";
15
+ const EOU_METHOD_MULTILINGUAL = "lk_end_of_utterance_multilingual";
16
+ /**
17
+ * Register the English EOU inference runner in the worker process.
18
+ *
19
+ * Must run before `cli.runApp`: the worker only spawns the shared inference
20
+ * executor when `InferenceRunner.registeredRunners` is non-empty at startup,
21
+ * and passes that registry to the executor process. Importing the plugin
22
+ * registers both English and multilingual runners, so we delete multilingual to
23
+ * keep only the English model loaded.
24
+ */
25
+ async function registerEouTurnDetectorRunner() {
26
+ const { InferenceRunner } = await import("@livekit/agents");
27
+ // Importing the plugin's turn-detector module triggers registerRunner().
28
+ await import("@livekit/agents-plugin-livekit");
29
+ delete InferenceRunner.registeredRunners[EOU_METHOD_MULTILINGUAL];
30
+ }
31
+ /**
32
+ * Launch the LiveKit voice agent worker.
33
+ *
34
+ * Call from a small runner script; `agentFile` must point to the file whose
35
+ * default export is the result of `defineVoiceAgent`.
36
+ *
37
+ * ```ts
38
+ * await startVoiceAgentWorker({
39
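+ * agentFile: fileURLToPath(new URL("./voice-agent-entry.js", import.meta.url)), // fileURLToPath from "node:url"; `.pathname` stays percent-encoded and is malformed on Windows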
40
+ * agentName: "neurolink-voice",
41
+ * });
42
+ * ```
43
+ */
44
+ export async function startVoiceAgentWorker(options) {
45
+ const server = resolveLiveKitServerConfig();
46
+ const { cli, WorkerOptions } = await import("@livekit/agents");
47
+ if (resolveEouTurnDetection().enabled) {
48
+ await registerEouTurnDetectorRunner();
49
+ }
50
+ cli.runApp(new WorkerOptions({
51
+ agent: options.agentFile,
52
+ agentName: options.agentName ?? DEFAULT_AGENT_NAME,
53
+ wsURL: server.url,
54
+ apiKey: server.apiKey,
55
+ apiSecret: server.apiSecret,
56
+ }));
57
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "9.69.3",
3
+ "version": "9.70.0",
4
4
  "packageManager": "pnpm@10.15.1",
5
5
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications with 21+ providers: OpenAI, Anthropic, Google AI Studio, Google Vertex, AWS Bedrock, Azure OpenAI, Mistral, LiteLLM, SageMaker, Hugging Face, Ollama, OpenAI-compatible, OpenRouter, DeepSeek, NVIDIA NIM, LM Studio, llama.cpp, plus voice (OpenAI TTS, ElevenLabs, Deepgram, Azure Speech).",
6
6
  "author": {
@@ -294,6 +294,11 @@
294
294
  "types": "./dist/adapters/*.d.ts",
295
295
  "import": "./dist/adapters/*.js",
296
296
  "default": "./dist/adapters/*.js"
297
+ },
298
+ "./livekit": {
299
+ "types": "./dist/voice/livekit/index.d.ts",
300
+ "import": "./dist/voice/livekit/index.js",
301
+ "default": "./dist/voice/livekit/index.js"
297
302
  }
298
303
  },
299
304
  "dependencies": {
@@ -332,7 +337,7 @@
332
337
  "dotenv": "^17.3.1",
333
338
  "eventsource-parser": "^3.0.8",
334
339
  "google-auth-library": "^10.6.1",
335
- "hono": "^4.12.3",
340
+ "hono": "^4.12.21",
336
341
  "inquirer": "^13.3.0",
337
342
  "jose": "^6.1.3",
338
343
  "json-schema-to-zod": "^2.7.0",
@@ -343,7 +348,7 @@
343
348
  "redis": "^5.11.0",
344
349
  "tar-stream": "^3.1.8",
345
350
  "undici": ">=7.22.0",
346
- "ws": "^8.19.0",
351
+ "ws": "^8.20.1",
347
352
  "yargs": "^18.0.0",
348
353
  "zod": "^4.3.6",
349
354
  "zod-to-json-schema": "^3.25.1"
@@ -370,10 +375,18 @@
370
375
  "@aws-sdk/client-sagemaker": "^3.1000.0",
371
376
  "@fastify/cors": "^11.2.0",
372
377
  "@fastify/rate-limit": "^10.3.0",
373
- "@hono/node-server": "^1.19.9",
378
+ "@hono/node-server": "^1.19.13",
374
379
  "@koa/cors": "^5.0.0",
375
380
  "@koa/router": "^15.3.1",
376
381
  "@langfuse/otel": "^5.0.1",
382
+ "@livekit/agents": "^1.4.5",
383
+ "@livekit/agents-plugin-cartesia": "^1.4.5",
384
+ "@livekit/agents-plugin-deepgram": "^1.4.5",
385
+ "@livekit/agents-plugin-elevenlabs": "^1.4.5",
386
+ "@livekit/agents-plugin-livekit": "^1.4.5",
387
+ "@livekit/agents-plugin-silero": "^1.4.5",
388
+ "@livekit/agents-plugin-soniox": "^1.4.5",
389
+ "@livekit/rtc-node": "^0.13.29",
377
390
  "@picovoice/cobra-node": "^3.0.2",
378
391
  "bullmq": "^5.52.2",
379
392
  "cors": "^2.8.5",
@@ -385,6 +398,7 @@
385
398
  "fluent-ffmpeg": "^2.1.3",
386
399
  "koa": "^3.1.1",
387
400
  "koa-bodyparser": "^4.4.1",
401
+ "livekit-server-sdk": "^2.15.4",
388
402
  "mammoth": "^1.11.0",
389
403
  "mediabunny": "^1.40.1",
390
404
  "music-metadata": "^11.11.2",
@@ -449,7 +463,7 @@
449
463
  "react-dom": "^19.2.4",
450
464
  "semantic-release": "^25.0.3",
451
465
  "shell-quote": "^1.8.3",
452
- "svelte": "^5.53.6",
466
+ "svelte": "^5.55.7",
453
467
  "svelte-check": "^4.4.4",
454
468
  "ts-morph": "^24.0.0",
455
469
  "tslib": "^2.8.1",
@@ -457,7 +471,7 @@
457
471
  "typedoc": "^0.28.17",
458
472
  "typedoc-plugin-markdown": "^4.10.0",
459
473
  "typescript": "^5.9.3",
460
- "vite": "^8.0.1",
474
+ "vite": "^8.0.5",
461
475
  "vitest": "^4.1.0",
462
476
  "why-is-node-running": "^3.2.2"
463
477
  },