@juspay/neurolink 9.71.0 → 9.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +330 -312
  3. package/dist/core/constants.d.ts +1 -0
  4. package/dist/core/constants.js +2 -0
  5. package/dist/core/toolRouting.d.ts +59 -0
  6. package/dist/core/toolRouting.js +232 -0
  7. package/dist/lib/core/constants.d.ts +1 -0
  8. package/dist/lib/core/constants.js +2 -0
  9. package/dist/lib/core/toolRouting.d.ts +59 -0
  10. package/dist/lib/core/toolRouting.js +233 -0
  11. package/dist/lib/neurolink.d.ts +31 -1
  12. package/dist/lib/neurolink.js +188 -1
  13. package/dist/lib/telemetry/attributes.js +3 -1
  14. package/dist/lib/types/config.d.ts +8 -0
  15. package/dist/lib/types/index.d.ts +1 -0
  16. package/dist/lib/types/index.js +1 -0
  17. package/dist/lib/types/livekit.d.ts +134 -0
  18. package/dist/lib/types/toolRouting.d.ts +91 -0
  19. package/dist/lib/types/toolRouting.js +19 -0
  20. package/dist/lib/voice/livekit/brain.js +1 -1
  21. package/dist/lib/voice/livekit/config.d.ts +12 -1
  22. package/dist/lib/voice/livekit/config.js +54 -0
  23. package/dist/lib/voice/livekit/eventBridge.js +4 -4
  24. package/dist/lib/voice/livekit/index.d.ts +9 -2
  25. package/dist/lib/voice/livekit/index.js +9 -2
  26. package/dist/lib/voice/livekit/realtimeEventBridge.d.ts +14 -0
  27. package/dist/lib/voice/livekit/realtimeEventBridge.js +161 -0
  28. package/dist/lib/voice/livekit/realtimeMcpTools.d.ts +31 -0
  29. package/dist/lib/voice/livekit/realtimeMcpTools.js +194 -0
  30. package/dist/lib/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
  31. package/dist/lib/voice/livekit/realtimeVoiceAgent.js +362 -0
  32. package/dist/lib/voice/livekit/roomContext.d.ts +23 -0
  33. package/dist/lib/voice/livekit/roomContext.js +57 -0
  34. package/dist/lib/voice/livekit/roomDispatch.d.ts +24 -0
  35. package/dist/lib/voice/livekit/roomDispatch.js +31 -0
  36. package/dist/lib/voice/livekit/schemaSanitizer.d.ts +26 -0
  37. package/dist/lib/voice/livekit/schemaSanitizer.js +144 -0
  38. package/dist/lib/voice/livekit/vertexAuth.d.ts +30 -0
  39. package/dist/lib/voice/livekit/vertexAuth.js +73 -0
  40. package/dist/lib/voice/livekit/voiceAgent.js +47 -37
  41. package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +2 -0
  42. package/dist/lib/voice/livekit/voiceAgentWorker.js +64 -0
  43. package/dist/neurolink.d.ts +31 -1
  44. package/dist/neurolink.js +188 -1
  45. package/dist/telemetry/attributes.js +3 -1
  46. package/dist/types/config.d.ts +8 -0
  47. package/dist/types/index.d.ts +1 -0
  48. package/dist/types/index.js +1 -0
  49. package/dist/types/livekit.d.ts +134 -0
  50. package/dist/types/toolRouting.d.ts +91 -0
  51. package/dist/types/toolRouting.js +18 -0
  52. package/dist/voice/livekit/brain.js +1 -1
  53. package/dist/voice/livekit/config.d.ts +12 -1
  54. package/dist/voice/livekit/config.js +54 -0
  55. package/dist/voice/livekit/eventBridge.js +4 -4
  56. package/dist/voice/livekit/index.d.ts +9 -2
  57. package/dist/voice/livekit/index.js +9 -2
  58. package/dist/voice/livekit/realtimeEventBridge.d.ts +14 -0
  59. package/dist/voice/livekit/realtimeEventBridge.js +160 -0
  60. package/dist/voice/livekit/realtimeMcpTools.d.ts +31 -0
  61. package/dist/voice/livekit/realtimeMcpTools.js +193 -0
  62. package/dist/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
  63. package/dist/voice/livekit/realtimeVoiceAgent.js +361 -0
  64. package/dist/voice/livekit/roomContext.d.ts +23 -0
  65. package/dist/voice/livekit/roomContext.js +56 -0
  66. package/dist/voice/livekit/roomDispatch.d.ts +24 -0
  67. package/dist/voice/livekit/roomDispatch.js +30 -0
  68. package/dist/voice/livekit/schemaSanitizer.d.ts +26 -0
  69. package/dist/voice/livekit/schemaSanitizer.js +143 -0
  70. package/dist/voice/livekit/vertexAuth.d.ts +30 -0
  71. package/dist/voice/livekit/vertexAuth.js +72 -0
  72. package/dist/voice/livekit/voiceAgent.js +47 -37
  73. package/dist/voice/livekit/voiceAgentWorker.d.ts +2 -0
  74. package/dist/voice/livekit/voiceAgentWorker.js +64 -0
  75. package/package.json +2 -1
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Per-call context from LiveKit room metadata.
3
+ *
4
+ * The manager (e.g. a Lighthouse `/start` endpoint) pre-creates the room with
5
+ * `base64(JSON({ authToken, mcpContext }))` metadata, built from the caller's
6
+ * session. The worker reads it on join — nothing per-call comes from worker env.
7
+ * Returns the MCP `x-auth-token` and the base64(JSON) `x-context` the server
8
+ * expects.
9
+ *
10
+ * The metadata is untrusted input, so it is decoded with a zod schema rather
11
+ * than a trusted `JSON.parse` cast.
12
+ *
13
+ * See docs/features/livekit-voice-agent.md.
14
+ */
15
+ import { Buffer } from "node:buffer";
16
+ import { z } from "zod";
17
+ import { logger } from "../../utils/logger.js";
18
+ /** Shape the manager writes into room metadata: an optional string `authToken` and an optional `mcpContext` that is accepted as-is (opaque here, not validated). */
19
+ const roomMetadataSchema = z.object({
20
+ authToken: z.string().optional(),
21
+ mcpContext: z.unknown().optional(),
22
+ });
23
/**
 * Base64-decode `encoded` as UTF-8 text and parse that text as JSON.
 *
 * @param encoded base64 string expected to wrap a JSON document.
 * @returns the parsed value, or `undefined` (after logging an error) when the
 *   decoded text is not valid JSON.
 */
function decodeBase64Json(encoded) {
    let parsed;
    try {
        const jsonText = Buffer.from(encoded, "base64").toString("utf-8");
        parsed = JSON.parse(jsonText);
    }
    catch (error) {
        logger.error(`[RealtimeVoiceAgent] room metadata is not valid base64 JSON: ${String(error)}`);
        return undefined;
    }
    return parsed;
}
33
/**
 * Decode `{ authToken, mcpContext }` from a room's base64(JSON) metadata.
 *
 * `authToken` may be empty (demo/guest, where the MCP server gates on the
 * context's `demoMode`); `xContext` is the re-encoded base64(JSON) of
 * `mcpContext`, or `""` when no context was supplied or the metadata is invalid.
 *
 * @param roomMetadata base64(JSON) metadata string of the room; may be empty
 *   or missing.
 * @returns `{ authToken, xContext }`; both are `""` when the metadata is
 *   absent, undecodable, or does not match the expected shape.
 */
export function readCallContextFromRoom(roomMetadata) {
    const empty = { authToken: "", xContext: "" };
    if (!roomMetadata) {
        logger.warn("[RealtimeVoiceAgent] room has no metadata — MCP auth/context unavailable.");
        return empty;
    }
    const payload = decodeBase64Json(roomMetadata);
    if (payload === undefined) {
        // JSON never parses to `undefined`, so this can only mean the decode
        // failed — and decodeBase64Json has already logged why. Validating the
        // `undefined` would just add a second, misleading "unexpected shape" error.
        return empty;
    }
    const decoded = roomMetadataSchema.safeParse(payload);
    if (!decoded.success) {
        logger.error(`[RealtimeVoiceAgent] room metadata has unexpected shape: ${decoded.error.message}`);
        return empty;
    }
    const { authToken, mcpContext } = decoded.data;
    // A missing or null context is forwarded as an empty header value rather
    // than the encoded string "null".
    const xContext = mcpContext === undefined || mcpContext === null
        ? ""
        : Buffer.from(JSON.stringify(mcpContext), "utf-8").toString("base64");
    return { authToken: authToken ?? "", xContext };
}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * LiveKit server-side room operations: create a room with metadata, and
3
+ * dispatch a named agent to a room.
4
+ *
5
+ * Wraps `livekit-server-sdk` (an optional dependency, imported dynamically) so
6
+ * consumers route all LiveKit *server* calls through `@juspay/neurolink/livekit`
7
+ * — they never depend on the SDK directly. Mirrors `mintJoinToken`.
8
+ */
9
+ import type { LiveKitServerCredentials } from "../../types/index.js";
10
+ export declare function createVoiceRoom(req: LiveKitServerCredentials & {
11
+ room: string;
12
+ metadata?: string;
13
+ emptyTimeoutSeconds?: number;
14
+ departureTimeoutSeconds?: number;
15
+ }): Promise<void>;
16
+ /**
17
+ * Explicitly dispatch a named agent to a room. The long-lived worker registered
18
+ * under `agentName` receives the job and forks a child to run the call.
19
+ */
20
+ export declare function dispatchVoiceAgent(req: LiveKitServerCredentials & {
21
+ room: string;
22
+ agentName: string;
23
+ metadata?: string;
24
+ }): Promise<void>;
@@ -0,0 +1,30 @@
1
+ /**
2
+ * LiveKit server-side room operations: create a room with metadata, and
3
+ * dispatch a named agent to a room.
4
+ *
5
+ * Wraps `livekit-server-sdk` (an optional dependency, imported dynamically) so
6
+ * consumers route all LiveKit *server* calls through `@juspay/neurolink/livekit`
7
+ * — they never depend on the SDK directly. Mirrors `mintJoinToken`.
8
+ */
9
/**
 * Map a WebSocket URL onto its HTTP counterpart: `ws://` becomes `http://` and
 * `wss://` becomes `https://`. Only a leading scheme is rewritten (matched
 * case-insensitively); any other string is returned unchanged, so a host name
 * that merely starts with "ws" is not corrupted.
 */
const toHttpUrl = (url) => url.replace(/^ws(s?):\/\//i, (_match, secure) => (secure ? "https://" : "http://"));
10
/**
 * Create a LiveKit room through `RoomServiceClient.createRoom`.
 *
 * `livekit-server-sdk` is loaded lazily on each call. Omitted options fall back
 * to: `metadata` → `""`, `emptyTimeoutSeconds` → 300, `departureTimeoutSeconds` → 20.
 *
 * @param req server credentials (`url`, `apiKey`, `apiSecret`) plus `room` and
 *   the optional `metadata` / timeout overrides.
 * @returns a promise that resolves once the create call has completed.
 */
export async function createVoiceRoom(req) {
    const sdk = await import("livekit-server-sdk");
    const roomService = new sdk.RoomServiceClient(toHttpUrl(req.url), req.apiKey, req.apiSecret);
    const roomOptions = {
        name: req.room,
        metadata: req.metadata ?? "",
        emptyTimeout: req.emptyTimeoutSeconds ?? 300,
        departureTimeout: req.departureTimeoutSeconds ?? 20,
    };
    await roomService.createRoom(roomOptions);
}
20
/**
 * Explicitly dispatch a named agent to a room. The long-lived worker registered
 * under `agentName` receives the job and forks a child to run the call.
 *
 * `livekit-server-sdk` is loaded lazily on each call; a missing `metadata` is
 * sent as `""`.
 *
 * @param req server credentials (`url`, `apiKey`, `apiSecret`) plus `room`,
 *   `agentName` and optional `metadata`.
 * @returns a promise that resolves once the dispatch call has completed.
 */
export async function dispatchVoiceAgent(req) {
    const sdk = await import("livekit-server-sdk");
    const dispatcher = new sdk.AgentDispatchClient(toHttpUrl(req.url), req.apiKey, req.apiSecret);
    const dispatchOptions = { metadata: req.metadata ?? "" };
    await dispatcher.createDispatch(req.room, req.agentName, dispatchOptions);
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Gemini function-calling schema sanitizer.
3
+ *
4
+ * Normalises an MCP tool's JSON Schema into the subset Gemini's function-calling
5
+ * accepts. `@google/genai`'s processJsonSchema crashes on untyped nodes,
6
+ * `$ref`/`$defs`, and some `anyOf`/`oneOf` shapes, so we rebuild a clean tree:
7
+ * every node gets a concrete `type`, unions collapse to their first concrete
8
+ * branch, and unsupported keywords are dropped.
9
+ *
10
+ * Pure, dependency-free, and assertion-free — values arrive as `unknown` and are
11
+ * narrowed with the `isRecord` guard. Safe to reuse for any Gemini tool path.
12
+ */
13
+ /**
14
+ * Rebuild a JSON Schema node into the Gemini-safe subset. Returns a fresh object
15
+ * with a concrete `type` on every node.
16
+ */
17
+ export declare function sanitizeSchema(node: unknown): Record<string, unknown>;
18
+ /** Tool parameters must be an object schema; force it and sanitize the tree. */
19
+ export declare function sanitizeToolParameters(schema: unknown): Record<string, unknown>;
20
+ /**
21
+ * Walk a (sanitized) schema and return the first node the google plugin would
22
+ * turn into `undefined` — which genai then crashes on. Returns a human-readable
23
+ * path/reason, or `null` if the schema is safe. After `sanitizeSchema` this
24
+ * should always be `null`; if not, it names the exact offending path.
25
+ */
26
+ export declare function findSchemaIssue(node: unknown, pathPrefix?: string): string | null;
@@ -0,0 +1,143 @@
1
+ /**
2
+ * Gemini function-calling schema sanitizer.
3
+ *
4
+ * Normalises an MCP tool's JSON Schema into the subset Gemini's function-calling
5
+ * accepts. `@google/genai`'s processJsonSchema crashes on untyped nodes,
6
+ * `$ref`/`$defs`, and some `anyOf`/`oneOf` shapes, so we rebuild a clean tree:
7
+ * every node gets a concrete `type`, unions collapse to their first concrete
8
+ * branch, and unsupported keywords are dropped.
9
+ *
10
+ * Pure, dependency-free, and assertion-free — values arrive as `unknown` and are
11
+ * narrowed with the `isRecord` guard. Safe to reuse for any Gemini tool path.
12
+ */
13
/** JSON Schema `type` names kept as-is; any other string type is coerced to `"string"`. */
const GEMINI_TYPES = new Set([
    "string",
    "number",
    "integer",
    "boolean",
    "array",
    "object",
]);
/** True for a non-null, non-array object — the only shape treated as a schema node. */
function isRecord(value) {
    return typeof value === "object" && value !== null && !Array.isArray(value);
}
/**
 * The first concrete (non-`"null"`-typed) branch of an `anyOf`/`oneOf`/`allOf`
 * union, or `undefined` when there is no union to collapse.
 *
 * Only the first of the three keywords that is present is inspected.
 */
function firstConcreteUnionBranch(schema) {
    const union = schema.anyOf ?? schema.oneOf ?? schema.allOf;
    if (!Array.isArray(union)) {
        return undefined;
    }
    return union.find((branch) => isRecord(branch) && branch.type !== "null");
}
/**
 * Pick the concrete `type` for a node.
 *
 * Order of precedence: a string `type` (unsupported names become `"string"`),
 * the first supported entry of a `type` array, then structural hints
 * (`properties` → `"object"`, `items` → `"array"`), and finally `"string"`.
 */
function resolveSchemaType(schema) {
    if (typeof schema.type === "string") {
        return GEMINI_TYPES.has(schema.type) ? schema.type : "string";
    }
    if (Array.isArray(schema.type)) {
        // Take the first *supported* entry, so `"null"` and unknown names ahead
        // of a usable type (e.g. ["custom", "integer"]) do not hide it.
        const named = schema.type.find((entry) => typeof entry === "string" && GEMINI_TYPES.has(entry));
        if (named !== undefined) {
            return named;
        }
    }
    if (isRecord(schema.properties)) {
        return "object";
    }
    if (schema.items !== undefined) {
        return "array";
    }
    return "string";
}
/**
 * Fill in the object-specific members of `out` from `schema`: sanitized
 * `properties`, a `required` list restricted to declared properties, and
 * `additionalProperties: true` when no properties are declared.
 */
function sanitizeObjectMembers(schema, out) {
    const properties = isRecord(schema.properties) ? schema.properties : {};
    // Object.fromEntries defines own data properties, so a key such as
    // "__proto__" is kept as a property instead of replacing the prototype.
    const sanitizedProperties = Object.fromEntries(Object.entries(properties).map(([key, value]) => [key, sanitizeSchema(value)]));
    out.properties = sanitizedProperties;
    if (Array.isArray(schema.required)) {
        // Own-property check: the `in` operator would also accept inherited
        // names like "toString" that were never declared as properties.
        out.required = schema.required.filter((name) => typeof name === "string" &&
            Object.prototype.hasOwnProperty.call(sanitizedProperties, name));
    }
    if (Object.keys(sanitizedProperties).length === 0) {
        out.additionalProperties = true;
    }
}
/**
 * Rebuild a JSON Schema node into the Gemini-safe subset. Returns a fresh object
 * with a concrete `type` on every node.
 *
 * Non-object input becomes `{ type: "string" }`. A node without a string `type`
 * that carries a union collapses to its first concrete branch, keeping the outer
 * `description` when there is one. Only `description`, `type`, `enum`,
 * `properties`, `required`, `additionalProperties` and `items` are emitted.
 *
 * @param node schema node of unknown shape.
 * @returns the sanitized node.
 */
export function sanitizeSchema(node) {
    if (!isRecord(node)) {
        return { type: "string" };
    }
    const out = {};
    if (typeof node.description === "string") {
        out.description = node.description;
    }
    if (typeof node.type !== "string") {
        const branch = firstConcreteUnionBranch(node);
        if (branch !== undefined) {
            const merged = sanitizeSchema(branch);
            return out.description
                ? { ...merged, description: out.description }
                : merged;
        }
    }
    const type = resolveSchemaType(node);
    out.type = type;
    if (Array.isArray(node.enum)) {
        // Copy so the result never aliases the caller's array.
        out.enum = [...node.enum];
    }
    if (type === "object") {
        sanitizeObjectMembers(node, out);
    }
    if (type === "array") {
        out.items = sanitizeSchema(node.items);
    }
    return out;
}
/**
 * Tool parameters must be an object schema; force it and sanitize the tree.
 *
 * @param schema the tool's parameter schema; `null`/`undefined` is treated as `{}`.
 * @returns the sanitized schema, or an empty open object schema when the
 *   sanitized root is not of type `"object"`.
 */
export function sanitizeToolParameters(schema) {
    const sanitized = sanitizeSchema(schema ?? {});
    if (sanitized.type !== "object") {
        return { type: "object", properties: {}, additionalProperties: true };
    }
    return sanitized;
}
109
/**
 * Depth-first check of a (sanitized) schema for the first node the google
 * plugin would turn into `undefined` — which genai then crashes on.
 *
 * Reported problems: a node that is not a plain object, a node without a string
 * `type`, and an object node with no properties and no `additionalProperties`.
 * Object properties are visited in key order; array nodes recurse into `items`.
 *
 * @param node schema node to inspect.
 * @param pathPrefix path label used in the message; defaults to `"$"`.
 * @returns a human-readable `path: reason` string for the first offending node,
 *   or `null` if none is found.
 */
export function findSchemaIssue(node, pathPrefix = "$") {
    const isPlainObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
    if (!isPlainObject(node)) {
        return `${pathPrefix}: not an object schema`;
    }
    const { type } = node;
    if (typeof type !== "string") {
        return `${pathPrefix}: missing string "type"`;
    }
    if (type === "array") {
        return findSchemaIssue(node.items, `${pathPrefix}[]`);
    }
    if (type !== "object") {
        return null;
    }
    const entries = isPlainObject(node.properties)
        ? Object.entries(node.properties)
        : [];
    // `== null` deliberately matches both undefined and null.
    if (entries.length === 0 && node.additionalProperties == null) {
        return `${pathPrefix}: empty object schema without additionalProperties (plugin → undefined)`;
    }
    for (const [key, value] of entries) {
        const childIssue = findSchemaIssue(value, `${pathPrefix}.${key}`);
        if (childIssue) {
            return childIssue;
        }
    }
    return null;
}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Vertex authentication helpers for the realtime voice agent.
3
+ *
4
+ * The Gemini Live WebSocket authenticates to Vertex via Application Default
5
+ * Credentials (ADC). These helpers materialise ADC from the split
6
+ * `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
7
+ * any Gemini Developer API key from the environment so `@google/genai` uses
8
+ * Vertex/ADC auth (not an API key) for the realtime WebSocket.
9
+ *
10
+ * See docs/features/livekit-voice-agent.md.
11
+ */
12
+ /**
13
+ * Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
14
+ *
15
+ * The google realtime plugin authenticates Vertex via ADC (it does not accept
16
+ * inline credentials), so this writes a temp service-account JSON and points
17
+ * `GOOGLE_APPLICATION_CREDENTIALS` at it — unless ADC is already configured.
18
+ * No-op when `GOOGLE_APPLICATION_CREDENTIALS` is set or the `GOOGLE_AUTH_*`
19
+ * fields are absent (auth then relies on ambient ADC).
20
+ */
21
+ export declare function ensureVertexAdc(): void;
22
+ /**
23
+ * Force pure Vertex/ADC auth for the Gemini Live WebSocket.
24
+ *
25
+ * `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
26
+ * WebSocket auth even when `vertexai: true` and project/location are set, which
27
+ * Vertex rejects at the handshake (WS close 1006). The realtime worker only
28
+ * ever talks to Vertex, so remove these keys (only affects this process).
29
+ */
30
+ export declare function clearGeminiApiKeyEnv(): void;
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Vertex authentication helpers for the realtime voice agent.
3
+ *
4
+ * The Gemini Live WebSocket authenticates to Vertex via Application Default
5
+ * Credentials (ADC). These helpers materialise ADC from the split
6
+ * `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
7
+ * any Gemini Developer API key from the environment so `@google/genai` uses
8
+ * Vertex/ADC auth (not an API key) for the realtime WebSocket.
9
+ *
10
+ * See docs/features/livekit-voice-agent.md.
11
+ */
12
+ import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
13
+ import os from "node:os";
14
+ import path from "node:path";
15
+ import { logger } from "../../utils/logger.js";
16
/**
 * Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
 *
 * The google realtime plugin authenticates Vertex via ADC (it does not accept
 * inline credentials), so this writes a temp service-account JSON and points
 * `GOOGLE_APPLICATION_CREDENTIALS` at it — unless ADC is already configured.
 * No-op when `GOOGLE_APPLICATION_CREDENTIALS` is set or the `GOOGLE_AUTH_*`
 * fields are absent (auth then relies on ambient ADC).
 *
 * The file is created with mode 0o600 inside a fresh `mkdtemp` directory, and
 * the directory is removed on the process `exit` event. If writing the file
 * fails, the directory is removed immediately and the error is rethrown.
 *
 * @throws whatever `mkdtempSync` / `writeFileSync` throw.
 */
export function ensureVertexAdc() {
    if (process.env.GOOGLE_APPLICATION_CREDENTIALS) {
        return;
    }
    const clientEmail = process.env.GOOGLE_AUTH_CLIENT_EMAIL;
    const rawPrivateKey = process.env.GOOGLE_AUTH_PRIVATE_KEY;
    if (!clientEmail || !rawPrivateKey) {
        logger.warn("[RealtimeVoiceAgent] No GOOGLE_APPLICATION_CREDENTIALS and no GOOGLE_AUTH_* fields — Vertex auth will rely on ambient ADC.");
        return;
    }
    const credentials = {
        type: process.env.GOOGLE_AUTH_TYPE ?? "service_account",
        project_id: process.env.GOOGLE_AUTH_BREEZE_PROJECT_ID ??
            process.env.GOOGLE_CLOUD_PROJECT_ID,
        private_key_id: process.env.GOOGLE_AUTH_PRIVATE_KEY_ID,
        // Env values carry the key with literal "\n" sequences; restore real newlines.
        private_key: rawPrivateKey.replace(/\\n/g, "\n"),
        client_email: clientEmail,
        token_uri: process.env.GOOGLE_AUTH_TOKEN_URI ??
            "https://oauth2.googleapis.com/token",
    };
    const credentialsDir = mkdtempSync(path.join(os.tmpdir(), "vertex-adc-"));
    const removeCredentialsDir = () => {
        rmSync(credentialsDir, { recursive: true, force: true });
    };
    const credentialsPath = path.join(credentialsDir, "adc.json");
    try {
        // "wx" fails rather than overwriting an existing file.
        writeFileSync(credentialsPath, JSON.stringify(credentials), {
            mode: 0o600,
            flag: "wx",
        });
    }
    catch (error) {
        // Do not leave an orphaned temp directory behind when the write fails.
        removeCredentialsDir();
        throw error;
    }
    process.on("exit", removeCredentialsDir);
    process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsPath;
    logger.info(`[RealtimeVoiceAgent] Vertex ADC written to ${credentialsPath} (project ${credentials.project_id}).`);
}
57
/**
 * Force pure Vertex/ADC auth for the Gemini Live WebSocket.
 *
 * `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
 * WebSocket auth even when `vertexai: true` and project/location are set, which
 * Vertex rejects at the handshake (WS close 1006). The realtime worker only
 * ever talks to Vertex, so remove these keys (only affects this process).
 *
 * Deletes `GOOGLE_API_KEY`, `GOOGLE_AI_API_KEY` and `GEMINI_API_KEY` from
 * `process.env` when they hold a non-empty value, logging each removal.
 */
export function clearGeminiApiKeyEnv() {
    const apiKeyVars = ["GOOGLE_API_KEY", "GOOGLE_AI_API_KEY", "GEMINI_API_KEY"];
    apiKeyVars
        .filter((name) => Boolean(process.env[name]))
        .forEach((name) => {
        delete process.env[name];
        logger.info(`[RealtimeVoiceAgent] cleared ${name} so genai uses Vertex/ADC auth (not API key) for the Live WS.`);
    });
}
@@ -192,9 +192,20 @@ export function defineVoiceAgent(config) {
192
192
  async function entry(ctx) {
193
193
  const entryStartedAt = Date.now();
194
194
  await ctx.connect();
195
- logger.debug(`[LiveKitVoiceAgent] Joined room "${ctx.room.name}" in ${Date.now() - entryStartedAt}ms`);
196
- // When the user actually stopped speaking (VAD), used to measure how long
197
- // the agent waited after speech before committing the turn to the LLM.
195
+ logger.debug("voice.agent.roomJoined", {
196
+ room: ctx.room.name,
197
+ ms: Date.now() - entryStartedAt,
198
+ });
199
+ const { RoomEvent } = await import("@livekit/rtc-node");
200
+ ctx.room.on(RoomEvent.ParticipantDisconnected, () => {
201
+ if (ctx.room.remoteParticipants.size === 0) {
202
+ logger.info("voice.agent.participantLeft", {
203
+ room: ctx.room.name,
204
+ action: "shutdown",
205
+ });
206
+ ctx.shutdown("participant left");
207
+ }
208
+ });
198
209
  let userStoppedSpeakingAt;
199
210
  const neurolink = await config.createNeuroLink();
200
211
  const brain = createVoiceBrain({
@@ -245,11 +256,6 @@ export function defineVoiceAgent(config) {
245
256
  final: false,
246
257
  });
247
258
  }
248
- /**
249
- * Lock the user bubble at turn-end and reset the buffer for the next turn.
250
- * `replacesPrevious` tells the client this committed turn absorbed a prior
251
- * interrupted turn, so it should remove the orphaned previous user bubble.
252
- */
253
259
  function commitUserTranscript(finalText, replacesPrevious = false) {
254
260
  if (transcriptEmitter !== undefined) {
255
261
  transcriptEmitter.emit("voice:user-transcript", {
@@ -274,7 +280,9 @@ export function defineVoiceAgent(config) {
274
280
  pendingPrefix = "";
275
281
  commitUserTranscript(promptText, hadPrefix);
276
282
  if (userStoppedSpeakingAt !== undefined) {
277
- logger.debug(`[LiveKitVoiceAgent] Endpointing waited ${Date.now() - userStoppedSpeakingAt}ms before sending turn to LLM`);
283
+ logger.debug("voice.agent.endpointingWaited", {
284
+ ms: Date.now() - userStoppedSpeakingAt,
285
+ });
278
286
  }
279
287
  return brainTurnStream(brain, promptText, conversationId, () => {
280
288
  // Interrupted before producing any reply → carry this turn's text
@@ -299,7 +307,7 @@ export function defineVoiceAgent(config) {
299
307
  };
300
308
  if (eouTurnDetector !== undefined) {
301
309
  turnHandling.turnDetection = eouTurnDetector;
302
- logger.info("[LiveKitVoiceAgent] Semantic end-of-utterance turn detection enabled (English)");
310
+ logger.info("voice.agent.eouEnabled", { language: "english" });
303
311
  }
304
312
  else if (config.turn?.mode) {
305
313
  turnHandling.turnDetection = config.turn.mode;
@@ -319,20 +327,11 @@ export function defineVoiceAgent(config) {
319
327
  tts,
320
328
  llm: new PlaceholderLLM(),
321
329
  turnHandling,
322
- // Do NOT speculatively call the LLM on preflight transcripts before the
323
- // turn ends — with NeuroLink as the brain each call is a real LLM request,
324
- // and it makes the agent feel like it responds while you're still talking.
325
330
  preemptiveGeneration: false,
326
331
  });
327
332
  const agent = new NeuroLinkVoiceAgent({
328
333
  instructions: config.systemPrompt ?? "",
329
334
  });
330
- // Inactivity watchdog: shut the per-call Job down after a stretch with no
331
- // user or agent activity (mirrors Clairvoyance). On timeout `ctx.shutdown`
332
- // runs the shutdown callbacks (disposing the bridge) and the Job process
333
- // exits — freeing its RAM and the EOU model — while the browser observes a
334
- // room disconnect. Reset on every interaction below. Configure via
335
- // VOICE_INACTIVITY_TIMEOUT_MS (default 10 min); <= 0 disables the watchdog.
336
335
  const inactivityTimeoutMs = Number(process.env.VOICE_INACTIVITY_TIMEOUT_MS ?? 600_000);
337
336
  const inactivityEnabled = Number.isFinite(inactivityTimeoutMs) && inactivityTimeoutMs > 0;
338
337
  let inactivityTimer;
@@ -350,7 +349,11 @@ export function defineVoiceAgent(config) {
350
349
  clearInactivityTimer();
351
350
  inactivityTimer = setTimeout(() => {
352
351
  inactivityFired = true;
353
- logger.info(`[LiveKitVoiceAgent] Inactivity timeout (${Math.round(inactivityTimeoutMs / 1000)}s) reached — shutting down job for room "${ctx.room.name}"`);
352
+ logger.info("voice.agent.inactivityTimeout", {
353
+ room: ctx.room.name,
354
+ timeoutMs: inactivityTimeoutMs,
355
+ action: "shutdown",
356
+ });
354
357
  ctx.shutdown("inactivity timeout");
355
358
  }, inactivityTimeoutMs);
356
359
  // The watchdog must not, by itself, keep the event loop alive.
@@ -359,46 +362,53 @@ export function defineVoiceAgent(config) {
359
362
  ctx.addShutdownCallback(async () => {
360
363
  clearInactivityTimer();
361
364
  });
362
- // Track when the user actually stops speaking (VAD) so endpointing latency
363
- // can be measured, and reset the inactivity watchdog on user activity.
365
+ if (process.env.LK_REALTIME_CONNECT_MODE === "true") {
366
+ ctx.addShutdownCallback(async () => {
367
+ const parentPid = process.ppid;
368
+ setTimeout(() => {
369
+ try {
370
+ if (typeof parentPid === "number" && parentPid > 1) {
371
+ process.kill(parentPid, "SIGTERM");
372
+ }
373
+ }
374
+ catch {
375
+ // Parent already gone — fall through to the hard exit below.
376
+ }
377
+ process.exit(0);
378
+ }, 500).unref?.();
379
+ });
380
+ }
364
381
  session.on(voice.AgentSessionEventTypes.UserStateChanged, (ev) => {
365
382
  noteActivity();
366
383
  if (ev.oldState === "speaking" && ev.newState !== "speaking") {
367
384
  userStoppedSpeakingAt = Date.now();
368
385
  }
369
386
  });
370
- // Reset the inactivity watchdog on any agent speech/processing and on every
371
- // committed conversation item (user turn or agent reply), so the timeout
372
- // only fires during a genuine lull in the conversation.
373
387
  session.on(voice.AgentSessionEventTypes.AgentStateChanged, () => {
374
388
  noteActivity();
375
389
  });
376
390
  session.on(voice.AgentSessionEventTypes.ConversationItemAdded, () => {
377
391
  noteActivity();
378
392
  });
379
- // Forward user STT transcripts to the data-channel bridge as a single
380
- // live-updating bubble. `UserInputTranscribed` fires `isFinal: true` per
381
- // finalized SEGMENT (several per turn), so we never forward those as the
382
- // turn-final; `emitUserTranscriptSegment` accumulates them into the per-turn
383
- // buffer and emits `final: false`. The lone `final: true` is sent from
384
- // `llmNode` at the real turn boundary.
385
393
  if (transcriptEventsEnabled) {
386
394
  session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (ev) => {
387
395
  emitUserTranscriptSegment(ev.transcript, ev.isFinal);
388
396
  });
389
397
  }
390
- logger.info("[LiveKitVoiceAgent] Session starting", {
398
+ logger.info("voice.agent.sessionStarting", {
391
399
  room: ctx.room.name,
392
400
  provider,
393
401
  model,
394
402
  });
395
403
  await session.start({ agent, room: ctx.room });
396
- // Start the inactivity countdown now that the session is live; every
397
- // interaction handler above re-arms it.
404
+ if (config.greeting !== undefined && config.greeting.trim().length > 0) {
405
+ const greetingStream = brainTurnStream(brain, config.greeting, conversationId);
406
+ session.say(greetingStream, {
407
+ addToChatCtx: true,
408
+ allowInterruptions: true,
409
+ });
410
+ }
398
411
  noteActivity();
399
- // Data-channel event bridge: forward NeuroLink events (text, tool calls,
400
- // results, HITL prompts, status) to the browser, and accept HITL responses
401
- // back. Only when enabled and the instance exposes its event emitter.
402
412
  if (config.events?.enabled === true && neurolink.getEventEmitter) {
403
413
  const bridge = await attachEventBridge({
404
414
  room: ctx.room,
@@ -11,6 +11,7 @@
11
11
  * See docs/features/livekit-voice-agent.md.
12
12
  */
13
13
  import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
14
+ export declare function installVoiceWorkerProcessGuards(metricsIntervalMs?: number): void;
14
15
  /**
15
16
  * Launch the LiveKit voice agent worker.
16
17
  *
@@ -25,3 +26,4 @@ import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
25
26
  * ```
26
27
  */
27
28
  export declare function startVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
29
+ export declare function startRealtimeVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
@@ -11,8 +11,58 @@
11
11
  * See docs/features/livekit-voice-agent.md.
12
12
  */
13
13
  import { resolveEouTurnDetection, resolveLiveKitServerConfig, } from "./config.js";
14
+ import { logger } from "../../utils/logger.js";
14
15
  const DEFAULT_AGENT_NAME = "neurolink-voice";
15
16
  const EOU_METHOD_MULTILINGUAL = "lk_end_of_utterance_multilingual";
17
+ const IS_JOB_CHILD = process.argv.some((arg) => arg.includes("job_proc")); // true when any CLI argument contains "job_proc"; presumably how a forked per-call job process is recognised — TODO confirm against the LiveKit agents launcher
18
+ const PROC_ROLE = IS_JOB_CHILD ? "job(child)" : "worker(parent)"; // role label attached to every process-guard log entry
19
+ let processGuardsInstalled = false; // flipped once by installVoiceWorkerProcessGuards so its handlers are registered only once per process
20
/**
 * Install process-level diagnostics for the voice worker. Calls after the first
 * return immediately.
 *
 * - `uncaughtException`: logged; a job child additionally marks the exit code
 *   as failed and force-exits after 1s, while the parent worker keeps running.
 * - `unhandledRejection`: logged only.
 * - `SIGTERM` / `SIGINT` / `SIGHUP`: logged, then the process exits with code 0
 *   after a 1.5s grace period.
 * - Memory usage is logged at debug level every `metricsIntervalMs`.
 *
 * @param metricsIntervalMs interval between memory samples in milliseconds;
 *   defaults to `VOICE_METRICS_INTERVAL_MS` or 10000. A non-finite or
 *   non-positive value disables sampling.
 */
export function installVoiceWorkerProcessGuards(metricsIntervalMs = Number(process.env.VOICE_METRICS_INTERVAL_MS ?? 10000)) {
    if (processGuardsInstalled) {
        return;
    }
    processGuardsInstalled = true;
    const procInfo = {
        role: PROC_ROLE,
        pid: process.pid,
        ppid: process.ppid,
    };
    process.on("uncaughtException", (error) => {
        logger.error("voiceWorker.uncaughtException", {
            ...procInfo,
            error: error?.stack ?? String(error),
        });
        if (IS_JOB_CHILD) {
            // The forced exit below runs on an unref'd timer, so the event loop
            // may drain first; record the failure code now so that a natural
            // exit does not report success.
            process.exitCode = 1;
            setTimeout(() => process.exit(1), 1000).unref?.();
        }
    });
    process.on("unhandledRejection", (reason) => {
        logger.error("voiceWorker.unhandledRejection", {
            ...procInfo,
            // Same fallback as the uncaughtException handler: an Error without
            // a stack is still logged as text instead of `undefined`.
            error: reason instanceof Error
                ? (reason.stack ?? String(reason))
                : String(reason),
        });
    });
    for (const signal of ["SIGTERM", "SIGINT", "SIGHUP"]) {
        process.on(signal, () => {
            logger.warn("voiceWorker.signal", { ...procInfo, signal });
            // Not unref'd on purpose: the pending timer guarantees the exit fires.
            setTimeout(() => process.exit(0), 1500);
        });
    }
    if (Number.isFinite(metricsIntervalMs) && metricsIntervalMs > 0) {
        // Bytes → mebibytes, rounded to one decimal place.
        const mb = (bytes) => Math.round((bytes / 1024 / 1024) * 10) / 10;
        const timer = setInterval(() => {
            const usage = process.memoryUsage();
            logger.debug("voiceWorker.mem", {
                ...procInfo,
                rssMb: mb(usage.rss),
                heapUsedMb: mb(usage.heapUsed),
                heapTotalMb: mb(usage.heapTotal),
                externalMb: mb(usage.external),
            });
        }, metricsIntervalMs);
        // Sampling alone must not keep the process alive.
        timer.unref?.();
    }
}
16
66
  /**
17
67
  * Register the English EOU inference runner in the worker process.
18
68
  *
@@ -55,3 +105,17 @@ export async function startVoiceAgentWorker(options) {
55
105
  apiSecret: server.apiSecret,
56
106
  }));
57
107
  }
108
/**
 * Launch the voice agent worker for the realtime agent.
 *
 * Installs the process guards, removes `LIVEKIT_EOU_TURN_DETECTION` from the
 * environment when it is set (logged with reason `s2s-in-model-turn-detection`),
 * sets `LK_REALTIME_CONNECT_MODE=true` when the CLI arguments include
 * `connect`, and then delegates to `startVoiceAgentWorker`.
 *
 * @param options worker launch options, forwarded unchanged.
 * @returns a promise that settles with the delegated worker start.
 */
export async function startRealtimeVoiceAgentWorker(options) {
    installVoiceWorkerProcessGuards();
    const { env, argv } = process;
    if (env.LIVEKIT_EOU_TURN_DETECTION) {
        delete env.LIVEKIT_EOU_TURN_DETECTION;
        logger.info("realtime.worker.eouDisabled", {
            reason: "s2s-in-model-turn-detection",
        });
    }
    const isConnectMode = argv.includes("connect");
    if (isConnectMode) {
        env.LK_REALTIME_CONNECT_MODE = "true";
        logger.info("realtime.worker.connectMode", { enabled: true });
    }
    await startVoiceAgentWorker(options);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "9.71.0",
3
+ "version": "9.73.0",
4
4
  "packageManager": "pnpm@10.15.1",
5
5
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications with 21+ providers: OpenAI, Anthropic, Google AI Studio, Google Vertex, AWS Bedrock, Azure OpenAI, Mistral, LiteLLM, SageMaker, Hugging Face, Ollama, OpenAI-compatible, OpenRouter, DeepSeek, NVIDIA NIM, LM Studio, llama.cpp, plus voice (OpenAI TTS, ElevenLabs, Deepgram, Azure Speech).",
6
6
  "author": {
@@ -387,6 +387,7 @@
387
387
  "@livekit/agents-plugin-cartesia": "^1.4.5",
388
388
  "@livekit/agents-plugin-deepgram": "^1.4.5",
389
389
  "@livekit/agents-plugin-elevenlabs": "^1.4.5",
390
+ "@livekit/agents-plugin-google": "^1.4.5",
390
391
  "@livekit/agents-plugin-livekit": "^1.4.5",
391
392
  "@livekit/agents-plugin-silero": "^1.4.5",
392
393
  "@livekit/agents-plugin-soniox": "^1.4.5",