npm - @juspay/neurolink - Versions diffs - 9.71.0 → 9.73.0 - Mend

@juspay/neurolink 9.71.0 → 9.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75) hide show

package/CHANGELOG.md +12 -0
package/dist/browser/neurolink.min.js +330 -312
package/dist/core/constants.d.ts +1 -0
package/dist/core/constants.js +2 -0
package/dist/core/toolRouting.d.ts +59 -0
package/dist/core/toolRouting.js +232 -0
package/dist/lib/core/constants.d.ts +1 -0
package/dist/lib/core/constants.js +2 -0
package/dist/lib/core/toolRouting.d.ts +59 -0
package/dist/lib/core/toolRouting.js +233 -0
package/dist/lib/neurolink.d.ts +31 -1
package/dist/lib/neurolink.js +188 -1
package/dist/lib/telemetry/attributes.js +3 -1
package/dist/lib/types/config.d.ts +8 -0
package/dist/lib/types/index.d.ts +1 -0
package/dist/lib/types/index.js +1 -0
package/dist/lib/types/livekit.d.ts +134 -0
package/dist/lib/types/toolRouting.d.ts +91 -0
package/dist/lib/types/toolRouting.js +19 -0
package/dist/lib/voice/livekit/brain.js +1 -1
package/dist/lib/voice/livekit/config.d.ts +12 -1
package/dist/lib/voice/livekit/config.js +54 -0
package/dist/lib/voice/livekit/eventBridge.js +4 -4
package/dist/lib/voice/livekit/index.d.ts +9 -2
package/dist/lib/voice/livekit/index.js +9 -2
package/dist/lib/voice/livekit/realtimeEventBridge.d.ts +14 -0
package/dist/lib/voice/livekit/realtimeEventBridge.js +161 -0
package/dist/lib/voice/livekit/realtimeMcpTools.d.ts +31 -0
package/dist/lib/voice/livekit/realtimeMcpTools.js +194 -0
package/dist/lib/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
package/dist/lib/voice/livekit/realtimeVoiceAgent.js +362 -0
package/dist/lib/voice/livekit/roomContext.d.ts +23 -0
package/dist/lib/voice/livekit/roomContext.js +57 -0
package/dist/lib/voice/livekit/roomDispatch.d.ts +24 -0
package/dist/lib/voice/livekit/roomDispatch.js +31 -0
package/dist/lib/voice/livekit/schemaSanitizer.d.ts +26 -0
package/dist/lib/voice/livekit/schemaSanitizer.js +144 -0
package/dist/lib/voice/livekit/vertexAuth.d.ts +30 -0
package/dist/lib/voice/livekit/vertexAuth.js +73 -0
package/dist/lib/voice/livekit/voiceAgent.js +47 -37
package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +2 -0
package/dist/lib/voice/livekit/voiceAgentWorker.js +64 -0
package/dist/neurolink.d.ts +31 -1
package/dist/neurolink.js +188 -1
package/dist/telemetry/attributes.js +3 -1
package/dist/types/config.d.ts +8 -0
package/dist/types/index.d.ts +1 -0
package/dist/types/index.js +1 -0
package/dist/types/livekit.d.ts +134 -0
package/dist/types/toolRouting.d.ts +91 -0
package/dist/types/toolRouting.js +18 -0
package/dist/voice/livekit/brain.js +1 -1
package/dist/voice/livekit/config.d.ts +12 -1
package/dist/voice/livekit/config.js +54 -0
package/dist/voice/livekit/eventBridge.js +4 -4
package/dist/voice/livekit/index.d.ts +9 -2
package/dist/voice/livekit/index.js +9 -2
package/dist/voice/livekit/realtimeEventBridge.d.ts +14 -0
package/dist/voice/livekit/realtimeEventBridge.js +160 -0
package/dist/voice/livekit/realtimeMcpTools.d.ts +31 -0
package/dist/voice/livekit/realtimeMcpTools.js +193 -0
package/dist/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
package/dist/voice/livekit/realtimeVoiceAgent.js +361 -0
package/dist/voice/livekit/roomContext.d.ts +23 -0
package/dist/voice/livekit/roomContext.js +56 -0
package/dist/voice/livekit/roomDispatch.d.ts +24 -0
package/dist/voice/livekit/roomDispatch.js +30 -0
package/dist/voice/livekit/schemaSanitizer.d.ts +26 -0
package/dist/voice/livekit/schemaSanitizer.js +143 -0
package/dist/voice/livekit/vertexAuth.d.ts +30 -0
package/dist/voice/livekit/vertexAuth.js +72 -0
package/dist/voice/livekit/voiceAgent.js +47 -37
package/dist/voice/livekit/voiceAgentWorker.d.ts +2 -0
package/dist/voice/livekit/voiceAgentWorker.js +64 -0
package/package.json +2 -1

package/dist/voice/livekit/roomContext.js ADDED Viewed

@@ -0,0 +1,56 @@
+/**
+ * Per-call context from LiveKit room metadata.
+ *
+ * The manager (e.g. a Lighthouse `/start` endpoint) pre-creates the room with
+ * `base64(JSON({ authToken, mcpContext }))` metadata, built from the caller's
+ * session. The worker reads it on join — nothing per-call comes from worker env.
+ * Returns the MCP `x-auth-token` and the base64(JSON) `x-context` the server
+ * expects.
+ *
+ * The metadata is untrusted input, so it is decoded with a zod schema rather
+ * than a trusted `JSON.parse` cast.
+ *
+ * See docs/features/livekit-voice-agent.md.
+ */
+import { Buffer } from "node:buffer";
+import { z } from "zod";
+import { logger } from "../../utils/logger.js";
+/**
+ * Shape the manager writes into room metadata. Both fields are optional:
+ * `authToken` must be a string when present, while `mcpContext` is opaque here
+ * and is accepted as any value.
+ */
+const roomMetadataSchema = z.object({
+    authToken: z.string().optional(),
+    mcpContext: z.unknown().optional(),
+});
+/**
+ * Turn the base64(JSON) room-metadata string into a parsed value.
+ *
+ * @param encoded - Base64 text whose decoded UTF-8 bytes should be JSON.
+ * @returns The parsed value, or `undefined` (after logging an error) when the
+ *   input cannot be decoded and parsed.
+ */
+function decodeBase64Json(encoded) {
+    let parsed;
+    try {
+        const jsonText = Buffer.from(encoded, "base64").toString("utf-8");
+        parsed = JSON.parse(jsonText);
+    }
+    catch (error) {
+        logger.error(`[RealtimeVoiceAgent] room metadata is not valid base64 JSON: ${String(error)}`);
+    }
+    return parsed;
+}
+/**
+ * Extract the per-call MCP credentials from a room's base64(JSON) metadata.
+ *
+ * The result always has both keys. `authToken` is the metadata's token, or `""`
+ * when it is absent (demo/guest, where the MCP server gates on the context's
+ * `demoMode`). `xContext` is `mcpContext` re-encoded as base64(JSON), or `""`
+ * when no context was supplied. Missing or invalid metadata yields
+ * `{ authToken: "", xContext: "" }` after a warning/error is logged.
+ *
+ * @param roomMetadata - The room's metadata string; may be empty or undefined.
+ * @returns `{ authToken, xContext }`, both strings.
+ */
+export function readCallContextFromRoom(roomMetadata) {
+    if (!roomMetadata) {
+        logger.warn("[RealtimeVoiceAgent] room has no metadata — MCP auth/context unavailable.");
+        return { authToken: "", xContext: "" };
+    }
+    // Untrusted input: validate the decoded value instead of casting it.
+    const parsed = roomMetadataSchema.safeParse(decodeBase64Json(roomMetadata));
+    if (!parsed.success) {
+        logger.error(`[RealtimeVoiceAgent] room metadata has unexpected shape: ${parsed.error.message}`);
+        return { authToken: "", xContext: "" };
+    }
+    const authToken = parsed.data.authToken ?? "";
+    const context = parsed.data.mcpContext;
+    if (context === undefined || context === null) {
+        return { authToken, xContext: "" };
+    }
+    const contextJson = JSON.stringify(context);
+    return {
+        authToken,
+        xContext: Buffer.from(contextJson, "utf-8").toString("base64"),
+    };
+}

package/dist/voice/livekit/roomDispatch.d.ts ADDED Viewed

@@ -0,0 +1,24 @@
+/**
+ * LiveKit server-side room operations: create a room with metadata, and
+ * dispatch a named agent to a room.
+ *
+ * Wraps `livekit-server-sdk` (an optional dependency, imported dynamically) so
+ * consumers route all LiveKit *server* calls through `@juspay/neurolink/livekit`
+ * — they never depend on the SDK directly. Mirrors `mintJoinToken`.
+ */
+import type { LiveKitServerCredentials } from "../../types/index.js";
+/**
+ * Create a LiveKit room named `room`. `metadata` defaults to `""`,
+ * `emptyTimeoutSeconds` to 300 and `departureTimeoutSeconds` to 20 when
+ * omitted.
+ */
+export declare function createVoiceRoom(req: LiveKitServerCredentials & {
+    room: string;
+    metadata?: string;
+    emptyTimeoutSeconds?: number;
+    departureTimeoutSeconds?: number;
+}): Promise<void>;
+/**
+ * Explicitly dispatch a named agent to a room. The long-lived worker registered
+ * under `agentName` receives the job and forks a child to run the call.
+ */
+export declare function dispatchVoiceAgent(req: LiveKitServerCredentials & {
+    room: string;
+    agentName: string;
+    metadata?: string;
+}): Promise<void>;

package/dist/voice/livekit/roomDispatch.js ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * LiveKit server-side room operations: create a room with metadata, and
+ * dispatch a named agent to a room.
+ *
+ * Wraps `livekit-server-sdk` (an optional dependency, imported dynamically) so
+ * consumers route all LiveKit *server* calls through `@juspay/neurolink/livekit`
+ * — they never depend on the SDK directly. Mirrors `mintJoinToken`.
+ */
+/**
+ * Map a WebSocket URL onto its HTTP counterpart: `ws://` becomes `http://` and
+ * `wss://` becomes `https://`. The scheme match is case-insensitive and is
+ * anchored on the `://` separator, so any other input — an `http(s)` URL, or a
+ * scheme-less host that merely starts with "ws" — is returned unchanged.
+ *
+ * @param url - The configured LiveKit server URL.
+ * @returns The URL with a `ws`/`wss` scheme swapped for `http`/`https`.
+ */
+const toHttpUrl = (url) => url.replace(/^ws(s?):\/\//i, (_match, secure) => (secure ? "https://" : "http://"));
+/**
+ * Create a LiveKit room through `livekit-server-sdk`'s `RoomServiceClient`.
+ *
+ * @param req - Server credentials (`url`, `apiKey`, `apiSecret`) plus `room`
+ *   and the optional `metadata` (default `""`), `emptyTimeoutSeconds`
+ *   (default 300) and `departureTimeoutSeconds` (default 20).
+ * @returns Resolves once the SDK's `createRoom` call completes.
+ */
+export async function createVoiceRoom(req) {
+    // Loaded on demand: the SDK is imported only when this function is called.
+    const sdk = await import("livekit-server-sdk");
+    const roomService = new sdk.RoomServiceClient(toHttpUrl(req.url), req.apiKey, req.apiSecret);
+    const roomOptions = {
+        name: req.room,
+        metadata: req.metadata ?? "",
+        emptyTimeout: req.emptyTimeoutSeconds ?? 300,
+        departureTimeout: req.departureTimeoutSeconds ?? 20,
+    };
+    await roomService.createRoom(roomOptions);
+}
+/**
+ * Explicitly dispatch the agent registered under `agentName` to `room`, using
+ * `livekit-server-sdk`'s `AgentDispatchClient`. The long-lived worker
+ * registered under that name receives the job and forks a child to run the
+ * call.
+ *
+ * @param req - Server credentials (`url`, `apiKey`, `apiSecret`) plus `room`,
+ *   `agentName` and optional dispatch `metadata` (default `""`).
+ * @returns Resolves once the SDK's `createDispatch` call completes.
+ */
+export async function dispatchVoiceAgent(req) {
+    const sdk = await import("livekit-server-sdk");
+    const dispatcher = new sdk.AgentDispatchClient(toHttpUrl(req.url), req.apiKey, req.apiSecret);
+    const dispatchOptions = { metadata: req.metadata ?? "" };
+    await dispatcher.createDispatch(req.room, req.agentName, dispatchOptions);
+}

package/dist/voice/livekit/schemaSanitizer.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * Gemini function-calling schema sanitizer.
+ *
+ * Normalises an MCP tool's JSON Schema into the subset Gemini's function-calling
+ * accepts. `@google/genai`'s processJsonSchema crashes on untyped nodes,
+ * `$ref`/`$defs`, and some `anyOf`/`oneOf` shapes, so we rebuild a clean tree:
+ * every node gets a concrete `type`, unions collapse to their first concrete
+ * branch, and unsupported keywords are dropped.
+ *
+ * Pure, dependency-free, and assertion-free — values arrive as `unknown` and are
+ * narrowed with the `isRecord` guard. Safe to reuse for any Gemini tool path.
+ */
+/**
+ * Rebuild a JSON Schema node into the Gemini-safe subset. Returns a fresh object
+ * with a concrete `type` on every node.
+ */
+export declare function sanitizeSchema(node: unknown): Record<string, unknown>;
+/** Tool parameters must be an object schema; force it and sanitize the tree. */
+export declare function sanitizeToolParameters(schema: unknown): Record<string, unknown>;
+/**
+ * Walk a (sanitized) schema and return the first node the google plugin would
+ * turn into `undefined` — which genai then crashes on. Returns a human-readable
+ * path/reason, or `null` if the schema is safe. After `sanitizeSchema` this
+ * should always be `null`; if not, it names the exact offending path.
+ */
+export declare function findSchemaIssue(node: unknown, pathPrefix?: string): string | null;

package/dist/voice/livekit/schemaSanitizer.js ADDED Viewed

@@ -0,0 +1,143 @@
+/**
+ * Gemini function-calling schema sanitizer.
+ *
+ * Normalises an MCP tool's JSON Schema into the subset Gemini's function-calling
+ * accepts. `@google/genai`'s processJsonSchema crashes on untyped nodes,
+ * `$ref`/`$defs`, and some `anyOf`/`oneOf` shapes, so we rebuild a clean tree:
+ * every node gets a concrete `type`, unions collapse to their first concrete
+ * branch, and unsupported keywords are dropped.
+ *
+ * Pure, dependency-free, and assertion-free — values arrive as `unknown` and are
+ * narrowed with the `isRecord` guard. Safe to reuse for any Gemini tool path.
+ */
+/**
+ * JSON Schema `type` names that `resolveSchemaType` passes through unchanged.
+ * Any other declared string type (including `"null"`) is coerced to `"string"`.
+ */
+const GEMINI_TYPES = new Set([
+    "string",
+    "number",
+    "integer",
+    "boolean",
+    "array",
+    "object",
+]);
+/** Guard: `true` only for non-null, non-array values whose `typeof` is "object". */
+function isRecord(value) {
+    if (value === null || Array.isArray(value)) {
+        return false;
+    }
+    return typeof value === "object";
+}
+/**
+ * Pick the branch a union collapses to.
+ *
+ * Reads the first non-nullish of `anyOf`, `oneOf`, `allOf` and returns its
+ * first member that is a record whose `type` is not `"null"`. Returns
+ * `undefined` when that keyword is not an array or no member qualifies.
+ */
+function firstConcreteUnionBranch(schema) {
+    const candidates = schema.anyOf ?? schema.oneOf ?? schema.allOf;
+    if (!Array.isArray(candidates)) {
+        return undefined;
+    }
+    for (const candidate of candidates) {
+        if (isRecord(candidate) && candidate.type !== "null") {
+            return candidate;
+        }
+    }
+    return undefined;
+}
+/**
+ * Decide the single concrete `type` for a schema node.
+ *
+ * Order of precedence: a declared string type (kept when it is in
+ * `GEMINI_TYPES`, otherwise `"string"`); the first non-`"null"` string entry of
+ * a type array when it is in `GEMINI_TYPES`; `"object"` when `properties` is a
+ * record; `"array"` when `items` is present; finally `"string"`.
+ */
+function resolveSchemaType(schema) {
+    const declared = schema.type;
+    if (typeof declared === "string") {
+        if (GEMINI_TYPES.has(declared)) {
+            return declared;
+        }
+        return "string";
+    }
+    if (Array.isArray(declared)) {
+        const candidate = declared.find((entry) => typeof entry === "string" && entry !== "null");
+        if (candidate !== undefined && GEMINI_TYPES.has(candidate)) {
+            return candidate;
+        }
+    }
+    if (isRecord(schema.properties)) {
+        return "object";
+    }
+    return schema.items !== undefined ? "array" : "string";
+}
+/**
+ * Fill in the object-specific members of a sanitized schema node.
+ *
+ * Sets `out.properties` to a fresh map of sanitized child schemas. When
+ * `schema.required` is an array, copies it to `out.required`, keeping only
+ * string names that are own keys of the sanitized properties. Sets
+ * `out.additionalProperties = true` when there are no properties.
+ *
+ * @param schema - The source schema node (a record).
+ * @param out - The node under construction; mutated in place.
+ */
+function sanitizeObjectMembers(schema, out) {
+    const properties = isRecord(schema.properties) ? schema.properties : {};
+    const sanitizedProperties = {};
+    for (const [key, value] of Object.entries(properties)) {
+        sanitizedProperties[key] = sanitizeSchema(value);
+    }
+    out.properties = sanitizedProperties;
+    if (Array.isArray(schema.required)) {
+        // Own-key check rather than `in`: `in` also matches inherited
+        // Object.prototype members, so a required entry such as "toString" or
+        // "constructor" would survive without a matching property.
+        const hasProperty = (name) => Object.prototype.hasOwnProperty.call(sanitizedProperties, name);
+        out.required = schema.required.filter((name) => typeof name === "string" && hasProperty(name));
+    }
+    if (Object.keys(sanitizedProperties).length === 0) {
+        out.additionalProperties = true;
+    }
+}
+/**
+ * Rebuild a JSON Schema node into the Gemini-safe subset.
+ *
+ * Non-record input becomes `{ type: "string" }`. A node without a string
+ * `type` that carries a union collapses to its first concrete branch, keeping
+ * the node's own non-empty description. Otherwise the result carries the
+ * string description (if any), a concrete `type`, the `enum` array (if any),
+ * and the sanitized object members or array `items`.
+ *
+ * @param node - Any value; only records are treated as schema nodes.
+ * @returns A new object with a concrete `type`.
+ */
+export function sanitizeSchema(node) {
+    if (!isRecord(node)) {
+        return { type: "string" };
+    }
+    const description = typeof node.description === "string" ? node.description : undefined;
+    if (typeof node.type !== "string") {
+        const unionBranch = firstConcreteUnionBranch(node);
+        if (unionBranch !== undefined) {
+            const collapsed = sanitizeSchema(unionBranch);
+            if (!description) {
+                return collapsed;
+            }
+            return { ...collapsed, description };
+        }
+    }
+    const resolvedType = resolveSchemaType(node);
+    const result = description === undefined
+        ? { type: resolvedType }
+        : { description, type: resolvedType };
+    if (Array.isArray(node.enum)) {
+        result.enum = node.enum;
+    }
+    switch (resolvedType) {
+        case "object":
+            sanitizeObjectMembers(node, result);
+            break;
+        case "array":
+            result.items = sanitizeSchema(node.items);
+            break;
+        default:
+            break;
+    }
+    return result;
+}
+/**
+ * Sanitize a tool's parameter schema, guaranteeing an object schema.
+ *
+ * A nullish `schema` is treated as `{}`. When the sanitized root is not of
+ * type `"object"`, an empty open object schema is returned instead.
+ */
+export function sanitizeToolParameters(schema) {
+    const cleaned = sanitizeSchema(schema ?? {});
+    return cleaned.type === "object"
+        ? cleaned
+        : { type: "object", properties: {}, additionalProperties: true };
+}
+/**
+ * Walk a (sanitized) schema and report the first node the google plugin would
+ * turn into `undefined` — which genai then crashes on.
+ *
+ * Flags: a non-record node, a node without a string `type`, and an object node
+ * with no properties whose `additionalProperties` is undefined or null.
+ * Recurses into object properties (path `prefix.key`) and array items (path
+ * `prefix[]`).
+ *
+ * @param node - The schema node to inspect.
+ * @param pathPrefix - Path label for `node`; defaults to `"$"`.
+ * @returns A human-readable `path: reason` string, or `null` when no issue is
+ *   found.
+ */
+export function findSchemaIssue(node, pathPrefix = "$") {
+    if (!isRecord(node)) {
+        return `${pathPrefix}: not an object schema`;
+    }
+    if (typeof node.type !== "string") {
+        return `${pathPrefix}: missing string "type"`;
+    }
+    if (node.type === "array") {
+        return findSchemaIssue(node.items, `${pathPrefix}[]`);
+    }
+    if (node.type !== "object") {
+        return null;
+    }
+    const childEntries = isRecord(node.properties)
+        ? Object.entries(node.properties)
+        : [];
+    const hasAdditionalProperties = node.additionalProperties !== undefined &&
+        node.additionalProperties !== null;
+    if (childEntries.length === 0 && !hasAdditionalProperties) {
+        return `${pathPrefix}: empty object schema without additionalProperties (plugin → undefined)`;
+    }
+    for (const [key, value] of childEntries) {
+        const issue = findSchemaIssue(value, `${pathPrefix}.${key}`);
+        if (issue) {
+            return issue;
+        }
+    }
+    return null;
+}

package/dist/voice/livekit/vertexAuth.d.ts ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Vertex authentication helpers for the realtime voice agent.
+ *
+ * The Gemini Live WebSocket authenticates to Vertex via Application Default
+ * Credentials (ADC). These helpers materialise ADC from the split
+ * `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
+ * any Gemini Developer API key from the environment so `@google/genai` uses
+ * Vertex/ADC auth (not an API key) for the realtime WebSocket.
+ *
+ * See docs/features/livekit-voice-agent.md.
+ */
+/**
+ * Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
+ *
+ * The google realtime plugin authenticates Vertex via ADC (it does not accept
+ * inline credentials), so this writes a temp service-account JSON and points
+ * `GOOGLE_APPLICATION_CREDENTIALS` at it — unless ADC is already configured.
+ * No-op when `GOOGLE_APPLICATION_CREDENTIALS` is set or the `GOOGLE_AUTH_*`
+ * fields are absent (auth then relies on ambient ADC).
+ */
+export declare function ensureVertexAdc(): void;
+/**
+ * Force pure Vertex/ADC auth for the Gemini Live WebSocket.
+ *
+ * `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
+ * WebSocket auth even when `vertexai: true` and project/location are set, which
+ * Vertex rejects at the handshake (WS close 1006). The realtime worker only
+ * ever talks to Vertex, so remove these keys (only affects this process).
+ */
+export declare function clearGeminiApiKeyEnv(): void;

package/dist/voice/livekit/vertexAuth.js ADDED Viewed

@@ -0,0 +1,72 @@
+/**
+ * Vertex authentication helpers for the realtime voice agent.
+ *
+ * The Gemini Live WebSocket authenticates to Vertex via Application Default
+ * Credentials (ADC). These helpers materialise ADC from the split
+ * `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
+ * any Gemini Developer API key from the environment so `@google/genai` uses
+ * Vertex/ADC auth (not an API key) for the realtime WebSocket.
+ *
+ * See docs/features/livekit-voice-agent.md.
+ */
+import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { logger } from "../../utils/logger.js";
+/**
+ * Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
+ *
+ * The google realtime plugin authenticates Vertex via ADC (it does not accept
+ * inline credentials). This writes a service-account JSON into a fresh temp
+ * directory (file mode 0o600, exclusive create), points
+ * `GOOGLE_APPLICATION_CREDENTIALS` at it, and registers an `exit` handler that
+ * removes the directory.
+ *
+ * No-op when `GOOGLE_APPLICATION_CREDENTIALS` is already set. When
+ * `GOOGLE_AUTH_CLIENT_EMAIL` or `GOOGLE_AUTH_PRIVATE_KEY` is missing it only
+ * logs a warning (auth then relies on ambient ADC).
+ */
+export function ensureVertexAdc() {
+    const env = process.env;
+    if (env.GOOGLE_APPLICATION_CREDENTIALS) {
+        return;
+    }
+    const { GOOGLE_AUTH_CLIENT_EMAIL: clientEmail, GOOGLE_AUTH_PRIVATE_KEY: rawPrivateKey, } = env;
+    if (!(clientEmail && rawPrivateKey)) {
+        logger.warn("[RealtimeVoiceAgent] No GOOGLE_APPLICATION_CREDENTIALS and no GOOGLE_AUTH_* fields — Vertex auth will rely on ambient ADC.");
+        return;
+    }
+    const projectId = env.GOOGLE_AUTH_BREEZE_PROJECT_ID ?? env.GOOGLE_CLOUD_PROJECT_ID;
+    const serviceAccount = {
+        type: env.GOOGLE_AUTH_TYPE ?? "service_account",
+        project_id: projectId,
+        private_key_id: env.GOOGLE_AUTH_PRIVATE_KEY_ID,
+        // Turn literal backslash-n sequences in the key into real newlines.
+        private_key: rawPrivateKey.split("\\n").join("\n"),
+        client_email: clientEmail,
+        token_uri: env.GOOGLE_AUTH_TOKEN_URI ?? "https://oauth2.googleapis.com/token",
+    };
+    const tempDir = mkdtempSync(path.join(os.tmpdir(), "vertex-adc-"));
+    const adcFile = path.join(tempDir, "adc.json");
+    // "wx" fails if the file already exists; 0o600 limits access to the owner.
+    writeFileSync(adcFile, JSON.stringify(serviceAccount), {
+        mode: 0o600,
+        flag: "wx",
+    });
+    process.on("exit", () => {
+        rmSync(tempDir, { recursive: true, force: true });
+    });
+    env.GOOGLE_APPLICATION_CREDENTIALS = adcFile;
+    logger.info(`[RealtimeVoiceAgent] Vertex ADC written to ${adcFile} (project ${projectId}).`);
+}
+/**
+ * Force pure Vertex/ADC auth for the Gemini Live WebSocket.
+ *
+ * `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
+ * WebSocket auth even when `vertexai: true` and project/location are set, which
+ * Vertex rejects at the handshake (WS close 1006). This deletes
+ * `GOOGLE_API_KEY`, `GOOGLE_AI_API_KEY` and `GEMINI_API_KEY` from this
+ * process's environment when they hold a non-empty value, logging each removal.
+ */
+export function clearGeminiApiKeyEnv() {
+    const apiKeyVars = ["GOOGLE_API_KEY", "GOOGLE_AI_API_KEY", "GEMINI_API_KEY"];
+    apiKeyVars
+        .filter((name) => Boolean(process.env[name]))
+        .forEach((name) => {
+            delete process.env[name];
+            logger.info(`[RealtimeVoiceAgent] cleared ${name} so genai uses Vertex/ADC auth (not API key) for the Live WS.`);
+        });
+}

package/dist/voice/livekit/voiceAgent.js CHANGED Viewed

@@ -192,9 +192,20 @@ export function defineVoiceAgent(config) {
     async function entry(ctx) {
         const entryStartedAt = Date.now();
         await ctx.connect();
-        logger.debug(`[LiveKitVoiceAgent] Joined room "${ctx.room.name}" in ${Date.now() - entryStartedAt}ms`);
-        // When the user actually stopped speaking (VAD), used to measure how long
-        // the agent waited after speech before committing the turn to the LLM.
+        logger.debug("voice.agent.roomJoined", {
+            room: ctx.room.name,
+            ms: Date.now() - entryStartedAt,
+        });
+        const { RoomEvent } = await import("@livekit/rtc-node");
+        ctx.room.on(RoomEvent.ParticipantDisconnected, () => {
+            if (ctx.room.remoteParticipants.size === 0) {
+                logger.info("voice.agent.participantLeft", {
+                    room: ctx.room.name,
+                    action: "shutdown",
+                });
+                ctx.shutdown("participant left");
+            }
+        });
         let userStoppedSpeakingAt;
         const neurolink = await config.createNeuroLink();
         const brain = createVoiceBrain({
@@ -245,11 +256,6 @@ export function defineVoiceAgent(config) {
                 final: false,
             });
         }
-        /**
-         * Lock the user bubble at turn-end and reset the buffer for the next turn.
-         * `replacesPrevious` tells the client this committed turn absorbed a prior
-         * interrupted turn, so it should remove the orphaned previous user bubble.
-         */
         function commitUserTranscript(finalText, replacesPrevious = false) {
             if (transcriptEmitter !== undefined) {
                 transcriptEmitter.emit("voice:user-transcript", {
@@ -274,7 +280,9 @@ export function defineVoiceAgent(config) {
                 pendingPrefix = "";
                 commitUserTranscript(promptText, hadPrefix);
                 if (userStoppedSpeakingAt !== undefined) {
-                    logger.debug(`[LiveKitVoiceAgent] Endpointing waited ${Date.now() - userStoppedSpeakingAt}ms before sending turn to LLM`);
+                    logger.debug("voice.agent.endpointingWaited", {
+                        ms: Date.now() - userStoppedSpeakingAt,
+                    });
                 }
                 return brainTurnStream(brain, promptText, conversationId, () => {
                     // Interrupted before producing any reply → carry this turn's text
@@ -299,7 +307,7 @@ export function defineVoiceAgent(config) {
         };
         if (eouTurnDetector !== undefined) {
             turnHandling.turnDetection = eouTurnDetector;
-            logger.info("[LiveKitVoiceAgent] Semantic end-of-utterance turn detection enabled (English)");
+            logger.info("voice.agent.eouEnabled", { language: "english" });
         }
         else if (config.turn?.mode) {
             turnHandling.turnDetection = config.turn.mode;
@@ -319,20 +327,11 @@ export function defineVoiceAgent(config) {
             tts,
             llm: new PlaceholderLLM(),
             turnHandling,
-            // Do NOT speculatively call the LLM on preflight transcripts before the
-            // turn ends — with NeuroLink as the brain each call is a real LLM request,
-            // and it makes the agent feel like it responds while you're still talking.
             preemptiveGeneration: false,
         });
         const agent = new NeuroLinkVoiceAgent({
             instructions: config.systemPrompt ?? "",
         });
-        // Inactivity watchdog: shut the per-call Job down after a stretch with no
-        // user or agent activity (mirrors Clairvoyance). On timeout `ctx.shutdown`
-        // runs the shutdown callbacks (disposing the bridge) and the Job process
-        // exits — freeing its RAM and the EOU model — while the browser observes a
-        // room disconnect. Reset on every interaction below. Configure via
-        // VOICE_INACTIVITY_TIMEOUT_MS (default 10 min); <= 0 disables the watchdog.
         const inactivityTimeoutMs = Number(process.env.VOICE_INACTIVITY_TIMEOUT_MS ?? 600_000);
         const inactivityEnabled = Number.isFinite(inactivityTimeoutMs) && inactivityTimeoutMs > 0;
         let inactivityTimer;
@@ -350,7 +349,11 @@ export function defineVoiceAgent(config) {
             clearInactivityTimer();
             inactivityTimer = setTimeout(() => {
                 inactivityFired = true;
-                logger.info(`[LiveKitVoiceAgent] Inactivity timeout (${Math.round(inactivityTimeoutMs / 1000)}s) reached — shutting down job for room "${ctx.room.name}"`);
+                logger.info("voice.agent.inactivityTimeout", {
+                    room: ctx.room.name,
+                    timeoutMs: inactivityTimeoutMs,
+                    action: "shutdown",
+                });
                 ctx.shutdown("inactivity timeout");
             }, inactivityTimeoutMs);
             // The watchdog must not, by itself, keep the event loop alive.
@@ -359,46 +362,53 @@ export function defineVoiceAgent(config) {
         ctx.addShutdownCallback(async () => {
             clearInactivityTimer();
         });
-        // Track when the user actually stops speaking (VAD) so endpointing latency
-        // can be measured, and reset the inactivity watchdog on user activity.
+        if (process.env.LK_REALTIME_CONNECT_MODE === "true") {
+            ctx.addShutdownCallback(async () => {
+                const parentPid = process.ppid;
+                setTimeout(() => {
+                    try {
+                        if (typeof parentPid === "number" && parentPid > 1) {
+                            process.kill(parentPid, "SIGTERM");
+                        }
+                    }
+                    catch {
+                        // Parent already gone — fall through to the hard exit below.
+                    }
+                    process.exit(0);
+                }, 500).unref?.();
+            });
+        }
         session.on(voice.AgentSessionEventTypes.UserStateChanged, (ev) => {
             noteActivity();
             if (ev.oldState === "speaking" && ev.newState !== "speaking") {
                 userStoppedSpeakingAt = Date.now();
             }
         });
-        // Reset the inactivity watchdog on any agent speech/processing and on every
-        // committed conversation item (user turn or agent reply), so the timeout
-        // only fires during a genuine lull in the conversation.
         session.on(voice.AgentSessionEventTypes.AgentStateChanged, () => {
             noteActivity();
         });
         session.on(voice.AgentSessionEventTypes.ConversationItemAdded, () => {
             noteActivity();
         });
-        // Forward user STT transcripts to the data-channel bridge as a single
-        // live-updating bubble. `UserInputTranscribed` fires `isFinal: true` per
-        // finalized SEGMENT (several per turn), so we never forward those as the
-        // turn-final; `emitUserTranscriptSegment` accumulates them into the per-turn
-        // buffer and emits `final: false`. The lone `final: true` is sent from
-        // `llmNode` at the real turn boundary.
         if (transcriptEventsEnabled) {
             session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (ev) => {
                 emitUserTranscriptSegment(ev.transcript, ev.isFinal);
             });
         }
-        logger.info("[LiveKitVoiceAgent] Session starting", {
+        logger.info("voice.agent.sessionStarting", {
             room: ctx.room.name,
             provider,
             model,
         });
         await session.start({ agent, room: ctx.room });
-        // Start the inactivity countdown now that the session is live; every
-        // interaction handler above re-arms it.
+        if (config.greeting !== undefined && config.greeting.trim().length > 0) {
+            const greetingStream = brainTurnStream(brain, config.greeting, conversationId);
+            session.say(greetingStream, {
+                addToChatCtx: true,
+                allowInterruptions: true,
+            });
+        }
         noteActivity();
-        // Data-channel event bridge: forward NeuroLink events (text, tool calls,
-        // results, HITL prompts, status) to the browser, and accept HITL responses
-        // back. Only when enabled and the instance exposes its event emitter.
         if (config.events?.enabled === true && neurolink.getEventEmitter) {
             const bridge = await attachEventBridge({
                 room: ctx.room,

package/dist/voice/livekit/voiceAgentWorker.d.ts CHANGED Viewed

@@ -11,6 +11,7 @@
  * See docs/features/livekit-voice-agent.md.
  */
 import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
+export declare function installVoiceWorkerProcessGuards(metricsIntervalMs?: number): void;
 /**
  * Launch the LiveKit voice agent worker.
  *
@@ -25,3 +26,4 @@ import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
  * ```
  */
 export declare function startVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
+export declare function startRealtimeVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;

package/dist/voice/livekit/voiceAgentWorker.js CHANGED Viewed

@@ -11,8 +11,58 @@
  * See docs/features/livekit-voice-agent.md.
  */
 import { resolveEouTurnDetection, resolveLiveKitServerConfig, } from "./config.js";
+import { logger } from "../../utils/logger.js";
 const DEFAULT_AGENT_NAME = "neurolink-voice";
 const EOU_METHOD_MULTILINGUAL = "lk_end_of_utterance_multilingual";
+// True when any command-line argument contains "job_proc".
+// NOTE(review): presumably how the agents framework launches per-job child
+// processes — confirm against the framework's launcher.
+const IS_JOB_CHILD = process.argv.some((arg) => arg.includes("job_proc"));
+// Role label attached to the process-guard log entries.
+const PROC_ROLE = IS_JOB_CHILD ? "job(child)" : "worker(parent)";
+// Set by installVoiceWorkerProcessGuards so its handlers are registered once.
+let processGuardsInstalled = false;
+/**
+ * Install process-wide diagnostics for a voice worker process. Only the first
+ * call registers anything; later calls return immediately.
+ *
+ * Registers:
+ * - an `uncaughtException` logger; a job child additionally exits with code 1
+ *   after 1s;
+ * - an `unhandledRejection` logger;
+ * - `SIGTERM`/`SIGINT`/`SIGHUP` handlers that log the signal and force an exit
+ *   with code 0 after 1.5s;
+ * - a periodic memory-usage debug log, when the interval is finite and > 0.
+ *
+ * @param metricsIntervalMs - Memory log period in milliseconds. Defaults to
+ *   `VOICE_METRICS_INTERVAL_MS`, or 10000 when that is unset.
+ */
+export function installVoiceWorkerProcessGuards(metricsIntervalMs = Number(process.env.VOICE_METRICS_INTERVAL_MS ?? 10000)) {
+    if (processGuardsInstalled) {
+        return;
+    }
+    processGuardsInstalled = true;
+    const procInfo = {
+        role: PROC_ROLE,
+        pid: process.pid,
+        ppid: process.ppid,
+    };
+    // Same formatting for both error handlers: prefer the stack, fall back to
+    // the string form so the log never records `undefined`.
+    const describeError = (value) => value instanceof Error ? (value.stack ?? String(value)) : String(value);
+    process.on("uncaughtException", (error) => {
+        logger.error("voiceWorker.uncaughtException", {
+            ...procInfo,
+            error: error?.stack ?? String(error),
+        });
+        if (IS_JOB_CHILD) {
+            setTimeout(() => process.exit(1), 1000).unref?.();
+        }
+    });
+    process.on("unhandledRejection", (reason) => {
+        logger.error("voiceWorker.unhandledRejection", {
+            ...procInfo,
+            error: describeError(reason),
+        });
+    });
+    for (const signal of ["SIGTERM", "SIGINT", "SIGHUP"]) {
+        process.on(signal, () => {
+            logger.warn("voiceWorker.signal", { ...procInfo, signal });
+            // Unref'd like the other timers here: the forced exit is a backstop
+            // and must not hold the process open once shutdown work finishes.
+            setTimeout(() => process.exit(0), 1500).unref?.();
+        });
+    }
+    if (Number.isFinite(metricsIntervalMs) && metricsIntervalMs > 0) {
+        // Bytes to megabytes, rounded to one decimal place.
+        const mb = (bytes) => Math.round((bytes / 1024 / 1024) * 10) / 10;
+        const timer = setInterval(() => {
+            const usage = process.memoryUsage();
+            logger.debug("voiceWorker.mem", {
+                ...procInfo,
+                rssMb: mb(usage.rss),
+                heapUsedMb: mb(usage.heapUsed),
+                heapTotalMb: mb(usage.heapTotal),
+                externalMb: mb(usage.external),
+            });
+        }, metricsIntervalMs);
+        timer.unref?.();
+    }
+}
 /**
  * Register the English EOU inference runner in the worker process.
  *
@@ -55,3 +105,17 @@ export async function startVoiceAgentWorker(options) {
         apiSecret: server.apiSecret,
     }));
 }
+/**
+ * Launch the voice agent worker configured for the realtime agent.
+ *
+ * Installs the process guards, removes `LIVEKIT_EOU_TURN_DETECTION` from the
+ * environment when it is set, sets `LK_REALTIME_CONNECT_MODE` to `"true"` when
+ * the command line contains a `connect` argument, then delegates to
+ * `startVoiceAgentWorker`.
+ *
+ * @param options - Launch options, forwarded unchanged.
+ */
+export async function startRealtimeVoiceAgentWorker(options) {
+    installVoiceWorkerProcessGuards();
+    const env = process.env;
+    if (env.LIVEKIT_EOU_TURN_DETECTION) {
+        delete env.LIVEKIT_EOU_TURN_DETECTION;
+        logger.info("realtime.worker.eouDisabled", {
+            reason: "s2s-in-model-turn-detection",
+        });
+    }
+    const isConnectMode = process.argv.some((arg) => arg === "connect");
+    if (isConnectMode) {
+        env.LK_REALTIME_CONNECT_MODE = "true";
+        logger.info("realtime.worker.connectMode", { enabled: true });
+    }
+    await startVoiceAgentWorker(options);
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@juspay/neurolink",
-  "version": "9.71.0",
+  "version": "9.73.0",
   "packageManager": "pnpm@10.15.1",
   "description": "Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications with 21+ providers: OpenAI, Anthropic, Google AI Studio, Google Vertex, AWS Bedrock, Azure OpenAI, Mistral, LiteLLM, SageMaker, Hugging Face, Ollama, OpenAI-compatible, OpenRouter, DeepSeek, NVIDIA NIM, LM Studio, llama.cpp, plus voice (OpenAI TTS, ElevenLabs, Deepgram, Azure Speech).",
   "author": {
@@ -387,6 +387,7 @@
     "@livekit/agents-plugin-cartesia": "^1.4.5",
     "@livekit/agents-plugin-deepgram": "^1.4.5",
     "@livekit/agents-plugin-elevenlabs": "^1.4.5",
+    "@livekit/agents-plugin-google": "^1.4.5",
     "@livekit/agents-plugin-livekit": "^1.4.5",
     "@livekit/agents-plugin-silero": "^1.4.5",
     "@livekit/agents-plugin-soniox": "^1.4.5",