npm - @juspay/neurolink - Versions diffs - 9.69.3 → 9.70.1 - Mend

@juspay/neurolink 9.69.3 → 9.70.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

package/CHANGELOG.md +12 -0
package/dist/browser/neurolink.min.js +355 -347
package/dist/core/modules/GenerationHandler.js +75 -23
package/dist/core/modules/structuredOutputPolicy.d.ts +28 -0
package/dist/core/modules/structuredOutputPolicy.js +50 -0
package/dist/lib/core/modules/GenerationHandler.js +75 -23
package/dist/lib/core/modules/structuredOutputPolicy.d.ts +28 -0
package/dist/lib/core/modules/structuredOutputPolicy.js +51 -0
package/dist/lib/neurolink.js +58 -0
package/dist/lib/providers/anthropic.js +34 -7
package/dist/lib/providers/googleVertex.js +17 -2
package/dist/lib/types/generate.d.ts +47 -19
package/dist/lib/types/index.d.ts +1 -0
package/dist/lib/types/index.js +1 -0
package/dist/lib/types/livekit.d.ts +369 -0
package/dist/lib/types/livekit.js +13 -0
package/dist/lib/types/utilities.d.ts +16 -0
package/dist/lib/utils/json/coerce.d.ts +10 -0
package/dist/lib/utils/json/coerce.js +141 -0
package/dist/lib/utils/json/extract.d.ts +10 -0
package/dist/lib/utils/json/extract.js +61 -11
package/dist/lib/utils/tokenLimits.d.ts +20 -0
package/dist/lib/utils/tokenLimits.js +55 -0
package/dist/lib/voice/livekit/brain.d.ts +21 -0
package/dist/lib/voice/livekit/brain.js +75 -0
package/dist/lib/voice/livekit/config.d.ts +41 -0
package/dist/lib/voice/livekit/config.js +80 -0
package/dist/lib/voice/livekit/eventBridge.d.ts +27 -0
package/dist/lib/voice/livekit/eventBridge.js +360 -0
package/dist/lib/voice/livekit/index.d.ts +15 -0
package/dist/lib/voice/livekit/index.js +16 -0
package/dist/lib/voice/livekit/tokens.d.ts +19 -0
package/dist/lib/voice/livekit/tokens.js +51 -0
package/dist/lib/voice/livekit/voiceAgent.d.ts +32 -0
package/dist/lib/voice/livekit/voiceAgent.js +415 -0
package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +27 -0
package/dist/lib/voice/livekit/voiceAgentWorker.js +58 -0
package/dist/neurolink.js +58 -0
package/dist/providers/anthropic.js +34 -7
package/dist/providers/googleVertex.js +17 -2
package/dist/types/generate.d.ts +47 -19
package/dist/types/index.d.ts +1 -0
package/dist/types/index.js +1 -0
package/dist/types/livekit.d.ts +369 -0
package/dist/types/livekit.js +12 -0
package/dist/types/utilities.d.ts +16 -0
package/dist/utils/json/coerce.d.ts +10 -0
package/dist/utils/json/coerce.js +140 -0
package/dist/utils/json/extract.d.ts +10 -0
package/dist/utils/json/extract.js +61 -11
package/dist/utils/tokenLimits.d.ts +20 -0
package/dist/utils/tokenLimits.js +55 -0
package/dist/voice/livekit/brain.d.ts +21 -0
package/dist/voice/livekit/brain.js +74 -0
package/dist/voice/livekit/config.d.ts +41 -0
package/dist/voice/livekit/config.js +79 -0
package/dist/voice/livekit/eventBridge.d.ts +27 -0
package/dist/voice/livekit/eventBridge.js +359 -0
package/dist/voice/livekit/index.d.ts +15 -0
package/dist/voice/livekit/index.js +15 -0
package/dist/voice/livekit/tokens.d.ts +19 -0
package/dist/voice/livekit/tokens.js +50 -0
package/dist/voice/livekit/voiceAgent.d.ts +32 -0
package/dist/voice/livekit/voiceAgent.js +414 -0
package/dist/voice/livekit/voiceAgentWorker.d.ts +27 -0
package/dist/voice/livekit/voiceAgentWorker.js +57 -0
package/package.json +23 -6

package/dist/lib/voice/livekit/voiceAgent.js ADDED Viewed

@@ -0,0 +1,415 @@
+/**
+ * LiveKit Agents agent definition.
+ *
+ * `defineVoiceAgent` returns the agent object placed as the default export of a
+ * worker entry file. The framework runs it as a Job (one per call, in its own
+ * process): it connects to the room, builds the NeuroLink brain via the
+ * supplied factory, wires Silero VAD + STT/TTS plugins, and overrides `llmNode`
+ * so every turn is generated by `neurolink.stream()`.
+ *
+ * `@livekit/agents` and the plugins are optional dependencies, imported
+ * dynamically so the core package does not require them unless the LiveKit
+ * voice agent is used. Type-only imports are erased at build time and add no
+ * runtime dependency.
+ *
+ * See docs/features/livekit-voice-agent.md.
+ */
+import { ReadableStream } from "node:stream/web";
+import { logger } from "../../utils/logger.js";
+import { createVoiceBrain } from "./brain.js";
+import { resolveBrainDefaults, resolveEouTurnDetection } from "./config.js";
+import { attachEventBridge } from "./eventBridge.js";
+const DEFAULT_CONVERSATION_PREFIX = "voice";
+// Turn-end timing defaults (approach A: silence tuning). Longer silence +
+// endpointing floor so natural mid-sentence pauses ("...and so, [pause] um")
+// don't end the turn early and split one utterance into two. Overridable via
+// config (vad.minSilenceDuration / turn.minEndpointingDelay).
+const DEFAULT_MIN_SILENCE_DURATION = 1.0; // seconds (Silero VAD)
+const DEFAULT_MIN_ENDPOINTING_DELAY = 1000; // ms (framework endpointing floor)
+/**
+ * Return the text of the newest user message in a chat context.
+ *
+ * Walks `chatCtx.items` from the last entry towards the first and stops at the
+ * first item whose `type` is "message" and whose `role` is "user".
+ *
+ * @param chatCtx - Chat context exposing an `items` array.
+ * @returns That item's `textContent`, or `undefined` when no user message exists.
+ */
+function latestUserText(chatCtx) {
+    const { items } = chatCtx;
+    let index = items.length;
+    while (index > 0) {
+        index -= 1;
+        const candidate = items[index];
+        const isUserMessage = candidate.type === "message" && candidate.role === "user";
+        if (isUserMessage) {
+            return candidate.textContent;
+        }
+    }
+    return undefined;
+}
+/**
+ * Adapt one brain turn into a web `ReadableStream` of reply chunks.
+ *
+ * Each `pull` forwards the next chunk produced by `brain.streamReply(...)`.
+ * When the framework cancels the stream (barge-in), the turn's `AbortSignal`
+ * is aborted and the underlying async iterator is closed.
+ *
+ * @param brain - Object whose `streamReply({ transcript, conversationId, signal })`
+ *   returns an async iterable of chunks.
+ * @param transcript - User text for this turn.
+ * @param conversationId - Conversation identifier passed through to the brain.
+ * @param onAbortedBeforeOutput - Optional callback invoked on cancel when no
+ *   chunk had been enqueued yet.
+ * @returns A `ReadableStream` that closes once the brain's iterator is done.
+ */
+function brainTurnStream(brain, transcript, conversationId, onAbortedBeforeOutput) {
+    const controller = new AbortController();
+    const generator = brain.streamReply({
+        transcript,
+        conversationId,
+        signal: controller.signal,
+    });
+    const iterator = generator[Symbol.asyncIterator]();
+    let producedOutput = false;
+    return new ReadableStream({
+        async pull(streamController) {
+            const next = await iterator.next();
+            if (next.done === true) {
+                streamController.close();
+                return;
+            }
+            producedOutput = true;
+            streamController.enqueue(next.value);
+        },
+        cancel() {
+            controller.abort();
+            if (!producedOutput) {
+                onAbortedBeforeOutput?.();
+            }
+            // Aborting the signal does not resume a generator that is parked at
+            // a `yield`, so close the iterator explicitly to let its `finally`
+            // cleanup run. Not awaited, so cancellation itself is never delayed.
+            Promise.resolve()
+                .then(() => iterator.return?.())
+                .catch(() => {
+                    // Best-effort: the stream is already cancelled, so there is
+                    // no consumer left to report a cleanup failure to.
+                });
+        },
+    });
+}
+/**
+ * Build the English semantic end-of-utterance (EOU) turn detector.
+ *
+ * The detector sits on top of VAD: VAD reports acoustic silence, and this model
+ * then judges whether the user's turn is semantically finished, so a natural
+ * mid-sentence pause does not split one utterance. It is opt-in via
+ * `LIVEKIT_EOU_TURN_DETECTION`. The inference runner itself is registered in
+ * the worker process (see `voiceAgentWorker.ts`); only the model handle is
+ * created here.
+ *
+ * @returns A `turnDetector.EnglishModel` built with the resolved
+ *   `unlikelyThreshold`, or `undefined` when `resolveEouTurnDetection()`
+ *   reports the feature as disabled.
+ */
+async function loadEouTurnDetector() {
+    const eouSettings = resolveEouTurnDetection();
+    if (eouSettings.enabled) {
+        // Optional dependency: only loaded when the feature is switched on.
+        const livekitPlugin = await import("@livekit/agents-plugin-livekit");
+        const { EnglishModel } = livekitPlugin.turnDetector;
+        return new EnglishModel(eouSettings.unlikelyThreshold);
+    }
+    return undefined;
+}
+/**
+ * Load the Silero VAD used by the session.
+ *
+ * The fallbacks are stricter than the plugin defaults so background noise is
+ * not treated as speech: a higher activation threshold plus a minimum speech
+ * duration reject short, quiet noise bursts.
+ *
+ * @param config - Optional VAD overrides (`activationThreshold`,
+ *   `minSpeechDuration`, `minSilenceDuration`); may be `undefined`.
+ * @returns Whatever `VAD.load(...)` from `@livekit/agents-plugin-silero` returns.
+ */
+async function loadVad(config) {
+    const { VAD } = await import("@livekit/agents-plugin-silero");
+    const vadOptions = {
+        activationThreshold: config?.activationThreshold ?? 0.6,
+        minSpeechDuration: config?.minSpeechDuration ?? 0.2,
+        minSilenceDuration: config?.minSilenceDuration ?? DEFAULT_MIN_SILENCE_DURATION,
+    };
+    return VAD.load(vadOptions);
+}
+/**
+ * Instantiate the speech-to-text plugin selected by `config.provider`.
+ *
+ * An option is only included when the caller defined it: handing `undefined`
+ * to a plugin would replace its own default (e.g. its default model) with
+ * `undefined` and break it.
+ *
+ * @param config - STT configuration; `provider` must be "soniox" or "deepgram".
+ *   Soniox reads `model`, `language` and `maxEndpointDelayMs`; Deepgram reads
+ *   `language` only.
+ * @returns A new `STT` instance from the matching plugin package.
+ * @throws {Error} When `config.provider` is any other value.
+ */
+async function buildStt(config) {
+    const { provider } = config;
+    switch (provider) {
+        case "soniox": {
+            const { STT } = await import("@livekit/agents-plugin-soniox");
+            const { model, language, maxEndpointDelayMs } = config;
+            const sttOptions = {
+                ...(model !== undefined ? { model } : {}),
+                // Soft hint only: Soniox biases toward this language but can
+                // still auto-detect another (e.g. the user switching to Telugu
+                // mid-call). Do NOT set `languageHintsStrict` — forcing the
+                // hinted language makes the realtime STT stall/error on
+                // other-language audio and the session never recovers (no
+                // further transcripts, so no audio at all).
+                ...(language !== undefined ? { languageHints: [language] } : {}),
+                ...(maxEndpointDelayMs !== undefined ? { maxEndpointDelayMs } : {}),
+            };
+            return new STT(sttOptions);
+        }
+        case "deepgram": {
+            const { STT } = await import("@livekit/agents-plugin-deepgram");
+            const { language } = config;
+            const sttOptions = {
+                ...(language !== undefined ? { language } : {}),
+            };
+            return new STT(sttOptions);
+        }
+        default:
+            throw new Error(`Unsupported LiveKit STT provider "${provider}" (supported: soniox, deepgram)`);
+    }
+}
+/**
+ * Instantiate the text-to-speech plugin selected by `config.provider`.
+ *
+ * An option is only included when the caller defined it: handing `undefined`
+ * to a plugin would replace its own default voice/model with `undefined` and
+ * break it.
+ *
+ * @param config - TTS configuration; `provider` must be "cartesia" or
+ *   "elevenlabs". `voice` and `model` are forwarded as `voice`/`model` for
+ *   Cartesia and as `voiceId`/`modelID` for ElevenLabs.
+ * @returns A new `TTS` instance from the matching plugin package.
+ * @throws {Error} When `config.provider` is any other value.
+ */
+async function buildTts(config) {
+    const { provider } = config;
+    switch (provider) {
+        case "cartesia": {
+            const { TTS } = await import("@livekit/agents-plugin-cartesia");
+            const { voice, model } = config;
+            const ttsOptions = {
+                ...(voice !== undefined ? { voice } : {}),
+                ...(model !== undefined ? { model } : {}),
+            };
+            return new TTS(ttsOptions);
+        }
+        case "elevenlabs": {
+            const { TTS } = await import("@livekit/agents-plugin-elevenlabs");
+            const { voice, model } = config;
+            // ElevenLabs uses its own option names for the same two settings.
+            const ttsOptions = {
+                ...(voice !== undefined ? { voiceId: voice } : {}),
+                ...(model !== undefined ? { modelID: model } : {}),
+            };
+            return new TTS(ttsOptions);
+        }
+        default:
+            throw new Error(`Unsupported LiveKit TTS provider "${provider}" (supported: cartesia, elevenlabs)`);
+    }
+}
+/**
+ * Define a LiveKit voice agent backed by NeuroLink.
+ *
+ * Place the result as the default export of the worker entry file:
+ *
+ * ```ts
+ * export default defineVoiceAgent({
+ *   createNeuroLink: async () => buildConfiguredNeuroLink(),
+ *   stt: { provider: "deepgram" },
+ *   tts: { provider: "elevenlabs" },
+ * });
+ * ```
+ *
+ * @param config - Agent configuration. This function reads `provider`, `model`,
+ *   `conversationIdPrefix`, `createNeuroLink`, `systemPrompt`, `temperature`,
+ *   `maxTokens`, `userId`, `vad`, `stt`, `tts`, `events`, `interruption` and
+ *   `turn`. `provider`/`model` fall back to `resolveBrainDefaults()` and the
+ *   conversation prefix falls back to "voice".
+ * @returns An object `{ entry }`. `entry(ctx)` handles one job: it connects,
+ *   builds the brain, starts the agent session, arms the inactivity watchdog
+ *   and, when events are enabled, attaches the data-channel event bridge.
+ */
+export function defineVoiceAgent(config) {
+    const defaults = resolveBrainDefaults();
+    const provider = config.provider ?? defaults.provider;
+    const model = config.model ?? defaults.model;
+    const conversationPrefix = config.conversationIdPrefix ?? DEFAULT_CONVERSATION_PREFIX;
+    async function entry(ctx) {
+        const entryStartedAt = Date.now();
+        await ctx.connect();
+        logger.debug(`[LiveKitVoiceAgent] Joined room "${ctx.room.name}" in ${Date.now() - entryStartedAt}ms`);
+        // When the user actually stopped speaking (VAD), used to measure how long
+        // the agent waited after speech before committing the turn to the LLM.
+        let userStoppedSpeakingAt;
+        const neurolink = await config.createNeuroLink();
+        const brain = createVoiceBrain({
+            neurolink,
+            provider,
+            model,
+            systemPrompt: config.systemPrompt,
+            temperature: config.temperature,
+            maxTokens: config.maxTokens,
+            userId: config.userId,
+        });
+        const conversationId = `${conversationPrefix}-${ctx.room.name ?? ctx.job.id}`;
+        const { voice, llm } = await import("@livekit/agents");
+        const [vad, stt, tts, eouTurnDetector] = await Promise.all([
+            loadVad(config.vad),
+            buildStt(config.stt),
+            buildTts(config.tts),
+            loadEouTurnDetector(),
+        ]);
+        const transcriptEventsEnabled = config.events?.enabled === true &&
+            typeof neurolink.getEventEmitter === "function";
+        const transcriptEmitter = transcriptEventsEnabled
+            ? neurolink.getEventEmitter?.()
+            : undefined;
+        // Finalized segments of the current user turn, joined by single spaces.
+        let userTranscriptBuffer = "";
+        // Text of a turn that was interrupted before any reply was produced; it
+        // is prepended to the next turn's prompt.
+        let pendingPrefix = "";
+        /**
+         * Join the already-committed text with an incoming segment. Empty parts
+         * are skipped so a blank segment cannot leave a dangling separator space
+         * in the buffer or in the emitted text.
+         */
+        function joinTranscript(committed, segment) {
+            if (committed.length === 0) {
+                return segment;
+            }
+            if (segment.length === 0) {
+                return committed;
+            }
+            return `${committed} ${segment}`;
+        }
+        /**
+         * Emit the live (non-final) user bubble text for one STT segment. A
+         * finalized segment is also folded into the per-turn buffer; the event
+         * still carries `final: false` because the turn itself is not over yet.
+         */
+        function emitUserTranscriptSegment(segmentText, isFinal) {
+            if (transcriptEmitter === undefined) {
+                return;
+            }
+            const live = joinTranscript(userTranscriptBuffer, segmentText.trim());
+            if (isFinal) {
+                userTranscriptBuffer = live;
+            }
+            transcriptEmitter.emit("voice:user-transcript", {
+                text: live,
+                final: false,
+            });
+        }
+        /**
+         * Lock the user bubble at turn-end and reset the buffer for the next turn.
+         * `replacesPrevious` tells the client this committed turn absorbed a prior
+         * interrupted turn, so it should remove the orphaned previous user bubble.
+         */
+        function commitUserTranscript(finalText, replacesPrevious = false) {
+            if (transcriptEmitter !== undefined) {
+                transcriptEmitter.emit("voice:user-transcript", {
+                    text: finalText,
+                    final: true,
+                    replacesPrevious,
+                });
+            }
+            userTranscriptBuffer = "";
+        }
+        class NeuroLinkVoiceAgent extends voice.Agent {
+            /**
+             * Generate the reply for the latest user message through the brain.
+             * Returns `null` when the chat context holds no non-blank user text.
+             */
+            async llmNode(chatCtx, _toolCtx, _modelSettings) {
+                const transcript = latestUserText(chatCtx);
+                if (transcript === undefined || transcript.trim().length === 0) {
+                    userTranscriptBuffer = "";
+                    return null;
+                }
+                const hadPrefix = pendingPrefix.length > 0;
+                const promptText = hadPrefix
+                    ? `${pendingPrefix} ${transcript}`
+                    : transcript;
+                pendingPrefix = "";
+                commitUserTranscript(promptText, hadPrefix);
+                if (userStoppedSpeakingAt !== undefined) {
+                    logger.debug(`[LiveKitVoiceAgent] Endpointing waited ${Date.now() - userStoppedSpeakingAt}ms before sending turn to LLM`);
+                }
+                return brainTurnStream(brain, promptText, conversationId, () => {
+                    // Interrupted before producing any reply → carry this turn's text
+                    // forward; the next turn merges it (prompt + UI).
+                    pendingPrefix = promptText;
+                });
+            }
+        }
+        // The session is given an `llm`, but generation always goes through the
+        // `llmNode` override above, so `chat` is expected never to run.
+        class PlaceholderLLM extends llm.LLM {
+            label() {
+                return "neurolink-placeholder";
+            }
+            chat() {
+                throw new Error("PlaceholderLLM.chat must not be called — llmNode overrides generation");
+            }
+        }
+        const turnHandling = {
+            interruption: {
+                minWords: config.interruption?.minWords ?? 2,
+                minDuration: config.interruption?.minDuration ?? 600,
+            },
+        };
+        if (eouTurnDetector !== undefined) {
+            turnHandling.turnDetection = eouTurnDetector;
+            logger.info("[LiveKitVoiceAgent] Semantic end-of-utterance turn detection enabled (English)");
+        }
+        else if (config.turn?.mode) {
+            turnHandling.turnDetection = config.turn.mode;
+        }
+        // `minDelay` always has a value (the default applies when unset), so the
+        // endpointing options are always attached; `maxDelay` only when given.
+        const endpointing = {
+            minDelay: config.turn?.minEndpointingDelay ?? DEFAULT_MIN_ENDPOINTING_DELAY,
+        };
+        if (config.turn?.maxEndpointingDelay !== undefined) {
+            endpointing.maxDelay = config.turn.maxEndpointingDelay;
+        }
+        turnHandling.endpointing = endpointing;
+        const session = new voice.AgentSession({
+            vad,
+            stt,
+            tts,
+            llm: new PlaceholderLLM(),
+            turnHandling,
+            // Do NOT speculatively call the LLM on preflight transcripts before the
+            // turn ends — with NeuroLink as the brain each call is a real LLM request,
+            // and it makes the agent feel like it responds while you're still talking.
+            preemptiveGeneration: false,
+        });
+        const agent = new NeuroLinkVoiceAgent({
+            instructions: config.systemPrompt ?? "",
+        });
+        // Inactivity watchdog: shut the per-call Job down after a stretch with no
+        // user or agent activity (mirrors Clairvoyance). On timeout `ctx.shutdown`
+        // runs the shutdown callbacks (disposing the bridge) and the Job process
+        // exits — freeing its RAM and the EOU model — while the browser observes a
+        // room disconnect. Reset on every interaction below. Configure via
+        // VOICE_INACTIVITY_TIMEOUT_MS (default 10 min); <= 0 disables the watchdog.
+        // A non-numeric value parses to NaN and an empty string to 0, so both
+        // also disable it.
+        const requestedInactivityMs = Number(process.env.VOICE_INACTIVITY_TIMEOUT_MS ?? 600_000);
+        const inactivityEnabled = Number.isFinite(requestedInactivityMs) && requestedInactivityMs > 0;
+        // setTimeout falls back to a 1 ms delay when asked for more than
+        // 2^31-1 ms, which would shut the job down almost immediately; clamp a
+        // huge configured value to the longest delay the timer can honour.
+        const MAX_TIMER_DELAY_MS = 2_147_483_647;
+        const inactivityTimeoutMs = Math.min(requestedInactivityMs, MAX_TIMER_DELAY_MS);
+        let inactivityTimer;
+        let inactivityFired = false;
+        function clearInactivityTimer() {
+            if (inactivityTimer !== undefined) {
+                clearTimeout(inactivityTimer);
+                inactivityTimer = undefined;
+            }
+        }
+        // (Re)arm the watchdog; a no-op when disabled or once it has fired.
+        function noteActivity() {
+            if (!inactivityEnabled || inactivityFired) {
+                return;
+            }
+            clearInactivityTimer();
+            inactivityTimer = setTimeout(() => {
+                inactivityFired = true;
+                logger.info(`[LiveKitVoiceAgent] Inactivity timeout (${Math.round(inactivityTimeoutMs / 1000)}s) reached — shutting down job for room "${ctx.room.name}"`);
+                ctx.shutdown("inactivity timeout");
+            }, inactivityTimeoutMs);
+            // The watchdog must not, by itself, keep the event loop alive.
+            inactivityTimer.unref?.();
+        }
+        ctx.addShutdownCallback(async () => {
+            clearInactivityTimer();
+        });
+        // Track when the user actually stops speaking (VAD) so endpointing latency
+        // can be measured, and reset the inactivity watchdog on user activity.
+        session.on(voice.AgentSessionEventTypes.UserStateChanged, (ev) => {
+            noteActivity();
+            if (ev.oldState === "speaking" && ev.newState !== "speaking") {
+                userStoppedSpeakingAt = Date.now();
+            }
+        });
+        // Reset the inactivity watchdog on any agent speech/processing and on every
+        // committed conversation item (user turn or agent reply), so the timeout
+        // only fires during a genuine lull in the conversation.
+        session.on(voice.AgentSessionEventTypes.AgentStateChanged, () => {
+            noteActivity();
+        });
+        session.on(voice.AgentSessionEventTypes.ConversationItemAdded, () => {
+            noteActivity();
+        });
+        // Forward user STT transcripts to the data-channel bridge as a single
+        // live-updating bubble. `UserInputTranscribed` fires `isFinal: true` per
+        // finalized SEGMENT (several per turn), so we never forward those as the
+        // turn-final; `emitUserTranscriptSegment` accumulates them into the per-turn
+        // buffer and emits `final: false`. The lone `final: true` is sent from
+        // `llmNode` at the real turn boundary.
+        if (transcriptEventsEnabled) {
+            session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (ev) => {
+                emitUserTranscriptSegment(ev.transcript, ev.isFinal);
+            });
+        }
+        logger.info("[LiveKitVoiceAgent] Session starting", {
+            room: ctx.room.name,
+            provider,
+            model,
+        });
+        await session.start({ agent, room: ctx.room });
+        // Start the inactivity countdown now that the session is live; every
+        // interaction handler above re-arms it.
+        noteActivity();
+        // Data-channel event bridge: forward NeuroLink events (text, tool calls,
+        // results, HITL prompts, status) to the browser, and accept HITL responses
+        // back. Only when enabled and the instance exposes its event emitter.
+        if (config.events?.enabled === true && neurolink.getEventEmitter) {
+            const bridge = await attachEventBridge({
+                room: ctx.room,
+                emitter: neurolink.getEventEmitter(),
+                options: config.events,
+            });
+            ctx.addShutdownCallback(async () => {
+                bridge.dispose();
+            });
+        }
+    }
+    return { entry };
+}
+//# sourceMappingURL=voiceAgent.js.map

package/dist/lib/voice/livekit/voiceAgentWorker.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * LiveKit Agents worker launcher.
+ *
+ * Registers a worker with the LiveKit server (Cloud or self-hosted) for the
+ * given agent entry file. LiveKit dispatches one Job per room, each running in
+ * its own process, which provides worker-per-call isolation and horizontal
+ * scaling. Connection settings are resolved from the environment.
+ *
+ * `@livekit/agents` is an optional dependency, imported dynamically.
+ *
+ * See docs/features/livekit-voice-agent.md.
+ */
+import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
+/**
+ * Launch the LiveKit voice agent worker.
+ *
+ * Call from a small runner script; `agentFile` must point to the file whose
+ * default export is the result of `defineVoiceAgent`.
+ *
+ * ```ts
+ * await startVoiceAgentWorker({
+ *   agentFile: new URL("./voice-agent-entry.js", import.meta.url).pathname,
+ *   agentName: "neurolink-voice",
+ * });
+ * ```
+ *
+ * @param options - Launch options; the example above sets `agentFile` and
+ *   `agentName`.
+ * @returns A promise that resolves with no value.
+ */
+export declare function startVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;

package/dist/lib/voice/livekit/voiceAgentWorker.js ADDED Viewed

@@ -0,0 +1,58 @@
+/**
+ * LiveKit Agents worker launcher.
+ *
+ * Registers a worker with the LiveKit server (Cloud or self-hosted) for the
+ * given agent entry file. LiveKit dispatches one Job per room, each running in
+ * its own process, which provides worker-per-call isolation and horizontal
+ * scaling. Connection settings are resolved from the environment.
+ *
+ * `@livekit/agents` is an optional dependency, imported dynamically.
+ *
+ * See docs/features/livekit-voice-agent.md.
+ */
+import { resolveEouTurnDetection, resolveLiveKitServerConfig, } from "./config.js";
+const DEFAULT_AGENT_NAME = "neurolink-voice";
+const EOU_METHOD_MULTILINGUAL = "lk_end_of_utterance_multilingual";
+/**
+ * Register the English EOU inference runner in the worker process.
+ *
+ * This has to happen before `cli.runApp`: the worker only spawns the shared
+ * inference executor when `InferenceRunner.registeredRunners` is non-empty at
+ * startup, and it hands that registry to the executor process. Importing the
+ * plugin registers both the English and the multilingual runner, so the
+ * multilingual entry is removed afterwards to keep only the English model
+ * loaded.
+ *
+ * @returns A promise that resolves once the multilingual entry has been removed.
+ */
+async function registerEouTurnDetectorRunner() {
+    const agents = await import("@livekit/agents");
+    // Imported for its side effect only: loading the plugin's turn-detector
+    // module triggers registerRunner().
+    await import("@livekit/agents-plugin-livekit");
+    const runnerRegistry = agents.InferenceRunner.registeredRunners;
+    delete runnerRegistry[EOU_METHOD_MULTILINGUAL];
+}
+/**
+ * Launch the LiveKit voice agent worker.
+ *
+ * Intended to be called from a small runner script. `agentFile` must point to
+ * the file whose default export is the result of `defineVoiceAgent`:
+ *
+ * ```ts
+ * await startVoiceAgentWorker({
+ *   agentFile: new URL("./voice-agent-entry.js", import.meta.url).pathname,
+ *   agentName: "neurolink-voice",
+ * });
+ * ```
+ *
+ * @param options - Launch options. `agentFile` is passed to the worker as
+ *   `agent`; `agentName` falls back to "neurolink-voice" when nullish.
+ * @returns A promise that resolves after `cli.runApp` has been invoked.
+ */
+export async function startVoiceAgentWorker(options) {
+    // Server URL and credentials come from `resolveLiveKitServerConfig()`.
+    const { url, apiKey, apiSecret } = resolveLiveKitServerConfig();
+    const agents = await import("@livekit/agents");
+    const eouEnabled = resolveEouTurnDetection().enabled;
+    if (eouEnabled) {
+        // Runners must be registered before the app starts.
+        await registerEouTurnDetectorRunner();
+    }
+    const workerOptions = new agents.WorkerOptions({
+        agent: options.agentFile,
+        agentName: options.agentName ?? DEFAULT_AGENT_NAME,
+        wsURL: url,
+        apiKey,
+        apiSecret,
+    });
+    agents.cli.runApp(workerOptions);
+}
+//# sourceMappingURL=voiceAgentWorker.js.map

package/dist/neurolink.js CHANGED Viewed

@@ -66,6 +66,7 @@ import { CircuitBreaker, ERROR_CODES, ErrorFactory, isAbortError, isRetriableErr
 import { hasLifecycleErrorFired, markLifecycleErrorFired, } from "./utils/lifecycleCallbacks.js";
 import { resolveLifecycleTimeoutMs } from "./utils/lifecycleTimeout.js";
 import { cloneOptionsForCallIsolation } from "./utils/cloneOptions.js";
+import { coerceJsonToSchema } from "./utils/json/coerce.js";
 // Factory processing imports
 import { createCleanStreamOptions, enhanceTextGenerationOptions, processFactoryOptions, processStreamingFactoryOptions, validateFactoryConfig, } from "./utils/factoryProcessing.js";
 import { logger, mcpLogger } from "./utils/logger.js";
@@ -3345,6 +3346,60 @@ Current user's request: ${currentInput}`;
     }
     finalizeGenerateRequestResult(params) {
         const { generateSpan, options, textOptions, textResult, factoryResult, originalPrompt, startTime, } = params;
+        // Provider-agnostic JSON coercion for schema requests. Structured-output
+        // enforcement makes valid JSON the overwhelming case; for every other
+        // provider path — including generate() overrides (Vertex, Anthropic,
+        // Bedrock, Google AI Studio) — object/array roots are recovered here via
+        // balanced-scan + jsonrepair and scalar JSON roots via plain JSON.parse,
+        // with the parsed value exposed as `structuredData`. If nothing
+        // JSON-shaped is recoverable (pure prose), the raw text is returned,
+        // `structuredData` stays undefined, and a WARN makes the case observable.
+        // Runs BEFORE the end-of-generation emits below so event consumers see
+        // the same coerced content/structuredData the caller receives.
+        if (textOptions.schema &&
+            textResult.structuredData === undefined &&
+            typeof textResult.content === "string") {
+            const coerced = coerceJsonToSchema(textResult.content, textOptions.schema);
+            if (coerced) {
+                textResult.content = coerced.content;
+                textResult.structuredData = coerced.structuredData;
+                if (coerced.repaired) {
+                    textResult.jsonRepaired = true;
+                }
+                if (coerced.truncated) {
+                    textResult.jsonTruncated = true;
+                }
+            }
+            else {
+                try {
+                    const scalar = JSON.parse(textResult.content);
+                    if (scalar !== null && scalar !== undefined) {
+                        textResult.structuredData = scalar;
+                    }
+                }
+                catch {
+                    logger.warn("[NeuroLink] schema requested but no JSON could be recovered from model output; returning raw text", { provider: textResult.provider, model: textResult.model });
+                }
+            }
+        }
+        // Surface truncation when a schema was requested: either the provider
+        // reported finishReason="length" or the recovered JSON came from an
+        // unclosed span. Either way `structuredData` may be incomplete — log at
+        // WARN level so it is observable in production (not just debug logs).
+        if (textOptions.schema) {
+            if (textResult.finishReason === "length") {
+                textResult.jsonTruncated = true;
+            }
+            if (textResult.jsonTruncated) {
+                logger.warn("[NeuroLink] Structured output may be truncated (finishReason=length or unclosed JSON); " +
+                    "increase maxTokens to fit the full response.", {
+                    provider: textResult.provider,
+                    model: textResult.model,
+                    finishReason: textResult.finishReason,
+                    outputTokens: textResult.usage?.output,
+                });
+            }
+        }
         // Skip the top-level `generation:end` emission when the provider already
         // emitted it from its native generate path (Vertex / Google AI Studio).
         // Without this guard, native-path providers would surface TWO events
@@ -3378,7 +3433,10 @@ Current user's request: ${currentInput}`;
         this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
         const generateResult = {
             content: textResult.content,
+            structuredData: textResult.structuredData,
             finishReason: textResult.finishReason,
+            jsonRepaired: textResult.jsonRepaired,
+            jsonTruncated: textResult.jsonTruncated,
             provider: textResult.provider,
             model: textResult.model,
             usage: textResult.usage

package/dist/providers/anthropic.js CHANGED Viewed

@@ -21,6 +21,7 @@ import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
 import { NoOutputGeneratedError } from "../utils/generationErrors.js";
 import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
 import { convertZodToJsonSchema } from "../utils/schemaConversion.js";
+import { resolveClaudeMaxTokens } from "../utils/tokenLimits.js";
 import { createChunkQueue, createDeferredAnalytics, stringifyToolInput, } from "./openaiChatCompletionsClient.js";
 /**
  * Beta headers for Claude Code integration.
@@ -493,10 +494,19 @@ const mapAnthropicStopReason = (raw) => {
             return "stop";
     }
 };
-// Anthropic's Messages API requires max_tokens on every request. The previous
-// @ai-sdk/anthropic implementation defaulted it to 4096 when the caller did
-// not specify maxTokens — preserve that wire behavior.
-const ANTHROPIC_DEFAULT_MAX_TOKENS = 4096;
+// Anthropic's Messages API requires max_tokens on every request. When the
+// caller omits it, default to the model's real output ceiling via
+// resolveClaudeMaxTokens (e.g. 64K for Sonnet 4.x) instead of the legacy 4096,
+// which silently truncated large structured responses mid-JSON.
+//
+// Client-level request timeout. The Anthropic SDK throws "Streaming is required
+// for long requests" from a NON-streaming `messages.create` when `max_tokens`
+// is large AND no client-level timeout is configured (it can't estimate a safe
+// timeout). Setting an explicit client timeout — equal to the SDK's own default
+// for the non-throwing path — suppresses that pre-flight throw so large
+// max_tokens (our model-ceiling default) works. Per-request duration is still
+// bounded by the abort signal NeuroLink composes for each call.
+const ANTHROPIC_CLIENT_TIMEOUT_MS = 600_000;
 /**
  * Anthropic Provider v2 - BaseProvider Implementation
  * Enhanced with OAuth support, subscription tiers, and beta headers for Claude Code integration.
@@ -602,6 +612,7 @@ export class AnthropicProvider extends BaseProvider {
                 apiKey: "oauth-authenticated", // Placeholder, actual auth is in fetch wrapper
                 // Note: No headers passed - fetch wrapper sets oauth-2025-04-20 beta header
                 fetch: oauthFetch,
+                timeout: ANTHROPIC_CLIENT_TIMEOUT_MS,
             });
             logger.debug("[AnthropicProvider] Anthropic SDK client created with OAuth fetch wrapper");
             logger.debug("Anthropic Provider initialized with OAuth", {
@@ -647,6 +658,7 @@ export class AnthropicProvider extends BaseProvider {
                 defaultHeaders: headers,
                 ...(normalizedBaseURL && { baseURL: normalizedBaseURL }),
                 fetch: createProxyFetch(),
+                timeout: ANTHROPIC_CLIENT_TIMEOUT_MS,
             });
             logger.debug("Anthropic Provider initialized with API key", {
                 modelName: this.modelName,
@@ -1122,7 +1134,7 @@ export class AnthropicProvider extends BaseProvider {
                 const params = {
                     model: modelId,
                     messages,
-                    max_tokens: options.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
+                    max_tokens: resolveClaudeMaxTokens(modelId, options.maxOutputTokens),
                     ...(system ? { system } : {}),
                     ...(options.temperature !== undefined && options.temperature !== null
                         ? { temperature: options.temperature }
@@ -1137,7 +1149,22 @@ export class AnthropicProvider extends BaseProvider {
                     ...(toolChoice ? { tool_choice: toolChoice } : {}),
                     ...(thinking ? { thinking } : {}),
                 };
-                const timeoutController = createTimeoutController(getTimeoutForOptions(options), providerName, "generate");
+                // The 60s anthropic generate default was tuned for the old ~4096
+                // max_tokens. Now that the default ceiling is the model's real max,
+                // a large structured response needs more wall-clock to be produced —
+                // otherwise the inner controller aborts mid-generation (the AI-SDK
+                // doGenerate layer doesn't see the caller's `timeout`). Raise the
+                // floor to 5 min when a large output budget is in play (max_tokens
+                // above 8192) — but only when the caller did NOT set an explicit
+                // timeout: an explicit value is a contract and must never be
+                // silently extended. The abort signal stays the real bound.
+                const callerTimeout = options
+                    .timeout;
+                const callerSpecifiedTimeout = callerTimeout !== undefined && callerTimeout !== null;
+                const generateTimeoutMs = params.max_tokens > 8192 && !callerSpecifiedTimeout
+                    ? Math.max(getTimeoutForOptions(options), 300_000)
+                    : getTimeoutForOptions(options);
+                const timeoutController = createTimeoutController(generateTimeoutMs, providerName, "generate");
                 let response;
                 try {
                     response = await client.messages.create(params, {
@@ -1356,7 +1383,7 @@ export class AnthropicProvider extends BaseProvider {
                 const params = {
                     model: modelId,
                     messages: conversation,
-                    max_tokens: options.maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
+                    max_tokens: resolveClaudeMaxTokens(modelId, options.maxTokens),
                     stream: true,
                     ...(payload.system ? { system: payload.system } : {}),
                     ...(options.temperature !== undefined && options.temperature !== null