@juspay/neurolink 9.53.0 → 9.54.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/adapters/tts/cartesiaHandler.d.ts +12 -0
  3. package/dist/adapters/tts/cartesiaHandler.js +130 -0
  4. package/dist/browser/neurolink.min.js +1 -1
  5. package/dist/cli/commands/auth.js +6 -0
  6. package/dist/cli/commands/voiceServer.d.ts +6 -0
  7. package/dist/cli/commands/voiceServer.js +17 -0
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/lib/adapters/tts/cartesiaHandler.d.ts +12 -0
  10. package/dist/lib/adapters/tts/cartesiaHandler.js +131 -0
  11. package/dist/lib/providers/azureOpenai.d.ts +4 -1
  12. package/dist/lib/providers/azureOpenai.js +9 -3
  13. package/dist/lib/server/voice/frameBus.d.ts +8 -0
  14. package/dist/lib/server/voice/frameBus.js +25 -0
  15. package/dist/lib/server/voice/turnManager.d.ts +15 -0
  16. package/dist/lib/server/voice/turnManager.js +36 -0
  17. package/dist/lib/server/voice/types.d.ts +20 -0
  18. package/dist/lib/server/voice/types.js +2 -0
  19. package/dist/lib/server/voice/voiceServerApp.d.ts +1 -0
  20. package/dist/lib/server/voice/voiceServerApp.js +118 -0
  21. package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +11 -0
  22. package/dist/lib/server/voice/voiceWebSocketHandler.js +536 -0
  23. package/dist/providers/azureOpenai.d.ts +4 -1
  24. package/dist/providers/azureOpenai.js +9 -3
  25. package/dist/server/voice/frameBus.d.ts +8 -0
  26. package/dist/server/voice/frameBus.js +24 -0
  27. package/dist/server/voice/public/app.js +275 -0
  28. package/dist/server/voice/public/index.html +18 -0
  29. package/dist/server/voice/public/pcm-worklet.js +67 -0
  30. package/dist/server/voice/public/styles.css +102 -0
  31. package/dist/server/voice/turnManager.d.ts +15 -0
  32. package/dist/server/voice/turnManager.js +35 -0
  33. package/dist/server/voice/types.d.ts +20 -0
  34. package/dist/server/voice/types.js +1 -0
  35. package/dist/server/voice/voiceServerApp.d.ts +1 -0
  36. package/dist/server/voice/voiceServerApp.js +117 -0
  37. package/dist/server/voice/voiceWebSocketHandler.d.ts +11 -0
  38. package/dist/server/voice/voiceWebSocketHandler.js +535 -0
  39. package/package.json +2 -1
@@ -0,0 +1,17 @@
1
+ import { startVoiceServer } from "../../lib/server/voice/voiceServerApp.js";
2
+ import { configureVoiceServerEnvironment } from "../../lib/server/voice/voiceWebSocketHandler.js";
3
/**
 * yargs command module for `voice-server`.
 *
 * The builder registers a numeric `--port` / `-p` option (default 3000). The
 * handler calls `configureVoiceServerEnvironment()` and then awaits
 * `startVoiceServer()` with the parsed port.
 */
export const voiceServerCommand = {
    command: "voice-server",
    describe: "Start the real-time voice assistant server (Soniox STT + Cartesia TTS + Cobra VAD)",
    builder(yargs) {
        const portOption = {
            alias: "p",
            type: "number",
            default: 3000,
            describe: "Port to listen on",
        };
        return yargs.option("port", portOption);
    },
    async handler(argv) {
        // Environment must be configured before the server (and its warmup) starts.
        configureVoiceServerEnvironment();
        await startVoiceServer(argv.port);
    },
};
17
+ //# sourceMappingURL=voiceServer.js.map
@@ -17,6 +17,7 @@ import { proxyStartCommand, proxyStatusCommand, proxyTelemetryCommand, proxySetu
17
17
  import { EvaluateCommandFactory } from "./commands/evaluate.js";
18
18
  import { TaskCommandFactory } from "./commands/task.js";
19
19
  import { AutoresearchCommandFactory } from "./commands/autoresearch.js";
20
+ import { voiceServerCommand } from "./commands/voiceServer.js";
20
21
  // Enhanced CLI with Professional UX
21
22
  export function initializeCliParser() {
22
23
  return (yargs(hideBin(process.argv))
@@ -206,6 +207,8 @@ export function initializeCliParser() {
206
207
  // Task Command Group - Scheduled and self-running tasks
207
208
  .command(TaskCommandFactory.createTaskCommands())
208
209
  // AutoResearch Command Group - Automated AI-driven research experiments
209
- .command(AutoresearchCommandFactory.createAutoresearchCommands())); // Close the main return statement
210
+ .command(AutoresearchCommandFactory.createAutoresearchCommands())
211
+ // Real-time voice server (Soniox STT + Cartesia TTS + Cobra VAD)
212
+ .command(voiceServerCommand)); // Close the main return statement
210
213
  }
211
214
  //# sourceMappingURL=parser.js.map
@@ -0,0 +1,12 @@
1
+ import { EventEmitter } from "events";
2
/**
 * Returns the Cartesia TTS WebSocket URL as a string. The implementation
 * reads `CARTESIA_WS_BASE_URL` and `CARTESIA_API_VERSION` from the
 * environment (with built-in defaults) and sets the `cartesia_version`
 * query parameter on the result.
 */
+ export declare function getCartesiaWsUrl(): string;
3
/**
 * Event-emitting wrapper around one Cartesia TTS WebSocket connection.
 *
 * The implementation emits "ready" (socket opened), "audio" (a Buffer decoded
 * from a message's base64 `data`), "done", "error" and "close".
 */
+ export declare class CartesiaStream extends EventEmitter {
4
+ private ws;
5
+ private contextId;
6
+ private isReady;
7
/**
 * Opens the WebSocket immediately.
 * @param contextId Value sent as `context_id` with every request.
 * @throws Error when `CARTESIA_API_KEY` is not set in the environment.
 */
+ constructor(contextId: string);
8
/** Resolves once the socket is open; rejects on socket error, close, or connect timeout. */
+ ready(): Promise<void>;
9
/**
 * Sends `text` as a transcript chunk; a no-op when the socket is not open.
 * @param cont Sent as the request's `continue` flag (the implementation defaults it to true).
 */
+ send(text: string, cont?: boolean): void;
10
/** Sends an empty transcript with `continue: false`. */
+ flush(): void;
11
/** Closes the underlying socket; calling it again is a no-op. */
+ close(): void;
12
+ }
@@ -0,0 +1,131 @@
1
+ import WebSocket from "ws";
2
+ import { EventEmitter } from "events";
3
+ import { logger } from "../../utils/logger.js";
4
+ import { withTimeout } from "../../utils/async/withTimeout.js";
5
/**
 * Builds the Cartesia TTS WebSocket URL.
 *
 * The base URL comes from `CARTESIA_WS_BASE_URL` and the API version from
 * `CARTESIA_API_VERSION`; each falls back to its built-in default when the
 * variable is unset or blank. The version is written to the
 * `cartesia_version` query parameter, replacing any value already present.
 *
 * @returns The serialized WebSocket URL.
 * @throws {TypeError} If the configured base URL is not a valid absolute URL.
 */
export function getCartesiaWsUrl() {
    // `??` alone lets an empty string through (e.g. `CARTESIA_WS_BASE_URL=` in
    // a .env file), which would make `new URL("")` throw or send an empty
    // version. Treat blank values the same as unset ones.
    const fromEnv = (name, fallback) => {
        const value = process.env[name]?.trim();
        return value ? value : fallback;
    };
    const wsUrl = new URL(fromEnv("CARTESIA_WS_BASE_URL", "wss://api.cartesia.ai/tts/websocket"));
    wsUrl.searchParams.set("cartesia_version", fromEnv("CARTESIA_API_VERSION", "2025-04-16"));
    return wsUrl.toString();
}
12
/**
 * Streaming text-to-speech client for one Cartesia WebSocket context.
 *
 * Events emitted on the instance:
 *  - "ready": the socket opened.
 *  - "audio": a `Buffer` decoded from the base64 `data` field of a message.
 *  - "done":  a message carried a truthy `done` field.
 *  - "error": a Cartesia-reported or socket-level error. Only emitted when at
 *             least one "error" listener is attached; otherwise it is logged,
 *             because an EventEmitter throws on an unhandled "error" event.
 *  - "close": the socket closed.
 */
export class CartesiaStream extends EventEmitter {
    ws = null;
    contextId;
    isReady = false;
    /**
     * Opens the WebSocket immediately and wires its events to this emitter.
     *
     * @param contextId Value sent as `context_id` with every request.
     * @throws {Error} If `CARTESIA_API_KEY` is not set in the environment.
     */
    constructor(contextId) {
        super();
        this.contextId = contextId;
        const apiKey = process.env.CARTESIA_API_KEY;
        if (!apiKey) {
            throw new Error("CARTESIA_API_KEY is not set in environment");
        }
        this.ws = new WebSocket(getCartesiaWsUrl(), {
            headers: { "X-API-Key": apiKey },
        });
        this.ws.on("open", () => {
            this.isReady = true;
            logger.info("[CARTESIA] WS connected");
            this.emit("ready");
        });
        this.ws.on("message", (data) => {
            let msg;
            try {
                msg = JSON.parse(data.toString());
            }
            catch {
                logger.error("[CARTESIA] Failed to parse message");
                return;
            }
            // JSON.parse also accepts `null` and bare primitives. Reading
            // `.error` off `null` would throw inside this event handler and
            // surface as an uncaught exception, so reject non-object payloads.
            if (msg === null || typeof msg !== "object") {
                logger.error("[CARTESIA] Failed to parse message");
                return;
            }
            // Handle error first so it always surfaces, even mid-stream
            if (msg.error) {
                const err = new Error(msg.error);
                if (this.listenerCount("error") > 0) {
                    this.emit("error", err);
                }
                else {
                    logger.error("[CARTESIA] Unhandled error:", msg.error);
                }
                return;
            }
            if (msg.data) {
                const audio = Buffer.from(msg.data, "base64");
                this.emit("audio", audio);
            }
            if (msg.done) {
                this.emit("done");
            }
        });
        this.ws.on("error", (err) => {
            if (this.listenerCount("error") > 0) {
                this.emit("error", err);
            }
            else {
                logger.error("[CARTESIA] Unhandled WebSocket error:", err.message);
            }
        });
        this.ws.on("close", () => {
            this.isReady = false;
            this.emit("close");
        });
    }
    /**
     * Waits until the socket is open.
     *
     * @returns A promise that resolves once the connection is usable.
     * @throws {Error} (as a rejection) if the socket was never created or has
     *   been closed, if it errors or closes before opening, or if it does not
     *   open within 5 seconds.
     */
    async ready() {
        if (this.isReady) {
            return;
        }
        const ws = this.ws;
        if (!ws) {
            throw new Error("Cartesia WebSocket is not initialized");
        }
        // A socket that is already closing/closed will never emit "open", and
        // its "close" event may already have fired. Fail now instead of
        // waiting for the connect timeout.
        if (ws.readyState === WebSocket.CLOSING ||
            ws.readyState === WebSocket.CLOSED) {
            throw new Error("Cartesia WebSocket closed before ready");
        }
        let detach = () => { };
        const connectPromise = new Promise((resolve, reject) => {
            const onOpen = () => resolve();
            const onError = (err) => reject(err);
            const onClose = () => reject(new Error("Cartesia WebSocket closed before ready"));
            detach = () => {
                ws.off("open", onOpen);
                ws.off("error", onError);
                ws.off("close", onClose);
            };
            ws.on("open", onOpen);
            ws.on("error", onError);
            ws.on("close", onClose);
        });
        try {
            return await withTimeout(connectPromise, 5000, "Cartesia WS connect timed out");
        }
        finally {
            // Always remove the temporary listeners, whichever way the wait
            // ended. Otherwise every ready() call that settles via another
            // path (or times out) leaves stale listeners on the socket.
            detach();
        }
    }
    /**
     * Sends a transcript chunk for synthesis. Silently does nothing when the
     * socket is missing or not open.
     *
     * @param text Transcript text to synthesize.
     * @param cont Sent as the request's `continue` flag; defaults to true.
     */
    send(text, cont = true) {
        if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
            return;
        }
        this.ws.send(JSON.stringify({
            context_id: this.contextId,
            model_id: "sonic-3",
            transcript: text,
            voice: {
                mode: "id",
                id: "694f9389-aac1-45b6-b726-9d9369183238",
            },
            output_format: {
                container: "raw",
                encoding: "pcm_s16le",
                sample_rate: 24000,
            },
            continue: cont,
        }));
    }
    /** Sends an empty transcript with `continue: false` to end the current context's input. */
    flush() {
        this.send("", false);
    }
    /**
     * Closes the socket and detaches its listeners once it has closed.
     * Safe to call more than once; later calls are no-ops.
     */
    close() {
        const ws = this.ws;
        if (!ws) {
            return;
        }
        this.ws = null;
        this.isReady = false;
        if (ws.readyState === WebSocket.CLOSED) {
            // "close" has already fired, so a once("close") hook would never
            // run. Detach immediately.
            ws.removeAllListeners();
            return;
        }
        // Registered before close() so it cannot miss the event. It runs after
        // the constructor's "close" handler, so consumers still get "close".
        ws.once("close", () => {
            ws.removeAllListeners();
        });
        if (ws.readyState === WebSocket.OPEN ||
            ws.readyState === WebSocket.CONNECTING) {
            try {
                ws.close();
            }
            catch (error) {
                logger.warn("[CARTESIA] Failed to close WebSocket cleanly", error);
            }
        }
    }
}
131
+ //# sourceMappingURL=cartesiaHandler.js.map
@@ -13,7 +13,10 @@ export declare class AzureOpenAIProvider extends BaseProvider {
13
13
  getProviderName(): AIProviderName;
14
14
  getDefaultModel(): string;
15
15
  /**
16
- * Returns the Vercel AI SDK model instance for Azure OpenAI
16
+ * Returns the Vercel AI SDK model instance for Azure OpenAI.
17
+ * Uses .chat() explicitly because @ai-sdk/azure v3+ defaults the bare
18
+ * provider() call to the Responses API, which many Azure deployments
19
+ * do not support yet.
17
20
  */
18
21
  getAISDKModel(): LanguageModel;
19
22
  protected formatProviderError(error: unknown): Error;
@@ -43,11 +43,14 @@ export class AzureOpenAIProvider extends BaseProvider {
43
43
  validateApiKey(createAzureEndpointConfig());
44
44
  }
45
45
  // Create the Azure provider instance with proxy support
46
- // Let the Azure SDK handle all URL construction automatically
46
+ // useDeploymentBasedUrls is required because @ai-sdk/azure v3+ defaults to
47
+ // the /v1/ URL format, but most Azure deployments still require the legacy
48
+ // /deployments/{deployment}/ URL pattern.
47
49
  this.azureProvider = createAzure({
48
50
  resourceName: this.resourceName,
49
51
  apiKey: this.apiKey,
50
52
  apiVersion: this.apiVersion,
53
+ useDeploymentBasedUrls: true,
51
54
  fetch: createProxyFetch(),
52
55
  });
53
56
  logger.debug("Azure Vercel Provider initialized", {
@@ -63,10 +66,13 @@ export class AzureOpenAIProvider extends BaseProvider {
63
66
  return this.deployment;
64
67
  }
65
68
  /**
66
- * Returns the Vercel AI SDK model instance for Azure OpenAI
69
+ * Returns the Vercel AI SDK model instance for Azure OpenAI.
70
+ * Uses .chat() explicitly because @ai-sdk/azure v3+ defaults the bare
71
+ * provider() call to the Responses API, which many Azure deployments
72
+ * do not support yet.
67
73
  */
68
74
  getAISDKModel() {
69
- return this.azureProvider(this.deployment);
75
+ return this.azureProvider.chat(this.deployment);
70
76
  }
71
77
  formatProviderError(error) {
72
78
  if (error instanceof TimeoutError) {
@@ -0,0 +1,8 @@
1
+ import type { Frame } from "./types.js";
2
/**
 * Minimal in-process publish/subscribe bus keyed by `Frame["type"]`.
 */
+ export declare class FrameBus {
3
+ private handlers;
4
/**
 * Registers `fn` to be called with every published frame whose `type`
 * equals `type`. The callback's parameter is narrowed to that variant.
 */
+ subscribe<T extends Frame["type"]>(type: T, fn: (frame: Extract<Frame, {
5
+ type: T;
6
+ }>) => void): void;
7
/**
 * Synchronously calls each subscriber registered for `frame.type`, in
 * registration order. The implementation logs a throwing subscriber and
 * continues with the remaining ones.
 */
+ publish(frame: Frame): void;
8
+ }
@@ -0,0 +1,25 @@
1
+ import { logger } from "../../utils/logger.js";
2
/**
 * Tiny synchronous pub/sub hub: subscribers are stored per frame type and are
 * invoked in registration order when a frame of that type is published.
 */
export class FrameBus {
    handlers = {};
    /**
     * Adds `fn` to the subscriber list for `type`, creating the list on first use.
     */
    subscribe(type, fn) {
        const existing = this.handlers[type];
        if (existing) {
            existing.push(fn);
            return;
        }
        this.handlers[type] = [fn];
    }
    /**
     * Calls every subscriber of `frame.type` with `frame`. A subscriber that
     * throws is logged and does not prevent later subscribers from running.
     */
    publish(frame) {
        const listeners = this.handlers[frame.type] ?? [];
        // Re-read length each pass so subscribers added mid-publish still run.
        for (let i = 0; i < listeners.length; i += 1) {
            const listener = listeners[i];
            try {
                listener(frame);
            }
            catch (err) {
                logger.error(`[FrameBus] Subscriber threw on ${frame.type}:`, err);
            }
        }
    }
}
25
+ //# sourceMappingURL=frameBus.js.map
@@ -0,0 +1,15 @@
1
+ import type { FrameBus } from "./frameBus.js";
2
/** Conversation turn phases tracked by `TurnManager` (numeric enum). */
+ export declare enum TurnState {
3
+ IDLE = 0,
4
+ USER_SPEAKING = 1,
5
+ PROCESSING = 2,
6
+ ASSISTANT_SPEAKING = 3
7
+ }
8
/**
 * Tracks the current `TurnState`, updating it from "vad_start" / "vad_stop"
 * frames on the supplied bus and from the explicit methods below.
 */
+ export declare class TurnManager {
9
/** Current turn phase; the implementation initializes it to `TurnState.IDLE`. */
+ state: TurnState;
10
/** Subscribes to "vad_start" and "vad_stop" on `bus`. */
+ constructor(bus: FrameBus);
11
+ private onVadStart;
12
+ private onVadStop;
13
/** Sets the state to `ASSISTANT_SPEAKING`. */
+ assistantSpeaking(): void;
14
/** Sets the state back to `IDLE`. */
+ reset(): void;
15
+ }
@@ -0,0 +1,36 @@
1
/**
 * Numeric turn-state enum with reverse mapping (name -> index and
 * index -> name), in the shape the TypeScript compiler emits.
 */
export var TurnState;
(function (states) {
    const names = ["IDLE", "USER_SPEAKING", "PROCESSING", "ASSISTANT_SPEAKING"];
    names.forEach((name, index) => {
        states[name] = index;
        states[index] = name;
    });
})(TurnState || (TurnState = {}));
/**
 * Small state machine for conversational turns, driven by VAD frames from a
 * bus plus explicit calls from the assistant side.
 */
export class TurnManager {
    state = TurnState.IDLE;
    /** Hooks the VAD start/stop frames on `bus` up to this instance. */
    constructor(bus) {
        const wiring = [
            ["vad_start", "onVadStart"],
            ["vad_stop", "onVadStop"],
        ];
        for (const [frameType, method] of wiring) {
            bus.subscribe(frameType, () => this[method]());
        }
    }
    /**
     * VAD detected speech. Ignored while the assistant is speaking: barge-in
     * is driven by Soniox non-final tokens, which arrive after a network
     * round-trip. Flipping to USER_SPEAKING here would make the state check
     * in handleSonioxMessage fail, so the interrupt would never fire.
     */
    onVadStart() {
        if (this.state === TurnState.ASSISTANT_SPEAKING) {
            return;
        }
        this.state = TurnState.USER_SPEAKING;
    }
    /** VAD detected silence: only a speaking user moves on to PROCESSING. */
    onVadStop() {
        if (this.state !== TurnState.USER_SPEAKING) {
            return;
        }
        this.state = TurnState.PROCESSING;
    }
    /** Marks that assistant audio is now playing. */
    assistantSpeaking() {
        this.state = TurnState.ASSISTANT_SPEAKING;
    }
    /** Returns to the idle state. */
    reset() {
        this.state = TurnState.IDLE;
    }
}
36
+ //# sourceMappingURL=turnManager.js.map
@@ -0,0 +1,20 @@
1
/**
 * Discriminated union of the messages carried on the voice `FrameBus`,
 * tagged by `type`.
 * NOTE(review): payload semantics (e.g. sample format of `audio.data`,
 * encoding of `tts_audio.data`) are not shown in this declaration; confirm
 * against the producers before relying on them.
 */
+ export type Frame = {
2
+ type: "audio";
3
+ data: Int16Array;
4
+ } | {
5
+ type: "vad_start";
6
+ } | {
7
+ type: "vad_stop";
8
+ } | {
9
+ type: "transcript";
10
+ text: string;
11
+ final: boolean;
12
+ } | {
13
+ type: "llm_token";
14
+ text: string;
15
+ } | {
16
+ type: "tts_audio";
17
+ data: Buffer;
18
+ } | {
19
+ type: "interrupt";
20
+ };
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
/**
 * Starts the voice HTTP + WebSocket server.
 * @param port Port to listen on; the implementation defaults it to 3000.
 * @returns A promise that resolves once the server is listening and rejects
 *   if listening fails.
 */
+ export declare function startVoiceServer(port?: number): Promise<void>;
@@ -0,0 +1,118 @@
1
+ import express from "express";
2
+ import fs from "fs";
3
+ import http from "http";
4
+ import path from "path";
5
+ import { fileURLToPath } from "url";
6
+ import { setupWebSocket } from "./voiceWebSocketHandler.js";
7
+ import { NeuroLink } from "../../neurolink.js";
8
+ import { logger } from "../../utils/logger.js";
9
+ import { withTimeout } from "../../utils/async/withTimeout.js";
10
+ import { getCartesiaWsUrl } from "../../adapters/tts/cartesiaHandler.js";
11
+ const __filename = fileURLToPath(import.meta.url);
12
+ const __dirname = path.dirname(__filename);
13
/**
 * Locates the directory of static assets to serve.
 *
 * Candidates are checked in order: `public/` next to this compiled module,
 * then the source tree's `src/lib/server/voice/public` (the tsc build emits
 * only .ts -> .js and does not copy assets, so the first may be missing when
 * running from dist/). If neither exists, the compiled path is returned
 * anyway and express.static is left to answer with 404s.
 */
function resolvePublicPath() {
    const compiled = path.join(__dirname, "public");
    const candidates = [
        compiled,
        // Resolve from project root → src/lib/server/voice/public
        path.resolve(__dirname, "../../../../src/lib/server/voice/public"),
    ];
    const found = candidates.find((dir) => fs.existsSync(dir));
    return found ?? compiled;
}
31
/**
 * Boots the voice server: static assets, a `/health` endpoint, the voice
 * WebSocket handler, and a background warmup.
 *
 * @param port Port to listen on (default 3000).
 * @returns Resolves once the HTTP server is listening; rejects if the server
 *   emits "error" before then. Warmup runs afterwards without being awaited,
 *   and its failure is only logged.
 */
export async function startVoiceServer(port = 3000) {
    const app = express();
    /* ---------- STATIC FILES ---------- */
    const staticRoot = resolvePublicPath();
    const indexFile = path.join(staticRoot, "index.html");
    logger.info("[SERVER] Serving static from:", staticRoot);
    app.use(express.static(staticRoot));
    app.get("/", (_req, res) => {
        res.sendFile(indexFile);
    });
    /* ---------- HEALTH CHECK ---------- */
    app.get("/health", (_req, res) => {
        res.json({ status: "ok" });
    });
    /* ---------- HTTP + WS ---------- */
    const server = http.createServer(app);
    setupWebSocket(server);
    /* ---------- START ---------- */
    await new Promise((resolve, reject) => {
        const onStartupError = (err) => reject(err);
        server.once("error", onStartupError);
        server.once("listening", () => {
            // Startup succeeded; the startup-only error hook is no longer needed.
            server.removeListener("error", onStartupError);
            logger.info(`[SERVER] Voice server running at http://localhost:${port}`);
            resolve();
        });
        server.listen(port);
    });
    /* ---------- WARMUP ---------- */
    // Fire-and-forget: pre-warm NeuroLink + the LLM provider and prime the
    // Cartesia TLS handshake so the first real request does not pay the
    // cold-start cost. Failures here must never take the server down.
    warmup().catch((err) => {
        logger.warn("[WARMUP] Failed (non-fatal):", err.message);
    });
}
65
/**
 * Best-effort warmup of the LLM provider and the Cartesia TTS endpoint.
 *
 * Stage 1 streams a tiny prompt through NeuroLink (provider/model taken from
 * `VOICE_LLM_PROVIDER` / `VOICE_LLM_MODEL`, defaulting to "azure" /
 * "gpt-4o-automatic") and drains it. Stage 2 opens and immediately closes a
 * Cartesia WebSocket to prime the connection. Each stage is timed and logged
 * separately, and a failure in one stage never prevents the other from
 * running.
 *
 * @returns Resolves when both stages have finished, succeeded or not.
 */
async function warmup() {
    logger.info("[WARMUP] Warming up LLM + TTS...");
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    const neurolink = new NeuroLink();
    const provider = process.env.VOICE_LLM_PROVIDER ?? "azure";
    const model = process.env.VOICE_LLM_MODEL ?? "gpt-4o-automatic";
    const llmStart = Date.now();
    try {
        // The timeout covers opening AND draining the stream. Previously only
        // stream creation was bounded, so a stalled stream could hang here
        // indefinitely and the Cartesia stage would never run.
        await withTimeout((async () => {
            const result = await neurolink.stream({
                provider,
                model,
                input: { text: "hi" },
                maxTokens: 3,
                disableTools: true,
                enableAnalytics: false,
                enableEvaluation: false,
            });
            // Drain the stream so the connection is fully exercised.
            for await (const _chunk of result.stream) {
                /* drain */
            }
        })(), 15000, "LLM warmup timed out");
        logger.info(`[WARMUP] LLM warmup done in ${Date.now() - llmStart}ms`);
    }
    catch (err) {
        logger.warn("[WARMUP] LLM warmup failed (non-fatal):", describeError(err));
    }
    // Cartesia TLS warmup — open WS, wait for connect, then close.
    const apiKey = process.env.CARTESIA_API_KEY;
    if (!apiKey) {
        // Without a key no TTS stream can be created later, so an
        // unauthenticated connection attempt would warm nothing useful.
        logger.info("[WARMUP] Cartesia warmup skipped (CARTESIA_API_KEY is not set)");
        return;
    }
    const ttsStart = Date.now();
    try {
        const { default: WebSocket } = await import("ws");
        const outcome = await new Promise((resolve) => {
            const ws = new WebSocket(getCartesiaWsUrl(), {
                headers: { "X-API-Key": apiKey },
            });
            let timer;
            let settled = false;
            const finish = (result) => {
                if (settled) {
                    return;
                }
                settled = true;
                clearTimeout(timer);
                resolve(result);
            };
            timer = setTimeout(() => {
                finish("timed out");
                ws.terminate();
            }, 5000);
            ws.once("open", () => {
                finish("done");
                ws.close();
            });
            // Persistent (not once): terminate()/close() can surface further
            // "error" events, and an "error" with no listener would throw.
            ws.on("error", (err) => {
                finish(`failed (${describeError(err)})`);
            });
        });
        // Report the real outcome and the TTS-only duration.
        logger.info(`[WARMUP] Cartesia warmup ${outcome} in ${Date.now() - ttsStart}ms`);
    }
    catch (err) {
        logger.warn("[WARMUP] Cartesia warmup failed (non-fatal):", describeError(err));
    }
}
118
+ //# sourceMappingURL=voiceServerApp.js.map
@@ -0,0 +1,11 @@
1
+ import type { Server as HttpServer } from "http";
2
+ /**
3
+ * Call from the voice-server command handler BEFORE importing anything else
4
+ * so the env change is scoped to voice mode only.
5
+ */
// NOTE(review): the implementation is not part of this declaration, so which
// environment values it changes cannot be confirmed here. The voice-server
// CLI command module imports this function and startVoiceServer statically
// and calls this one first inside its handler; confirm that ordering is
// sufficient for the "before importing anything else" requirement above.
6
+ export declare function configureVoiceServerEnvironment(): void;
7
/**
 * A single chat message: a role plus its text content.
 * NOTE(review): presumably one entry of the LLM conversation history kept by
 * the WebSocket handler; confirm against the implementation.
 */
+ export type Message = {
8
+ role: "system" | "user" | "assistant";
9
+ content: string;
10
+ };
11
/**
 * Attaches the voice WebSocket handling to an existing Node HTTP server.
 * The voice server app calls it with the server it creates, before it
 * starts listening.
 * @param server HTTP server to attach to.
 */
+ export declare function setupWebSocket(server: HttpServer): void;