@juspay/neurolink 9.61.2 → 9.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +23 -17
  3. package/dist/adapters/tts/googleTTSHandler.js +1 -1
  4. package/dist/browser/neurolink.min.js +373 -355
  5. package/dist/cli/commands/serve.js +9 -0
  6. package/dist/cli/commands/voiceServer.d.ts +7 -0
  7. package/dist/cli/commands/voiceServer.js +9 -1
  8. package/dist/cli/factories/commandFactory.js +136 -11
  9. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  10. package/dist/cli/utils/audioFileUtils.d.ts +3 -3
  11. package/dist/cli/utils/audioFileUtils.js +5 -1
  12. package/dist/core/baseProvider.js +29 -6
  13. package/dist/factories/providerRegistry.d.ts +14 -0
  14. package/dist/factories/providerRegistry.js +141 -2
  15. package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
  16. package/dist/lib/core/baseProvider.js +29 -6
  17. package/dist/lib/factories/providerRegistry.d.ts +14 -0
  18. package/dist/lib/factories/providerRegistry.js +141 -2
  19. package/dist/lib/neurolink.d.ts +19 -0
  20. package/dist/lib/neurolink.js +248 -12
  21. package/dist/lib/observability/exporters/laminarExporter.js +1 -0
  22. package/dist/lib/observability/exporters/posthogExporter.js +1 -0
  23. package/dist/lib/observability/utils/spanSerializer.js +1 -0
  24. package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
  25. package/dist/lib/server/voice/tokenCompare.js +23 -0
  26. package/dist/lib/server/voice/voiceServerApp.js +62 -3
  27. package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
  28. package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
  29. package/dist/lib/types/generate.d.ts +47 -0
  30. package/dist/lib/types/index.d.ts +1 -1
  31. package/dist/lib/types/index.js +1 -1
  32. package/dist/lib/types/realtime.d.ts +243 -0
  33. package/dist/lib/types/realtime.js +70 -0
  34. package/dist/lib/types/server.d.ts +68 -0
  35. package/dist/lib/types/span.d.ts +2 -0
  36. package/dist/lib/types/span.js +2 -0
  37. package/dist/lib/types/stream.d.ts +36 -14
  38. package/dist/lib/types/stt.d.ts +585 -0
  39. package/dist/lib/types/stt.js +90 -0
  40. package/dist/lib/types/tts.d.ts +23 -11
  41. package/dist/lib/types/tts.js +7 -0
  42. package/dist/lib/types/voice.d.ts +272 -0
  43. package/dist/lib/types/voice.js +137 -0
  44. package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
  45. package/dist/lib/utils/audioFormatDetector.js +34 -0
  46. package/dist/lib/utils/sttProcessor.d.ts +115 -0
  47. package/dist/lib/utils/sttProcessor.js +295 -0
  48. package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
  49. package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
  50. package/dist/lib/voice/audio-utils.d.ts +135 -0
  51. package/dist/lib/voice/audio-utils.js +435 -0
  52. package/dist/lib/voice/errors.d.ts +123 -0
  53. package/dist/lib/voice/errors.js +386 -0
  54. package/dist/lib/voice/index.d.ts +26 -0
  55. package/dist/lib/voice/index.js +55 -0
  56. package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
  57. package/dist/lib/voice/providers/AzureSTT.js +345 -0
  58. package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
  59. package/dist/lib/voice/providers/AzureTTS.js +349 -0
  60. package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
  61. package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
  62. package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
  63. package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
  64. package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
  65. package/dist/lib/voice/providers/GeminiLive.js +372 -0
  66. package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
  67. package/dist/lib/voice/providers/GoogleSTT.js +454 -0
  68. package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
  69. package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
  70. package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
  71. package/dist/lib/voice/providers/OpenAISTT.js +286 -0
  72. package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
  73. package/dist/lib/voice/providers/OpenAITTS.js +271 -0
  74. package/dist/lib/voice/stream-handler.d.ts +166 -0
  75. package/dist/lib/voice/stream-handler.js +514 -0
  76. package/dist/neurolink.d.ts +19 -0
  77. package/dist/neurolink.js +248 -12
  78. package/dist/observability/exporters/laminarExporter.js +1 -0
  79. package/dist/observability/exporters/posthogExporter.js +1 -0
  80. package/dist/observability/utils/spanSerializer.js +1 -0
  81. package/dist/server/voice/tokenCompare.d.ts +14 -0
  82. package/dist/server/voice/tokenCompare.js +22 -0
  83. package/dist/server/voice/voiceServerApp.js +62 -3
  84. package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
  85. package/dist/server/voice/voiceWebSocketHandler.js +555 -435
  86. package/dist/types/generate.d.ts +47 -0
  87. package/dist/types/index.d.ts +1 -1
  88. package/dist/types/index.js +1 -1
  89. package/dist/types/realtime.d.ts +243 -0
  90. package/dist/types/realtime.js +69 -0
  91. package/dist/types/server.d.ts +68 -0
  92. package/dist/types/span.d.ts +2 -0
  93. package/dist/types/span.js +2 -0
  94. package/dist/types/stream.d.ts +36 -14
  95. package/dist/types/stt.d.ts +585 -0
  96. package/dist/types/stt.js +89 -0
  97. package/dist/types/tts.d.ts +23 -11
  98. package/dist/types/tts.js +7 -0
  99. package/dist/types/voice.d.ts +272 -0
  100. package/dist/types/voice.js +136 -0
  101. package/dist/utils/audioFormatDetector.d.ts +15 -0
  102. package/dist/utils/audioFormatDetector.js +33 -0
  103. package/dist/utils/sttProcessor.d.ts +115 -0
  104. package/dist/utils/sttProcessor.js +294 -0
  105. package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
  106. package/dist/voice/RealtimeVoiceAPI.js +438 -0
  107. package/dist/voice/audio-utils.d.ts +135 -0
  108. package/dist/voice/audio-utils.js +434 -0
  109. package/dist/voice/errors.d.ts +123 -0
  110. package/dist/voice/errors.js +385 -0
  111. package/dist/voice/index.d.ts +26 -0
  112. package/dist/voice/index.js +54 -0
  113. package/dist/voice/providers/AzureSTT.d.ts +47 -0
  114. package/dist/voice/providers/AzureSTT.js +344 -0
  115. package/dist/voice/providers/AzureTTS.d.ts +59 -0
  116. package/dist/voice/providers/AzureTTS.js +348 -0
  117. package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
  118. package/dist/voice/providers/DeepgramSTT.js +549 -0
  119. package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
  120. package/dist/voice/providers/ElevenLabsTTS.js +310 -0
  121. package/dist/voice/providers/GeminiLive.d.ts +52 -0
  122. package/dist/voice/providers/GeminiLive.js +371 -0
  123. package/dist/voice/providers/GoogleSTT.d.ts +60 -0
  124. package/dist/voice/providers/GoogleSTT.js +453 -0
  125. package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
  126. package/dist/voice/providers/OpenAIRealtime.js +411 -0
  127. package/dist/voice/providers/OpenAISTT.d.ts +41 -0
  128. package/dist/voice/providers/OpenAISTT.js +285 -0
  129. package/dist/voice/providers/OpenAITTS.d.ts +49 -0
  130. package/dist/voice/providers/OpenAITTS.js +270 -0
  131. package/dist/voice/stream-handler.d.ts +166 -0
  132. package/dist/voice/stream-handler.js +513 -0
  133. package/package.json +3 -1
@@ -0,0 +1,311 @@
1
+ /**
2
+ * ElevenLabs Text-to-Speech Handler
3
+ *
4
+ * Implementation of TTS using ElevenLabs API.
5
+ *
6
+ * @module voice/providers/ElevenLabsTTS
7
+ */
8
+ import { ErrorCategory, ErrorSeverity } from "../../constants/enums.js";
9
+ import { logger } from "../../utils/logger.js";
10
+ import { TTS_ERROR_CODES, TTSError } from "../../utils/ttsProcessor.js";
11
+ /**
12
+ * ElevenLabs Text-to-Speech Handler
13
+ *
14
+ * Supports high-quality multilingual TTS with voice cloning.
15
+ *
16
+ * @see https://elevenlabs.io/docs/api-reference
17
+ */
18
+ export class ElevenLabsTTS {
19
+ apiKey;
20
+ baseUrl = "https://api.elevenlabs.io/v1";
21
+ voicesCache = null;
22
+ static CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
23
+ /**
24
+ * Maximum text length (5000 characters)
25
+ */
26
+ maxTextLength = 5000;
27
+ constructor(apiKey) {
28
+ const resolvedKey = (apiKey ?? process.env.ELEVENLABS_API_KEY ?? "").trim();
29
+ this.apiKey = resolvedKey.length > 0 ? resolvedKey : null;
30
+ }
31
+ isConfigured() {
32
+ return this.apiKey !== null;
33
+ }
34
+ async getVoices(languageCode) {
35
+ if (!this.apiKey) {
36
+ throw new TTSError({
37
+ code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
38
+ message: "ElevenLabs API key not configured",
39
+ category: ErrorCategory.CONFIGURATION,
40
+ severity: ErrorSeverity.HIGH,
41
+ retriable: false,
42
+ });
43
+ }
44
+ // Return cached voices if valid
45
+ if (this.voicesCache &&
46
+ Date.now() - this.voicesCache.timestamp < ElevenLabsTTS.CACHE_TTL_MS &&
47
+ !languageCode) {
48
+ return this.voicesCache.voices;
49
+ }
50
+ try {
51
+ const voicesController = new AbortController();
52
+ const voicesTimeoutId = setTimeout(() => voicesController.abort(), 30000);
53
+ let response;
54
+ try {
55
+ response = await fetch(`${this.baseUrl}/voices`, {
56
+ method: "GET",
57
+ headers: {
58
+ "xi-api-key": this.apiKey,
59
+ },
60
+ signal: voicesController.signal,
61
+ });
62
+ }
63
+ catch (fetchErr) {
64
+ if (fetchErr instanceof Error && fetchErr.name === "AbortError") {
65
+ throw new TTSError({
66
+ code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
67
+ message: "ElevenLabs voices request timed out after 30 seconds",
68
+ category: ErrorCategory.NETWORK,
69
+ severity: ErrorSeverity.MEDIUM,
70
+ retriable: true,
71
+ originalError: fetchErr,
72
+ });
73
+ }
74
+ throw fetchErr;
75
+ }
76
+ finally {
77
+ clearTimeout(voicesTimeoutId);
78
+ }
79
+ if (!response.ok) {
80
+ throw new Error(`HTTP ${response.status}`);
81
+ }
82
+ const data = (await response.json());
83
+ let voices = data.voices.map((voice) => ({
84
+ id: voice.voice_id,
85
+ name: voice.name,
86
+ languageCode: "en", // ElevenLabs supports multiple languages per voice
87
+ languageCodes: [
88
+ "en",
89
+ "es",
90
+ "fr",
91
+ "de",
92
+ "it",
93
+ "pt",
94
+ "pl",
95
+ "hi",
96
+ "ar",
97
+ "zh",
98
+ "ja",
99
+ "ko",
100
+ ],
101
+ gender: this.mapGender(voice.labels?.gender),
102
+ type: "neural",
103
+ description: voice.labels?.description,
104
+ }));
105
+ // Filter by language if specified
106
+ if (languageCode) {
107
+ const requested = languageCode.toLowerCase();
108
+ const requestedBase = requested.split("-")[0];
109
+ voices = voices.filter((v) => v.languageCodes?.some((code) => {
110
+ const c = code.toLowerCase();
111
+ return (c === requested ||
112
+ c === requestedBase ||
113
+ c.startsWith(requestedBase));
114
+ }));
115
+ }
116
+ // Cache voices
117
+ if (!languageCode) {
118
+ this.voicesCache = { voices, timestamp: Date.now() };
119
+ }
120
+ return voices;
121
+ }
122
+ catch (err) {
123
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
124
+ logger.error(`[ElevenLabsTTSHandler] Failed to get voices: ${errorMessage}`);
125
+ throw new TTSError({
126
+ code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
127
+ message: `Failed to get voices: ${errorMessage}`,
128
+ category: ErrorCategory.NETWORK,
129
+ severity: ErrorSeverity.MEDIUM,
130
+ retriable: true,
131
+ originalError: err instanceof Error ? err : undefined,
132
+ });
133
+ }
134
+ }
135
+ async synthesize(text, options = {}) {
136
+ if (!this.apiKey) {
137
+ throw new TTSError({
138
+ code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
139
+ message: "ElevenLabs API key not configured",
140
+ category: ErrorCategory.CONFIGURATION,
141
+ severity: ErrorSeverity.HIGH,
142
+ retriable: false,
143
+ });
144
+ }
145
+ const startTime = Date.now();
146
+ const elevenOptions = options;
147
+ try {
148
+ // Get voice ID (use default if not specified)
149
+ const voiceId = options.voice ?? "21m00Tcm4TlvDq8ikWAM"; // Rachel voice as default
150
+ // Determine model
151
+ const model = elevenOptions.model ?? "eleven_multilingual_v2";
152
+ // Build request body
153
+ const requestBody = {
154
+ text,
155
+ model_id: model,
156
+ voice_settings: {
157
+ stability: elevenOptions.stability ?? 0.5,
158
+ similarity_boost: elevenOptions.similarityBoost ?? 0.75,
159
+ style: elevenOptions.style ?? 0.0,
160
+ use_speaker_boost: elevenOptions.useSpeakerBoost ?? true,
161
+ },
162
+ };
163
+ // Determine output format
164
+ const outputFormat = this.mapFormat(options.format ?? "mp3");
165
+ const controller = new AbortController();
166
+ const timeoutId = setTimeout(() => controller.abort(), 30000);
167
+ let response;
168
+ try {
169
+ response = await fetch(`${this.baseUrl}/text-to-speech/${voiceId}?output_format=${outputFormat}`, {
170
+ method: "POST",
171
+ headers: {
172
+ "xi-api-key": this.apiKey,
173
+ "Content-Type": "application/json",
174
+ },
175
+ body: JSON.stringify(requestBody),
176
+ signal: controller.signal,
177
+ });
178
+ }
179
+ catch (fetchErr) {
180
+ if (fetchErr instanceof Error && fetchErr.name === "AbortError") {
181
+ throw new TTSError({
182
+ code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
183
+ message: "ElevenLabs TTS request timed out after 30 seconds",
184
+ category: ErrorCategory.NETWORK,
185
+ severity: ErrorSeverity.HIGH,
186
+ retriable: true,
187
+ originalError: fetchErr,
188
+ });
189
+ }
190
+ throw fetchErr;
191
+ }
192
+ finally {
193
+ clearTimeout(timeoutId);
194
+ }
195
+ if (!response.ok) {
196
+ const errorData = await response
197
+ .json()
198
+ .catch(() => Object.create(null));
199
+ const errorMessage = errorData.detail?.message ||
200
+ `HTTP ${response.status}`;
201
+ throw new Error(errorMessage);
202
+ }
203
+ const latency = Date.now() - startTime;
204
+ // Get audio buffer
205
+ const arrayBuffer = await response.arrayBuffer();
206
+ const audioBuffer = Buffer.from(arrayBuffer);
207
+ const result = {
208
+ buffer: audioBuffer,
209
+ // Use the *effective* output format from outputFormat, not the
210
+ // requested format — otherwise unsupported requests that fell back to
211
+ // mp3_44100_128 would mislabel the buffer (Copilot review).
212
+ format: this.effectiveFormat(outputFormat),
213
+ size: audioBuffer.length,
214
+ voice: voiceId,
215
+ sampleRate: this.getSampleRate(outputFormat),
216
+ metadata: {
217
+ latency,
218
+ provider: "elevenlabs-tts",
219
+ model,
220
+ requestedFormat: options.format,
221
+ outputFormat,
222
+ },
223
+ };
224
+ logger.info(`[ElevenLabsTTSHandler] Synthesized ${audioBuffer.length} bytes in ${latency}ms`);
225
+ return result;
226
+ }
227
+ catch (err) {
228
+ if (err instanceof TTSError) {
229
+ throw err;
230
+ }
231
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
232
+ logger.error(`[ElevenLabsTTSHandler] Synthesis failed: ${errorMessage}`);
233
+ throw new TTSError({
234
+ code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
235
+ message: `Synthesis failed: ${errorMessage}`,
236
+ category: ErrorCategory.EXECUTION,
237
+ severity: ErrorSeverity.HIGH,
238
+ retriable: true,
239
+ context: { textLength: text.length },
240
+ originalError: err instanceof Error ? err : undefined,
241
+ });
242
+ }
243
+ }
244
+ /**
245
+ * Map gender string to standard type
246
+ */
247
+ mapGender(gender) {
248
+ if (!gender) {
249
+ return "neutral";
250
+ }
251
+ const lower = gender.toLowerCase();
252
+ if (lower.includes("male") && !lower.includes("female")) {
253
+ return "male";
254
+ }
255
+ if (lower.includes("female")) {
256
+ return "female";
257
+ }
258
+ return "neutral";
259
+ }
260
+ /**
261
+ * Map TTSAudioFormat to ElevenLabs output format
262
+ */
263
+ mapFormat(format) {
264
+ const formats = {
265
+ mp3: "mp3_44100_128",
266
+ wav: "pcm_44100",
267
+ ogg: "ogg_22050",
268
+ opus: "ogg_22050",
269
+ };
270
+ return formats[format] ?? "mp3_44100_128";
271
+ }
272
+ /**
273
+ * Get sample rate from format string
274
+ */
275
+ getSampleRate(format) {
276
+ if (format.includes("44100")) {
277
+ return 44100;
278
+ }
279
+ if (format.includes("22050")) {
280
+ return 22050;
281
+ }
282
+ if (format.includes("24000")) {
283
+ return 24000;
284
+ }
285
+ return 44100;
286
+ }
287
+ /**
288
+ * Map the ElevenLabs `output_format` string back to a canonical
289
+ * TTSAudioFormat. mapFormat() falls back to mp3_44100_128 for unsupported
290
+ * inputs, so this is needed to keep TTSResult.format honest.
291
+ *
292
+ * NOTE: ElevenLabs `pcm_*` outputs are RAW 16-bit signed-LE PCM samples
293
+ * with no RIFF/WAV header. We surface that as `pcm16` (which exists in the
294
+ * `TTSAudioFormat` union exactly for this case) — labeling it as `wav`
295
+ * would cause consumers writing the buffer to a `.wav` file or feeding it
296
+ * to a WAV parser to produce unplayable output (CodeRabbit review).
297
+ */
298
+ effectiveFormat(outputFormat) {
299
+ if (outputFormat.startsWith("mp3")) {
300
+ return "mp3";
301
+ }
302
+ if (outputFormat.startsWith("pcm")) {
303
+ return "pcm16";
304
+ }
305
+ if (outputFormat.startsWith("ogg")) {
306
+ return "opus";
307
+ }
308
+ return "mp3";
309
+ }
310
+ }
311
+ //# sourceMappingURL=ElevenLabsTTS.js.map
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Google Gemini Live Voice API Handler
3
+ *
4
+ * Implementation of bidirectional voice communication using Gemini's Live API.
5
+ *
6
+ * @module voice/providers/GeminiLive
7
+ */
8
+ import { BaseRealtimeHandler } from "../RealtimeVoiceAPI.js";
9
+ import type { TTSAudioFormat, RealtimeAudioChunk, RealtimeConfig, RealtimeSession } from "../../types/index.js";
10
+ /**
11
+ * Google Gemini Live Voice API Handler
12
+ *
13
+ * Implements bidirectional voice communication with Gemini's Live API.
14
+ *
15
+ * @see https://ai.google.dev/gemini-api/docs/live
16
+ */
17
+ export declare class GeminiLive extends BaseRealtimeHandler {
18
+ readonly name = "gemini-live";
19
+ private readonly apiKey;
20
+ private ws;
21
+ private audioChunkIndex;
22
+ private pendingFunctionCalls;
23
+ constructor(apiKey?: string);
24
+ isConfigured(): boolean;
25
+ getSupportedFormats(): TTSAudioFormat[];
26
+ connect(config: RealtimeConfig): Promise<RealtimeSession>;
27
+ disconnect(): Promise<void>;
28
+ sendAudio(audio: Buffer | RealtimeAudioChunk): Promise<void>;
29
+ sendText(text: string): Promise<void>;
30
+ triggerResponse(): Promise<void>;
31
+ cancelResponse(): Promise<void>;
32
+ /**
33
+ * Send setup message with configuration
34
+ */
35
+ private sendSetup;
36
+ /**
37
+ * Wait for setup complete message
38
+ */
39
+ private waitForSetupComplete;
40
+ /**
41
+ * Handle incoming WebSocket messages
42
+ */
43
+ private handleMessage;
44
+ /**
45
+ * Parse audio format from MIME type
46
+ */
47
+ private parseAudioFormat;
48
+ /**
49
+ * Handle function call from model
50
+ */
51
+ private handleFunctionCall;
52
+ }