@juspay/neurolink 9.61.1 → 9.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +23 -17
  3. package/dist/adapters/tts/googleTTSHandler.js +1 -1
  4. package/dist/browser/neurolink.min.js +382 -364
  5. package/dist/cli/commands/serve.js +9 -0
  6. package/dist/cli/commands/voiceServer.d.ts +7 -0
  7. package/dist/cli/commands/voiceServer.js +9 -1
  8. package/dist/cli/factories/commandFactory.js +136 -11
  9. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  10. package/dist/cli/utils/audioFileUtils.d.ts +3 -3
  11. package/dist/cli/utils/audioFileUtils.js +5 -1
  12. package/dist/core/baseProvider.js +29 -6
  13. package/dist/factories/providerRegistry.d.ts +14 -0
  14. package/dist/factories/providerRegistry.js +141 -2
  15. package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
  16. package/dist/lib/core/baseProvider.js +29 -6
  17. package/dist/lib/factories/providerRegistry.d.ts +14 -0
  18. package/dist/lib/factories/providerRegistry.js +141 -2
  19. package/dist/lib/mcp/toolRegistry.js +7 -1
  20. package/dist/lib/neurolink.d.ts +19 -0
  21. package/dist/lib/neurolink.js +252 -14
  22. package/dist/lib/observability/exporters/laminarExporter.js +1 -0
  23. package/dist/lib/observability/exporters/posthogExporter.js +1 -0
  24. package/dist/lib/observability/utils/spanSerializer.js +1 -0
  25. package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
  26. package/dist/lib/server/voice/tokenCompare.js +23 -0
  27. package/dist/lib/server/voice/voiceServerApp.js +62 -3
  28. package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
  29. package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
  30. package/dist/lib/types/generate.d.ts +47 -0
  31. package/dist/lib/types/hitl.d.ts +3 -0
  32. package/dist/lib/types/index.d.ts +1 -1
  33. package/dist/lib/types/index.js +1 -1
  34. package/dist/lib/types/realtime.d.ts +243 -0
  35. package/dist/lib/types/realtime.js +70 -0
  36. package/dist/lib/types/server.d.ts +68 -0
  37. package/dist/lib/types/span.d.ts +2 -0
  38. package/dist/lib/types/span.js +2 -0
  39. package/dist/lib/types/stream.d.ts +36 -14
  40. package/dist/lib/types/stt.d.ts +585 -0
  41. package/dist/lib/types/stt.js +90 -0
  42. package/dist/lib/types/tools.d.ts +2 -0
  43. package/dist/lib/types/tts.d.ts +23 -11
  44. package/dist/lib/types/tts.js +7 -0
  45. package/dist/lib/types/voice.d.ts +272 -0
  46. package/dist/lib/types/voice.js +137 -0
  47. package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
  48. package/dist/lib/utils/audioFormatDetector.js +34 -0
  49. package/dist/lib/utils/errorHandling.js +4 -0
  50. package/dist/lib/utils/sttProcessor.d.ts +115 -0
  51. package/dist/lib/utils/sttProcessor.js +295 -0
  52. package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
  53. package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
  54. package/dist/lib/voice/audio-utils.d.ts +135 -0
  55. package/dist/lib/voice/audio-utils.js +435 -0
  56. package/dist/lib/voice/errors.d.ts +123 -0
  57. package/dist/lib/voice/errors.js +386 -0
  58. package/dist/lib/voice/index.d.ts +26 -0
  59. package/dist/lib/voice/index.js +55 -0
  60. package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
  61. package/dist/lib/voice/providers/AzureSTT.js +345 -0
  62. package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
  63. package/dist/lib/voice/providers/AzureTTS.js +349 -0
  64. package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
  65. package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
  66. package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
  67. package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
  68. package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
  69. package/dist/lib/voice/providers/GeminiLive.js +372 -0
  70. package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
  71. package/dist/lib/voice/providers/GoogleSTT.js +454 -0
  72. package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
  73. package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
  74. package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
  75. package/dist/lib/voice/providers/OpenAISTT.js +286 -0
  76. package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
  77. package/dist/lib/voice/providers/OpenAITTS.js +271 -0
  78. package/dist/lib/voice/stream-handler.d.ts +166 -0
  79. package/dist/lib/voice/stream-handler.js +514 -0
  80. package/dist/mcp/toolRegistry.js +7 -1
  81. package/dist/neurolink.d.ts +19 -0
  82. package/dist/neurolink.js +252 -14
  83. package/dist/observability/exporters/laminarExporter.js +1 -0
  84. package/dist/observability/exporters/posthogExporter.js +1 -0
  85. package/dist/observability/utils/spanSerializer.js +1 -0
  86. package/dist/server/voice/tokenCompare.d.ts +14 -0
  87. package/dist/server/voice/tokenCompare.js +22 -0
  88. package/dist/server/voice/voiceServerApp.js +62 -3
  89. package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
  90. package/dist/server/voice/voiceWebSocketHandler.js +555 -435
  91. package/dist/types/generate.d.ts +47 -0
  92. package/dist/types/hitl.d.ts +3 -0
  93. package/dist/types/index.d.ts +1 -1
  94. package/dist/types/index.js +1 -1
  95. package/dist/types/realtime.d.ts +243 -0
  96. package/dist/types/realtime.js +69 -0
  97. package/dist/types/server.d.ts +68 -0
  98. package/dist/types/span.d.ts +2 -0
  99. package/dist/types/span.js +2 -0
  100. package/dist/types/stream.d.ts +36 -14
  101. package/dist/types/stt.d.ts +585 -0
  102. package/dist/types/stt.js +89 -0
  103. package/dist/types/tools.d.ts +2 -0
  104. package/dist/types/tts.d.ts +23 -11
  105. package/dist/types/tts.js +7 -0
  106. package/dist/types/voice.d.ts +272 -0
  107. package/dist/types/voice.js +136 -0
  108. package/dist/utils/audioFormatDetector.d.ts +15 -0
  109. package/dist/utils/audioFormatDetector.js +33 -0
  110. package/dist/utils/errorHandling.js +4 -0
  111. package/dist/utils/sttProcessor.d.ts +115 -0
  112. package/dist/utils/sttProcessor.js +294 -0
  113. package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
  114. package/dist/voice/RealtimeVoiceAPI.js +438 -0
  115. package/dist/voice/audio-utils.d.ts +135 -0
  116. package/dist/voice/audio-utils.js +434 -0
  117. package/dist/voice/errors.d.ts +123 -0
  118. package/dist/voice/errors.js +385 -0
  119. package/dist/voice/index.d.ts +26 -0
  120. package/dist/voice/index.js +54 -0
  121. package/dist/voice/providers/AzureSTT.d.ts +47 -0
  122. package/dist/voice/providers/AzureSTT.js +344 -0
  123. package/dist/voice/providers/AzureTTS.d.ts +59 -0
  124. package/dist/voice/providers/AzureTTS.js +348 -0
  125. package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
  126. package/dist/voice/providers/DeepgramSTT.js +549 -0
  127. package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
  128. package/dist/voice/providers/ElevenLabsTTS.js +310 -0
  129. package/dist/voice/providers/GeminiLive.d.ts +52 -0
  130. package/dist/voice/providers/GeminiLive.js +371 -0
  131. package/dist/voice/providers/GoogleSTT.d.ts +60 -0
  132. package/dist/voice/providers/GoogleSTT.js +453 -0
  133. package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
  134. package/dist/voice/providers/OpenAIRealtime.js +411 -0
  135. package/dist/voice/providers/OpenAISTT.d.ts +41 -0
  136. package/dist/voice/providers/OpenAISTT.js +285 -0
  137. package/dist/voice/providers/OpenAITTS.d.ts +49 -0
  138. package/dist/voice/providers/OpenAITTS.js +270 -0
  139. package/dist/voice/stream-handler.d.ts +166 -0
  140. package/dist/voice/stream-handler.js +513 -0
  141. package/package.json +5 -2
@@ -6,9 +6,13 @@
6
6
  * @module types/ttsTypes
7
7
  */
8
8
  /**
9
- * Supported audio formats for TTS output
9
+ * Supported audio formats for TTS output, STT input, and Realtime PCM streams.
10
+ *
11
+ * `pcm16` is included for the OpenAI Realtime PCM16 output stream — the chunk
12
+ * is raw PCM, not a RIFF/WAV-headered file. Consumers must not pass `pcm16`
13
+ * bytes to a WAV duration parser.
10
14
  */
11
- export type AudioFormat = "mp3" | "wav" | "ogg" | "opus";
15
+ export type TTSAudioFormat = "mp3" | "wav" | "ogg" | "opus" | "m4a" | "flac" | "webm" | "mp4" | "mpeg" | "mpga" | "pcm16";
12
16
  /**
13
17
  * TTS quality settings
14
18
  */
@@ -51,7 +55,7 @@ export type TTSOptions = {
51
55
  /** Voice identifier (e.g., "en-US-Neural2-C") */
52
56
  voice?: string;
53
57
  /** Audio format (default: mp3) */
54
- format?: AudioFormat;
58
+ format?: TTSAudioFormat;
55
59
  /** Speaking rate 0.25-4.0 (default: 1.0) */
56
60
  speed?: number;
57
61
  /** Voice pitch adjustment -20.0 to 20.0 semitones (default: 0.0) */
@@ -64,6 +68,8 @@ export type TTSOptions = {
64
68
  output?: string;
65
69
  /** Auto-play audio after generation (default: false) */
66
70
  play?: boolean;
71
+ /** Override TTS provider (e.g., "elevenlabs", "openai-tts", "azure-tts") */
72
+ provider?: string;
67
73
  };
68
74
  /**
69
75
  * TTS audio result returned from generation
@@ -72,7 +78,7 @@ export type TTSResult = {
72
78
  /** Audio data as Buffer */
73
79
  buffer: Buffer;
74
80
  /** Audio format */
75
- format: AudioFormat;
81
+ format: TTSAudioFormat;
76
82
  /** Audio file size in bytes */
77
83
  size: number;
78
84
  /** Duration in seconds (if available) */
@@ -105,9 +111,15 @@ export type AudioSaveResult = {
105
111
  error?: string;
106
112
  };
107
113
  /** Allowed TTS voice types */
108
- export type VoiceType = "standard" | "wavenet" | "neural" | "chirp" | "unknown";
114
+ export type TTSVoiceType = "standard" | "wavenet" | "neural" | "chirp" | "unknown";
109
115
  /** Allowed genders for TTS voices */
110
- export type Gender = "male" | "female" | "neutral";
116
+ export type TTSGender = "male" | "female" | "neutral";
117
+ /** @deprecated Use `TTSAudioFormat` instead. */
118
+ export type AudioFormat = TTSAudioFormat;
119
+ /** @deprecated Use `TTSVoiceType` instead. */
120
+ export type VoiceType = TTSVoiceType;
121
+ /** @deprecated Use `TTSGender` instead. */
122
+ export type Gender = TTSGender;
111
123
  /**
112
124
  * TTS voice information
113
125
  */
@@ -120,17 +132,17 @@ export type TTSVoice = {
120
132
  languageCode: string;
121
133
  /** All supported language codes */
122
134
  languageCodes: string[];
123
- /** Gender */
124
- gender: Gender;
135
+ /** Voice gender */
136
+ gender: TTSGender;
125
137
  /** Voice type */
126
- type?: VoiceType;
138
+ type?: TTSVoiceType;
127
139
  /** Voice description (optional) */
128
140
  description?: string;
129
141
  /** Natural sample rate in Hz (optional) */
130
142
  naturalSampleRateHertz?: number;
131
143
  };
132
144
  /** Valid audio formats as an array for runtime validation */
133
- export declare const VALID_AUDIO_FORMATS: readonly AudioFormat[];
145
+ export declare const VALID_AUDIO_FORMATS: readonly TTSAudioFormat[];
134
146
  /** Valid TTS quality levels as an array for runtime validation */
135
147
  export declare const VALID_TTS_QUALITIES: readonly TTSQuality[];
136
148
  /** Valid Google TTS audio formats */
@@ -153,7 +165,7 @@ export type TTSChunk = {
153
165
  /** Audio data chunk as Buffer */
154
166
  data: Buffer;
155
167
  /** Audio format of this chunk */
156
- format: AudioFormat;
168
+ format: TTSAudioFormat;
157
169
  /** Chunk sequence number (0-indexed) */
158
170
  index: number;
159
171
  /** Whether this is the final audio chunk */
@@ -11,6 +11,13 @@ export const VALID_AUDIO_FORMATS = [
11
11
  "wav",
12
12
  "ogg",
13
13
  "opus",
14
+ "m4a",
15
+ "flac",
16
+ "webm",
17
+ "mp4",
18
+ "mpeg",
19
+ "mpga",
20
+ "pcm16",
14
21
  ];
15
22
  /** Valid TTS quality levels as an array for runtime validation */
16
23
  export const VALID_TTS_QUALITIES = ["standard", "hd"];
@@ -0,0 +1,272 @@
1
+ /**
2
+ * Voice and Speech Type Definitions for NeuroLink
3
+ *
4
+ * Core voice types: capabilities, provider config, audio utilities,
5
+ * events, and provider abstractions.
6
+ *
7
+ * STT types are in ./stt.ts
8
+ * Realtime types are in ./realtime.ts
9
+ * TTS types are in ./tts.ts
10
+ *
11
+ * @module types/voice
12
+ */
13
+ export * from "./tts.js";
14
+ export * from "./stt.js";
15
+ export * from "./realtime.js";
16
+ import type { TTSAudioFormat, TTSOptions, TTSResult, TTSVoice } from "./tts.js";
17
+ import type { TTSHandler } from "./common.js";
18
+ import type { STTResult, STTHandler } from "./stt.js";
19
+ import type { RealtimeHandler } from "./realtime.js";
20
+ /**
21
+ * Voice capability types supported by providers
22
+ */
23
+ export type VoiceCapability = "tts" | "stt" | "realtime" | "streaming";
24
+ /**
25
+ * Voice provider types
26
+ */
27
+ export type VoiceProviderType = "tts" | "stt" | "realtime";
28
+ /**
29
+ * Voice provider name union type
30
+ */
31
+ export type VoiceProviderName = "google-tts" | "elevenlabs" | "openai-tts" | "azure-tts" | "sarvam" | "murf" | "playai" | "speechify" | "cartesia" | "deepgram" | "gladia" | "whisper" | "assemblyai" | "google-stt" | "azure-stt" | "openai-realtime" | "gemini-live";
32
+ /**
33
+ * Base voice provider configuration
34
+ */
35
+ export type VoiceProviderConfig = {
36
+ /** Provider identifier */
37
+ name: string;
38
+ /** API key or credentials */
39
+ apiKey?: string;
40
+ /** Custom endpoint URL */
41
+ baseUrl?: string;
42
+ /** Request timeout in milliseconds */
43
+ timeout?: number;
44
+ /** Maximum retries for failed requests */
45
+ maxRetries?: number;
46
+ /** Provider-specific options */
47
+ options?: Record<string, unknown>;
48
+ };
49
+ /**
50
+ * Audio format details
51
+ */
52
+ export type AudioFormatDetails = {
53
+ /** Format name */
54
+ format: TTSAudioFormat;
55
+ /** MIME type */
56
+ mimeType: string;
57
+ /** File extension */
58
+ extension: string;
59
+ /** Whether format supports streaming */
60
+ supportsStreaming: boolean;
61
+ /** Typical sample rates */
62
+ sampleRates: number[];
63
+ /** Bit depths */
64
+ bitDepths: number[];
65
+ };
66
+ /**
67
+ * Audio conversion options
68
+ */
69
+ export type AudioConversionOptions = {
70
+ /** Target format */
71
+ targetFormat: TTSAudioFormat;
72
+ /** Target sample rate */
73
+ sampleRate?: number;
74
+ /** Target bit depth */
75
+ bitDepth?: number;
76
+ /** Number of channels */
77
+ channels?: number;
78
+ /** Normalize audio level */
79
+ normalize?: boolean;
80
+ };
81
+ /**
82
+ * Audio stream chunk for streaming operations
83
+ */
84
+ export type AudioStreamChunk = {
85
+ /** Audio data */
86
+ data: Buffer;
87
+ /** Chunk index */
88
+ index: number;
89
+ /** Whether this is the final chunk */
90
+ isFinal: boolean;
91
+ /** Audio format */
92
+ format: TTSAudioFormat;
93
+ /** Sample rate */
94
+ sampleRate: number;
95
+ /** Timestamp offset in milliseconds */
96
+ timestampMs: number;
97
+ /** Duration of this chunk in milliseconds */
98
+ durationMs: number;
99
+ };
100
+ /**
101
+ * Voice event types for event-driven architectures
102
+ */
103
+ export type VoiceEventType = "synthesis.started" | "synthesis.progress" | "synthesis.completed" | "synthesis.error" | "transcription.started" | "transcription.partial" | "transcription.completed" | "transcription.error" | "realtime.connected" | "realtime.audio.received" | "realtime.text.received" | "realtime.disconnected" | "realtime.error";
104
+ /**
105
+ * Voice event for event-driven operations
106
+ */
107
+ export type VoiceEvent<T = unknown> = {
108
+ type: VoiceEventType;
109
+ timestamp: Date;
110
+ provider: VoiceProviderName;
111
+ data: T;
112
+ metadata?: Record<string, unknown>;
113
+ };
114
+ /**
115
+ * Voice operation result union
116
+ */
117
+ export type VoiceResult = TTSResult | STTResult;
118
+ /**
119
+ * Voice conversation turn
120
+ */
121
+ export type VoiceTurn = {
122
+ role: "user" | "assistant";
123
+ text: string;
124
+ audio?: Buffer;
125
+ timestamp: Date;
126
+ metadata?: {
127
+ duration?: number;
128
+ confidence?: number;
129
+ language?: string;
130
+ provider?: string;
131
+ voice?: string;
132
+ [key: string]: unknown;
133
+ };
134
+ };
135
+ /**
136
+ * TTS-capable voice provider type
137
+ */
138
+ export type TTSProvider = {
139
+ /**
140
+ * Synthesize text to speech
141
+ */
142
+ synthesize(text: string, options: TTSOptions): Promise<TTSResult>;
143
+ /**
144
+ * Stream synthesized audio chunks
145
+ */
146
+ synthesizeStream?(text: string, options: TTSOptions): AsyncIterable<TTSStreamChunk>;
147
+ /**
148
+ * Get available voices
149
+ */
150
+ getVoices(languageCode?: string): Promise<TTSVoice[]>;
151
+ /**
152
+ * Maximum text length supported
153
+ */
154
+ readonly maxTextLength: number;
155
+ };
156
+ /**
157
+ * TTS stream chunk for streaming synthesis
158
+ */
159
+ export type TTSStreamChunk = {
160
+ /** Audio data chunk */
161
+ data: Buffer;
162
+ /** Chunk sequence number */
163
+ index: number;
164
+ /** Whether this is the final chunk */
165
+ isFinal: boolean;
166
+ /** Audio format */
167
+ format: string;
168
+ /** Sample rate */
169
+ sampleRate?: number;
170
+ /** Timestamp offset in audio (milliseconds) */
171
+ timestampMs?: number;
172
+ };
173
+ /**
174
+ * Voice error codes (general)
175
+ */
176
+ export declare const VOICE_ERROR_CODES: {
177
+ readonly PROVIDER_NOT_FOUND: "VOICE_PROVIDER_NOT_FOUND";
178
+ readonly INVALID_CONFIGURATION: "VOICE_INVALID_CONFIGURATION";
179
+ readonly INITIALIZATION_FAILED: "VOICE_INITIALIZATION_FAILED";
180
+ readonly OPERATION_CANCELLED: "VOICE_OPERATION_CANCELLED";
181
+ readonly PROVIDER_NOT_CONFIGURED: "VOICE_PROVIDER_NOT_CONFIGURED";
182
+ readonly PROVIDER_NOT_SUPPORTED: "VOICE_PROVIDER_NOT_SUPPORTED";
183
+ readonly FEATURE_NOT_SUPPORTED: "VOICE_FEATURE_NOT_SUPPORTED";
184
+ readonly TTS_EMPTY_TEXT: "VOICE_TTS_EMPTY_TEXT";
185
+ readonly TTS_TEXT_TOO_LONG: "VOICE_TTS_TEXT_TOO_LONG";
186
+ readonly TTS_SYNTHESIS_FAILED: "VOICE_TTS_SYNTHESIS_FAILED";
187
+ readonly STT_EMPTY_AUDIO: "VOICE_STT_EMPTY_AUDIO";
188
+ readonly STT_INVALID_FORMAT: "VOICE_STT_INVALID_FORMAT";
189
+ readonly STT_TRANSCRIPTION_FAILED: "VOICE_STT_TRANSCRIPTION_FAILED";
190
+ readonly REALTIME_CONNECTION_FAILED: "VOICE_REALTIME_CONNECTION_FAILED";
191
+ readonly REALTIME_SESSION_ERROR: "VOICE_REALTIME_SESSION_ERROR";
192
+ readonly NETWORK_ERROR: "VOICE_NETWORK_ERROR";
193
+ readonly TIMEOUT: "VOICE_TIMEOUT";
194
+ };
195
+ /**
196
+ * Supported audio formats with details
197
+ */
198
+ export declare const AUDIO_FORMAT_DETAILS: Partial<Record<TTSAudioFormat, AudioFormatDetails>>;
199
+ import type { ErrorCategory, ErrorSeverity } from "../constants/enums.js";
200
+ export type VoiceErrorOptions = {
201
+ code: string;
202
+ message: string;
203
+ category?: ErrorCategory;
204
+ severity?: ErrorSeverity;
205
+ retriable?: boolean;
206
+ context?: Record<string, unknown>;
207
+ originalError?: Error;
208
+ provider?: string;
209
+ };
210
+ export type AudioMetadata = {
211
+ format: TTSAudioFormat;
212
+ duration: number;
213
+ sampleRate: number;
214
+ channels: number;
215
+ bitDepth: number;
216
+ samples: number;
217
+ size: number;
218
+ };
219
+ export type StreamHandlerConfig = {
220
+ chunkDurationMs?: number;
221
+ sampleRate?: number;
222
+ bytesPerSample?: number;
223
+ format?: TTSAudioFormat;
224
+ highWaterMark?: number;
225
+ bufferTimeoutMs?: number;
226
+ };
227
+ export type StreamEvents = {
228
+ chunk: (chunk: AudioStreamChunk) => void;
229
+ end: () => void;
230
+ error: (error: Error) => void;
231
+ drain: () => void;
232
+ pause: () => void;
233
+ resume: () => void;
234
+ };
235
+ export type VoiceHandler = TTSHandler | STTHandler | RealtimeHandler;
236
+ export type AzureTTSOptions = TTSOptions & {
237
+ useSSML?: boolean;
238
+ ssmlTemplate?: string;
239
+ outputFormat?: string;
240
+ wordBoundary?: boolean;
241
+ /**
242
+ * Pass `text` through as raw SSML when it begins with `<speak`.
243
+ *
244
+ * **Security:** raw SSML can change voice, embed external content, or
245
+ * inject markup. Only enable when `text` originates from a TRUSTED source
246
+ * (your own server-built template, not end-user input). When this flag
247
+ * is false (default), all input — including text starting with `<speak`
248
+ * — is XML-escaped, preventing SSML injection.
249
+ *
250
+ * @default false
251
+ */
252
+ allowRawSSML?: boolean;
253
+ };
254
+ export type ElevenLabsModel = "eleven_multilingual_v2" | "eleven_turbo_v2_5" | "eleven_turbo_v2" | "eleven_monolingual_v1";
255
+ export type ElevenLabsTTSOptions = TTSOptions & {
256
+ model?: ElevenLabsModel;
257
+ stability?: number;
258
+ similarityBoost?: number;
259
+ style?: number;
260
+ useSpeakerBoost?: boolean;
261
+ };
262
+ export type GoogleVoiceType = "Standard" | "WaveNet" | "Neural2" | "Studio" | "Polyglot";
263
+ export type GoogleTTSOptions = TTSOptions & {
264
+ voiceType?: GoogleVoiceType;
265
+ sampleRateHertz?: number;
266
+ effectsProfileId?: string[];
267
+ };
268
+ export type OpenAIVoice = "alloy" | "echo" | "fable" | "onyx" | "nova" | "shimmer";
269
+ export type OpenAITTSModel = "tts-1" | "tts-1-hd";
270
+ export type OpenAITTSOptions = TTSOptions & {
271
+ model?: OpenAITTSModel;
272
+ };
@@ -0,0 +1,137 @@
1
+ /**
2
+ * Voice and Speech Type Definitions for NeuroLink
3
+ *
4
+ * Core voice types: capabilities, provider config, audio utilities,
5
+ * events, and provider abstractions.
6
+ *
7
+ * STT types are in ./stt.ts
8
+ * Realtime types are in ./realtime.ts
9
+ * TTS types are in ./tts.ts
10
+ *
11
+ * @module types/voice
12
+ */
13
+ // Re-export all TTS types
14
+ export * from "./tts.js";
15
+ // Re-export all STT types
16
+ export * from "./stt.js";
17
+ // Re-export all Realtime types
18
+ export * from "./realtime.js";
19
+ // ============================================================================
20
+ // ERROR CODES
21
+ // ============================================================================
22
+ /**
23
+ * Voice error codes (general)
24
+ */
25
+ export const VOICE_ERROR_CODES = {
26
+ PROVIDER_NOT_FOUND: "VOICE_PROVIDER_NOT_FOUND",
27
+ INVALID_CONFIGURATION: "VOICE_INVALID_CONFIGURATION",
28
+ INITIALIZATION_FAILED: "VOICE_INITIALIZATION_FAILED",
29
+ OPERATION_CANCELLED: "VOICE_OPERATION_CANCELLED",
30
+ // General
31
+ PROVIDER_NOT_CONFIGURED: "VOICE_PROVIDER_NOT_CONFIGURED",
32
+ PROVIDER_NOT_SUPPORTED: "VOICE_PROVIDER_NOT_SUPPORTED",
33
+ FEATURE_NOT_SUPPORTED: "VOICE_FEATURE_NOT_SUPPORTED",
34
+ // TTS specific
35
+ TTS_EMPTY_TEXT: "VOICE_TTS_EMPTY_TEXT",
36
+ TTS_TEXT_TOO_LONG: "VOICE_TTS_TEXT_TOO_LONG",
37
+ TTS_SYNTHESIS_FAILED: "VOICE_TTS_SYNTHESIS_FAILED",
38
+ // STT specific
39
+ STT_EMPTY_AUDIO: "VOICE_STT_EMPTY_AUDIO",
40
+ STT_INVALID_FORMAT: "VOICE_STT_INVALID_FORMAT",
41
+ STT_TRANSCRIPTION_FAILED: "VOICE_STT_TRANSCRIPTION_FAILED",
42
+ // Realtime specific
43
+ REALTIME_CONNECTION_FAILED: "VOICE_REALTIME_CONNECTION_FAILED",
44
+ REALTIME_SESSION_ERROR: "VOICE_REALTIME_SESSION_ERROR",
45
+ // Network
46
+ NETWORK_ERROR: "VOICE_NETWORK_ERROR",
47
+ TIMEOUT: "VOICE_TIMEOUT",
48
+ };
49
+ // ============================================================================
50
+ // CONSTANTS
51
+ // ============================================================================
52
+ /**
53
+ * Per-format details (MIME type, file extension, streaming support, typical sample rates, bit depths); `pcm16` has no entry.
54
+ */
55
+ export const AUDIO_FORMAT_DETAILS = {
56
+ mp3: {
57
+ format: "mp3",
58
+ mimeType: "audio/mpeg",
59
+ extension: ".mp3",
60
+ supportsStreaming: true,
61
+ sampleRates: [8000, 16000, 22050, 24000, 44100, 48000],
62
+ bitDepths: [16],
63
+ },
64
+ wav: {
65
+ format: "wav",
66
+ mimeType: "audio/wav",
67
+ extension: ".wav",
68
+ supportsStreaming: false,
69
+ sampleRates: [8000, 16000, 22050, 24000, 44100, 48000],
70
+ bitDepths: [8, 16, 24, 32],
71
+ },
72
+ ogg: {
73
+ format: "ogg",
74
+ mimeType: "audio/ogg",
75
+ extension: ".ogg",
76
+ supportsStreaming: true,
77
+ sampleRates: [8000, 16000, 22050, 24000, 44100, 48000],
78
+ bitDepths: [16],
79
+ },
80
+ opus: {
81
+ format: "opus",
82
+ mimeType: "audio/opus",
83
+ extension: ".opus",
84
+ supportsStreaming: true,
85
+ sampleRates: [8000, 12000, 16000, 24000, 48000],
86
+ bitDepths: [16],
87
+ },
88
+ m4a: {
89
+ format: "m4a",
90
+ mimeType: "audio/mp4",
91
+ extension: ".m4a",
92
+ supportsStreaming: false,
93
+ sampleRates: [44100, 48000],
94
+ bitDepths: [16],
95
+ },
96
+ flac: {
97
+ format: "flac",
98
+ mimeType: "audio/flac",
99
+ extension: ".flac",
100
+ supportsStreaming: false,
101
+ sampleRates: [44100, 48000, 96000],
102
+ bitDepths: [16, 24],
103
+ },
104
+ webm: {
105
+ format: "webm",
106
+ mimeType: "audio/webm",
107
+ extension: ".webm",
108
+ supportsStreaming: true,
109
+ sampleRates: [44100, 48000],
110
+ bitDepths: [16],
111
+ },
112
+ mp4: {
113
+ format: "mp4",
114
+ mimeType: "audio/mp4",
115
+ extension: ".mp4",
116
+ supportsStreaming: false,
117
+ sampleRates: [44100, 48000],
118
+ bitDepths: [16],
119
+ },
120
+ mpeg: {
121
+ format: "mpeg",
122
+ mimeType: "audio/mpeg",
123
+ extension: ".mpeg",
124
+ supportsStreaming: true,
125
+ sampleRates: [8000, 16000, 22050, 24000, 44100, 48000],
126
+ bitDepths: [16],
127
+ },
128
+ mpga: {
129
+ format: "mpga",
130
+ mimeType: "audio/mpeg",
131
+ extension: ".mpga",
132
+ supportsStreaming: true,
133
+ sampleRates: [8000, 16000, 22050, 24000, 44100, 48000],
134
+ bitDepths: [16],
135
+ },
136
+ };
137
+ //# sourceMappingURL=voice.js.map
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Shared utility: infer a `TTSAudioFormat` from a file path.
3
+ *
4
+ * Used by the CLI generate/stream handlers (m2) to set `stt.format` so
5
+ * `STTProcessor.transcribe()` can fail fast on incompatible provider/format
6
+ * combinations (e.g. MP3 to azure-stt). Pulled into a single helper to
7
+ * avoid duplicating the 11-element format list across two CLI handlers.
8
+ */
9
+ import type { TTSAudioFormat } from "../types/index.js";
10
+ /**
11
+ * Returns the `TTSAudioFormat` that matches the file extension of `path`
12
+ * (always lowercase), or `undefined` when the path is missing/empty or its
13
+ * extension isn't a known audio format. The check is case-insensitive.
14
+ */
15
+ export declare function inferAudioFormatFromPath(path: string | undefined): TTSAudioFormat | undefined;
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Shared utility: infer a `TTSAudioFormat` from a file path.
3
+ *
4
+ * Used by the CLI generate/stream handlers (m2) to set `stt.format` so
5
+ * `STTProcessor.transcribe()` can fail fast on incompatible provider/format
6
+ * combinations (e.g. MP3 to azure-stt). Pulled into a single helper to
7
+ * avoid duplicating the 11-element format list across two CLI handlers.
8
+ */
9
+ const VALID_FORMATS = [
10
+ "mp3",
11
+ "wav",
12
+ "ogg",
13
+ "opus",
14
+ "m4a",
15
+ "flac",
16
+ "webm",
17
+ "mp4",
18
+ "mpeg",
19
+ "mpga",
20
+ "pcm16",
21
+ ];
22
+ /**
23
+ * Infers the audio format from the text after the last `.` in `path`, lowercased.
24
+ * Returns that text when it is one of `VALID_FORMATS`; returns `undefined` when
25
+ * `path` is falsy (undefined or "") or the text is not a known format.
26
+ */
27
+ export function inferAudioFormatFromPath(path) {
28
+ if (!path) {
29
+ return undefined;
30
+ }
31
+ const ext = path.toLowerCase().split(".").pop(); // a dot-less path yields the whole lowercased path
32
+ return ext && VALID_FORMATS.includes(ext) ? ext : undefined;
33
+ }
34
+ //# sourceMappingURL=audioFormatDetector.js.map
@@ -5,6 +5,7 @@
5
5
  import { ErrorCategory, ErrorSeverity } from "../constants/enums.js";
6
6
  import { logger } from "./logger.js";
7
7
  import { CircuitBreakerOpenError } from "../types/index.js";
8
+ import { HITLTimeoutError } from "../hitl/hitlErrors.js";
8
9
  // Error codes for different scenarios
9
10
  export const ERROR_CODES = {
10
11
  // Tool errors
@@ -950,6 +951,9 @@ export function isRetriableError(error) {
950
951
  if (error instanceof NeuroLinkError) {
951
952
  return error.retriable;
952
953
  }
954
+ if (error instanceof HITLTimeoutError) {
955
+ return false;
956
+ }
953
957
  // Check for common retriable error patterns
954
958
  const retriablePatterns = [
955
959
  /timeout/i,
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Speech-to-Text (STT) Processing Utility
3
+ *
4
+ * Central orchestrator for all STT operations across providers.
5
+ * Manages provider-specific STT handlers and audio transcription.
6
+ *
7
+ * @module utils/sttProcessor
8
+ */
9
+ import type { STTOptions, STTResult, STTHandler } from "../types/index.js";
10
+ /**
11
+ * STT processor class for orchestrating speech-to-text operations
12
+ *
13
+ * Follows the same pattern as TTSProcessor, CSVProcessor, ImageProcessor, and PDFProcessor.
14
+ * Provides a unified interface for STT transcription across multiple providers.
15
+ *
16
+ * @example
17
+ * ```typescript
18
+ * // Register a handler
19
+ * STTProcessor.registerHandler('whisper', whisperHandler);
20
+ *
21
+ * // Check if provider is supported
22
+ * if (STTProcessor.supports('whisper')) {
23
+ * // Provider is registered
24
+ * }
25
+ * ```
26
+ */
27
+ export declare class STTProcessor {
28
+ /**
29
+ * Handler registry mapping provider names to STT handlers
30
+ * Uses Map for O(1) lookups and better type safety
31
+ *
32
+ * @private
33
+ */
34
+ private static readonly handlers;
35
+ /**
36
+ * Default maximum audio duration for STT transcription (in seconds)
37
+ *
38
+ * Providers can override this value by specifying the `maxAudioDuration` property
39
+ * in their respective `STTHandler` implementation. If not specified, this default
40
+ * value will be used (5 minutes).
41
+ *
42
+ * @private
43
+ */
44
+ private static readonly DEFAULT_MAX_AUDIO_DURATION;
45
+ /**
46
+ * Register an STT handler for a specific provider
47
+ *
48
+ * Allows providers to register their STT implementation at runtime.
49
+ *
50
+ * @param providerName - Provider identifier (e.g., 'whisper', 'deepgram')
51
+ * @param handler - STT handler implementation
52
+ *
53
+ * @example
54
+ * ```typescript
55
+ * const whisperHandler: STTHandler = {
56
+ * transcribe: async (audio, options) => { ... },
57
+ * getSupportedFormats: () => ["mp3", "wav"],
58
+ * isConfigured: () => true
59
+ * };
60
+ *
61
+ * STTProcessor.registerHandler('whisper', whisperHandler);
62
+ * ```
63
+ */
64
+ static registerHandler(providerName: string, handler: STTHandler): void;
65
+ /**
66
+ * Get a registered STT handler by provider name
67
+ *
68
+ * @private
69
+ * @param providerName - Provider identifier
70
+ * @returns Handler instance or undefined if not registered
71
+ */
72
+ private static getHandler;
73
+ /**
74
+ * Check if a provider is supported (has a registered STT handler)
75
+ *
76
+ * @param providerName - Provider identifier
77
+ * @returns True if handler is registered
78
+ *
79
+ * @example
80
+ * ```typescript
81
+ * if (STTProcessor.supports('whisper')) {
82
+ * console.log('Whisper STT is supported');
83
+ * }
84
+ * ```
85
+ */
86
+ static supports(providerName: string): boolean;
87
+ /**
88
+ * Transcribe audio to text using a registered STT provider
89
+ *
90
+ * Orchestrates the speech-to-text transcription process:
91
+ * 1. Validates audio input (non-empty)
92
+ * 2. Looks up the provider handler
93
+ * 3. Verifies provider configuration
94
+ * 4. Delegates transcription to the provider
95
+ * 5. Enriches result with provider metadata
96
+ *
97
+ * @param audio - Audio data as Buffer or ArrayBuffer
98
+ * @param provider - Provider identifier
99
+ * @param options - STT configuration options
100
+ * @returns Transcription result with text and metadata
101
+ * @throws STTError if validation fails or provider not supported/configured
102
+ *
103
+ * @example
104
+ * ```typescript
105
+ * const result = await STTProcessor.transcribe(audioBuffer, "whisper", {
106
+ * language: "en-US",
107
+ * punctuation: true,
108
+ * });
109
+ *
110
+ * console.log(`Transcription: ${result.text}`);
111
+ * console.log(`Confidence: ${result.confidence}`);
112
+ * ```
113
+ */
114
+ static transcribe(audio: Buffer | ArrayBuffer, provider: string, options: STTOptions): Promise<STTResult>;
115
+ }