@juspay/neurolink 9.61.1 → 9.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +23 -17
  3. package/dist/adapters/tts/googleTTSHandler.js +1 -1
  4. package/dist/browser/neurolink.min.js +382 -364
  5. package/dist/cli/commands/serve.js +9 -0
  6. package/dist/cli/commands/voiceServer.d.ts +7 -0
  7. package/dist/cli/commands/voiceServer.js +9 -1
  8. package/dist/cli/factories/commandFactory.js +136 -11
  9. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  10. package/dist/cli/utils/audioFileUtils.d.ts +3 -3
  11. package/dist/cli/utils/audioFileUtils.js +5 -1
  12. package/dist/core/baseProvider.js +29 -6
  13. package/dist/factories/providerRegistry.d.ts +14 -0
  14. package/dist/factories/providerRegistry.js +141 -2
  15. package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
  16. package/dist/lib/core/baseProvider.js +29 -6
  17. package/dist/lib/factories/providerRegistry.d.ts +14 -0
  18. package/dist/lib/factories/providerRegistry.js +141 -2
  19. package/dist/lib/mcp/toolRegistry.js +7 -1
  20. package/dist/lib/neurolink.d.ts +19 -0
  21. package/dist/lib/neurolink.js +252 -14
  22. package/dist/lib/observability/exporters/laminarExporter.js +1 -0
  23. package/dist/lib/observability/exporters/posthogExporter.js +1 -0
  24. package/dist/lib/observability/utils/spanSerializer.js +1 -0
  25. package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
  26. package/dist/lib/server/voice/tokenCompare.js +23 -0
  27. package/dist/lib/server/voice/voiceServerApp.js +62 -3
  28. package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
  29. package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
  30. package/dist/lib/types/generate.d.ts +47 -0
  31. package/dist/lib/types/hitl.d.ts +3 -0
  32. package/dist/lib/types/index.d.ts +1 -1
  33. package/dist/lib/types/index.js +1 -1
  34. package/dist/lib/types/realtime.d.ts +243 -0
  35. package/dist/lib/types/realtime.js +70 -0
  36. package/dist/lib/types/server.d.ts +68 -0
  37. package/dist/lib/types/span.d.ts +2 -0
  38. package/dist/lib/types/span.js +2 -0
  39. package/dist/lib/types/stream.d.ts +36 -14
  40. package/dist/lib/types/stt.d.ts +585 -0
  41. package/dist/lib/types/stt.js +90 -0
  42. package/dist/lib/types/tools.d.ts +2 -0
  43. package/dist/lib/types/tts.d.ts +23 -11
  44. package/dist/lib/types/tts.js +7 -0
  45. package/dist/lib/types/voice.d.ts +272 -0
  46. package/dist/lib/types/voice.js +137 -0
  47. package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
  48. package/dist/lib/utils/audioFormatDetector.js +34 -0
  49. package/dist/lib/utils/errorHandling.js +4 -0
  50. package/dist/lib/utils/sttProcessor.d.ts +115 -0
  51. package/dist/lib/utils/sttProcessor.js +295 -0
  52. package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
  53. package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
  54. package/dist/lib/voice/audio-utils.d.ts +135 -0
  55. package/dist/lib/voice/audio-utils.js +435 -0
  56. package/dist/lib/voice/errors.d.ts +123 -0
  57. package/dist/lib/voice/errors.js +386 -0
  58. package/dist/lib/voice/index.d.ts +26 -0
  59. package/dist/lib/voice/index.js +55 -0
  60. package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
  61. package/dist/lib/voice/providers/AzureSTT.js +345 -0
  62. package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
  63. package/dist/lib/voice/providers/AzureTTS.js +349 -0
  64. package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
  65. package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
  66. package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
  67. package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
  68. package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
  69. package/dist/lib/voice/providers/GeminiLive.js +372 -0
  70. package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
  71. package/dist/lib/voice/providers/GoogleSTT.js +454 -0
  72. package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
  73. package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
  74. package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
  75. package/dist/lib/voice/providers/OpenAISTT.js +286 -0
  76. package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
  77. package/dist/lib/voice/providers/OpenAITTS.js +271 -0
  78. package/dist/lib/voice/stream-handler.d.ts +166 -0
  79. package/dist/lib/voice/stream-handler.js +514 -0
  80. package/dist/mcp/toolRegistry.js +7 -1
  81. package/dist/neurolink.d.ts +19 -0
  82. package/dist/neurolink.js +252 -14
  83. package/dist/observability/exporters/laminarExporter.js +1 -0
  84. package/dist/observability/exporters/posthogExporter.js +1 -0
  85. package/dist/observability/utils/spanSerializer.js +1 -0
  86. package/dist/server/voice/tokenCompare.d.ts +14 -0
  87. package/dist/server/voice/tokenCompare.js +22 -0
  88. package/dist/server/voice/voiceServerApp.js +62 -3
  89. package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
  90. package/dist/server/voice/voiceWebSocketHandler.js +555 -435
  91. package/dist/types/generate.d.ts +47 -0
  92. package/dist/types/hitl.d.ts +3 -0
  93. package/dist/types/index.d.ts +1 -1
  94. package/dist/types/index.js +1 -1
  95. package/dist/types/realtime.d.ts +243 -0
  96. package/dist/types/realtime.js +69 -0
  97. package/dist/types/server.d.ts +68 -0
  98. package/dist/types/span.d.ts +2 -0
  99. package/dist/types/span.js +2 -0
  100. package/dist/types/stream.d.ts +36 -14
  101. package/dist/types/stt.d.ts +585 -0
  102. package/dist/types/stt.js +89 -0
  103. package/dist/types/tools.d.ts +2 -0
  104. package/dist/types/tts.d.ts +23 -11
  105. package/dist/types/tts.js +7 -0
  106. package/dist/types/voice.d.ts +272 -0
  107. package/dist/types/voice.js +136 -0
  108. package/dist/utils/audioFormatDetector.d.ts +15 -0
  109. package/dist/utils/audioFormatDetector.js +33 -0
  110. package/dist/utils/errorHandling.js +4 -0
  111. package/dist/utils/sttProcessor.d.ts +115 -0
  112. package/dist/utils/sttProcessor.js +294 -0
  113. package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
  114. package/dist/voice/RealtimeVoiceAPI.js +438 -0
  115. package/dist/voice/audio-utils.d.ts +135 -0
  116. package/dist/voice/audio-utils.js +434 -0
  117. package/dist/voice/errors.d.ts +123 -0
  118. package/dist/voice/errors.js +385 -0
  119. package/dist/voice/index.d.ts +26 -0
  120. package/dist/voice/index.js +54 -0
  121. package/dist/voice/providers/AzureSTT.d.ts +47 -0
  122. package/dist/voice/providers/AzureSTT.js +344 -0
  123. package/dist/voice/providers/AzureTTS.d.ts +59 -0
  124. package/dist/voice/providers/AzureTTS.js +348 -0
  125. package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
  126. package/dist/voice/providers/DeepgramSTT.js +549 -0
  127. package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
  128. package/dist/voice/providers/ElevenLabsTTS.js +310 -0
  129. package/dist/voice/providers/GeminiLive.d.ts +52 -0
  130. package/dist/voice/providers/GeminiLive.js +371 -0
  131. package/dist/voice/providers/GoogleSTT.d.ts +60 -0
  132. package/dist/voice/providers/GoogleSTT.js +453 -0
  133. package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
  134. package/dist/voice/providers/OpenAIRealtime.js +411 -0
  135. package/dist/voice/providers/OpenAISTT.d.ts +41 -0
  136. package/dist/voice/providers/OpenAISTT.js +285 -0
  137. package/dist/voice/providers/OpenAITTS.d.ts +49 -0
  138. package/dist/voice/providers/OpenAITTS.js +270 -0
  139. package/dist/voice/stream-handler.d.ts +166 -0
  140. package/dist/voice/stream-handler.js +513 -0
  141. package/package.json +5 -2
@@ -0,0 +1,412 @@
1
+ /**
2
+ * OpenAI Realtime Voice API Handler
3
+ *
4
+ * Implementation of bidirectional voice communication using OpenAI's Realtime API.
5
+ *
6
+ * @module voice/providers/OpenAIRealtime
7
+ */
8
+ import { logger } from "../../utils/logger.js";
9
+ import { RealtimeError } from "../errors.js";
10
+ import { BaseRealtimeHandler } from "../RealtimeVoiceAPI.js";
11
+ /**
12
+ * OpenAI Realtime API Handler
13
+ *
14
+ * Implements bidirectional voice communication with OpenAI's Realtime API.
15
+ *
16
+ * @see https://platform.openai.com/docs/api-reference/realtime
17
+ */
18
+ export class OpenAIRealtime extends BaseRealtimeHandler {
19
+ name = "openai-realtime";
20
+ apiKey;
21
+ ws = null;
22
+ audioChunkIndex = 0;
23
+ constructor(apiKey) {
24
+ super();
25
+ // Match the trim+null-coerce pattern used by sibling providers
26
+ // (OpenAITTS/AzureSTT/AzureTTS/ElevenLabsTTS/GeminiLive/OpenAISTT/GoogleSTT)
27
+ // so empty/whitespace `OPENAI_API_KEY=""` surfaces as PROVIDER_NOT_CONFIGURED
28
+ // instead of a downstream 401, and `isConfigured()` agrees with `connect()`.
29
+ const resolvedKey = (apiKey ?? process.env.OPENAI_API_KEY ?? "").trim();
30
+ this.apiKey = resolvedKey.length > 0 ? resolvedKey : null;
31
+ }
32
+ isConfigured() {
33
+ return this.apiKey !== null;
34
+ }
35
+ getSupportedFormats() {
36
+ // Session uses pcm16 for both input_audio_format and output_audio_format,
37
+ // and audio-delta/audio-done chunks are tagged as `format: "pcm16"`. The
38
+ // advertised list must match what's actually emitted.
39
+ return ["pcm16"];
40
+ }
41
+ async connect(config) {
42
+ if (!this.apiKey) {
43
+ throw RealtimeError.providerNotConfigured("openai-realtime");
44
+ }
45
+ if (this.isConnected()) {
46
+ throw RealtimeError.sessionAlreadyActive("openai-realtime");
47
+ }
48
+ this.emitStateChange("connecting");
49
+ try {
50
+ // Import WebSocket
51
+ const { default: WebSocket } = await import("ws");
52
+ // Determine model
53
+ const model = config.model ?? "gpt-4o-realtime-preview-2024-12-17";
54
+ // Connect to OpenAI Realtime API
55
+ const wsUrl = `wss://api.openai.com/v1/realtime?model=${model}`;
56
+ this.ws = new WebSocket(wsUrl, {
57
+ headers: {
58
+ Authorization: `Bearer ${this.apiKey}`,
59
+ "OpenAI-Beta": "realtime=v1",
60
+ },
61
+ });
62
+ // Wait for connection. Capture a local reference so the closure
63
+ // doesn't need a non-null assertion on `this.ws` (Issue 9).
64
+ const ws = this.ws;
65
+ await new Promise((resolve, reject) => {
66
+ const timeout = setTimeout(() => {
67
+ reject(new Error("Connection timeout"));
68
+ }, config.timeout ?? 30000);
69
+ ws.on("open", () => {
70
+ clearTimeout(timeout);
71
+ resolve();
72
+ });
73
+ ws.on("error", (err) => {
74
+ clearTimeout(timeout);
75
+ reject(err);
76
+ });
77
+ });
78
+ // Set up message handler
79
+ this.ws.on("message", (data) => {
80
+ this.handleMessage(data);
81
+ });
82
+ this.ws.on("close", () => {
83
+ this.emitStateChange("disconnected");
84
+ this.session = null;
85
+ });
86
+ this.ws.on("error", (err) => {
87
+ this.emitError(err);
88
+ });
89
+ // Send session update with configuration
90
+ await this.sendSessionUpdate(config);
91
+ // Wait for session.created event
92
+ const sessionId = await this.waitForSessionCreated();
93
+ // Create session object
94
+ this.session = this.createSession(sessionId, config);
95
+ this.emitStateChange("connected");
96
+ logger.info(`[OpenAIRealtimeHandler] Connected to session: ${sessionId}`);
97
+ return this.session;
98
+ }
99
+ catch (err) {
100
+ this.emitStateChange("error");
101
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
102
+ throw RealtimeError.connectionFailed(errorMessage, "openai-realtime", err instanceof Error ? err : undefined);
103
+ }
104
+ }
105
+ async disconnect() {
106
+ if (!this.ws) {
107
+ return;
108
+ }
109
+ this.emitStateChange("disconnecting");
110
+ try {
111
+ this.ws.close();
112
+ this.ws = null;
113
+ this.session = null;
114
+ this.audioChunkIndex = 0;
115
+ this.emitStateChange("disconnected");
116
+ logger.info("[OpenAIRealtimeHandler] Disconnected");
117
+ }
118
+ catch (err) {
119
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
120
+ throw RealtimeError.protocolError(`Disconnect failed: ${errorMessage}`, "openai-realtime", err instanceof Error ? err : undefined);
121
+ }
122
+ }
123
+ async sendAudio(audio) {
124
+ if (!this.ws || !this.isConnected()) {
125
+ throw RealtimeError.sessionNotActive("openai-realtime");
126
+ }
127
+ const audioBuffer = Buffer.isBuffer(audio) ? audio : audio.data;
128
+ // Send audio append event
129
+ const event = {
130
+ type: "input_audio_buffer.append",
131
+ audio: audioBuffer.toString("base64"),
132
+ };
133
+ this.ws.send(JSON.stringify(event));
134
+ }
135
+ async sendText(text) {
136
+ if (!this.ws || !this.isConnected()) {
137
+ throw RealtimeError.sessionNotActive("openai-realtime");
138
+ }
139
+ // Send conversation item create event
140
+ const event = {
141
+ type: "conversation.item.create",
142
+ item: {
143
+ type: "message",
144
+ role: "user",
145
+ content: [
146
+ {
147
+ type: "input_text",
148
+ text,
149
+ },
150
+ ],
151
+ },
152
+ };
153
+ this.ws.send(JSON.stringify(event));
154
+ // Trigger response
155
+ await this.triggerResponse();
156
+ }
157
+ async triggerResponse() {
158
+ if (!this.ws || !this.isConnected()) {
159
+ throw RealtimeError.sessionNotActive("openai-realtime");
160
+ }
161
+ // Commit audio buffer
162
+ this.ws.send(JSON.stringify({
163
+ type: "input_audio_buffer.commit",
164
+ }));
165
+ // Create response
166
+ this.ws.send(JSON.stringify({
167
+ type: "response.create",
168
+ }));
169
+ }
170
+ async cancelResponse() {
171
+ if (!this.ws || !this.isConnected()) {
172
+ return;
173
+ }
174
+ this.ws.send(JSON.stringify({
175
+ type: "response.cancel",
176
+ }));
177
+ }
178
+ /**
179
+ * Send session update with configuration
180
+ */
181
+ async sendSessionUpdate(config) {
182
+ if (!this.ws) {
183
+ return;
184
+ }
185
+ const sessionConfig = {
186
+ modalities: ["text", "audio"],
187
+ input_audio_format: "pcm16",
188
+ output_audio_format: "pcm16",
189
+ input_audio_transcription: {
190
+ model: "whisper-1",
191
+ },
192
+ };
193
+ // Add voice if specified
194
+ if (config.voice) {
195
+ sessionConfig.voice = config.voice;
196
+ }
197
+ // Add turn detection
198
+ if (config.turnDetection) {
199
+ sessionConfig.turn_detection = {
200
+ type: config.turnDetection,
201
+ threshold: config.vadThreshold ?? 0.5,
202
+ prefix_padding_ms: 300,
203
+ silence_duration_ms: 500,
204
+ };
205
+ }
206
+ // Add system prompt
207
+ if (config.systemPrompt) {
208
+ sessionConfig.instructions = config.systemPrompt;
209
+ }
210
+ // Add tools
211
+ if (config.tools && config.tools.length > 0) {
212
+ sessionConfig.tools = config.tools.map((tool) => ({
213
+ type: "function",
214
+ name: tool.name,
215
+ description: tool.description,
216
+ parameters: tool.parameters,
217
+ }));
218
+ }
219
+ const event = {
220
+ type: "session.update",
221
+ session: sessionConfig,
222
+ };
223
+ this.ws.send(JSON.stringify(event));
224
+ }
225
+ /**
226
+ * Wait for session.created event
227
+ */
228
+ waitForSessionCreated() {
229
+ return new Promise((resolve, reject) => {
230
+ const handler = (data) => {
231
+ try {
232
+ const event = JSON.parse(data.toString());
233
+ if (event.type === "session.created") {
234
+ clearTimeout(timeout);
235
+ this.ws?.off("message", handler);
236
+ const sessionEvent = event;
237
+ resolve(sessionEvent.session.id);
238
+ }
239
+ else if (event.type === "error") {
240
+ clearTimeout(timeout);
241
+ this.ws?.off("message", handler);
242
+ reject(new Error(event.error?.message ??
243
+ "Unknown error"));
244
+ }
245
+ }
246
+ catch {
247
+ // Ignore parse errors
248
+ }
249
+ };
250
+ const timeout = setTimeout(() => {
251
+ // M1: detach the message handler before rejecting so subsequent
252
+ // OpenAI Realtime messages don't invoke a dangling handler for the
253
+ // connection lifetime. (The success and event-error paths above
254
+ // already off-detach; only the timeout path was leaking.)
255
+ this.ws?.off("message", handler);
256
+ reject(new Error("Timeout waiting for session.created"));
257
+ }, 10000);
258
+ this.ws?.on("message", handler);
259
+ });
260
+ }
261
+ /**
262
+ * Handle incoming WebSocket messages
263
+ */
264
+ handleMessage(data) {
265
+ try {
266
+ const event = JSON.parse(data.toString());
267
+ switch (event.type) {
268
+ case "response.audio.delta": {
269
+ const audioEvent = event;
270
+ const audioData = Buffer.from(audioEvent.delta, "base64");
271
+ this.emitAudio({
272
+ data: audioData,
273
+ index: this.audioChunkIndex++,
274
+ isFinal: false,
275
+ // M7: session is configured with output_audio_format "pcm16",
276
+ // so OpenAI sends raw 16-bit PCM, not WAV-headered bytes.
277
+ // Tagging as "pcm16" prevents downstream consumers (e.g.
278
+ // calculateWavDuration) from mis-parsing the buffer as RIFF/WAV.
279
+ format: "pcm16",
280
+ sampleRate: 24000,
281
+ });
282
+ break;
283
+ }
284
+ case "response.audio.done": {
285
+ // Audio stream complete
286
+ this.emitAudio({
287
+ data: Buffer.alloc(0),
288
+ index: this.audioChunkIndex++,
289
+ isFinal: true,
290
+ // M7: session is configured with output_audio_format "pcm16",
291
+ // so OpenAI sends raw 16-bit PCM, not WAV-headered bytes.
292
+ // Tagging as "pcm16" prevents downstream consumers (e.g.
293
+ // calculateWavDuration) from mis-parsing the buffer as RIFF/WAV.
294
+ format: "pcm16",
295
+ sampleRate: 24000,
296
+ });
297
+ break;
298
+ }
299
+ case "response.audio_transcript.delta": {
300
+ const transcriptEvent = event;
301
+ if (transcriptEvent.delta) {
302
+ this.emitText(transcriptEvent.delta, false);
303
+ }
304
+ break;
305
+ }
306
+ case "response.audio_transcript.done": {
307
+ // Final transcript
308
+ const finalEvent = event;
309
+ if (finalEvent.transcript) {
310
+ this.emitText(finalEvent.transcript, true);
311
+ }
312
+ break;
313
+ }
314
+ case "conversation.item.input_audio_transcription.completed": {
315
+ const transcriptEvent = event;
316
+ if (transcriptEvent.transcript) {
317
+ this.emitTranscript(transcriptEvent.transcript, true);
318
+ }
319
+ break;
320
+ }
321
+ case "response.function_call_arguments.done": {
322
+ const funcEvent = event;
323
+ if (funcEvent.name && funcEvent.call_id && funcEvent.arguments) {
324
+ try {
325
+ const args = JSON.parse(funcEvent.arguments);
326
+ // NEW6: defense-in-depth. handleFunctionCall already wraps its
327
+ // body in try/catch, so the inner path is covered today. This
328
+ // outer .catch is here to ensure any future un-caught path
329
+ // (e.g. a refactor that drops the inner catch, or `logger.error`
330
+ // itself throwing inside that catch) doesn't crash the process
331
+ // or hang the session via an unhandled-rejection. Issue 5.
332
+ void this.handleFunctionCall(funcEvent.name, args, funcEvent.call_id).catch((err) => {
333
+ logger.error(`[OpenAIRealtimeHandler] handleFunctionCall failed: ${err instanceof Error ? err.message : String(err)}`);
334
+ });
335
+ }
336
+ catch {
337
+ logger.warn("[OpenAIRealtimeHandler] Failed to parse function arguments");
338
+ }
339
+ }
340
+ break;
341
+ }
342
+ case "response.done": {
343
+ this.emitTurnEnd();
344
+ this.audioChunkIndex = 0;
345
+ break;
346
+ }
347
+ case "input_audio_buffer.speech_started": {
348
+ this.emitTurnStart();
349
+ break;
350
+ }
351
+ case "error": {
352
+ const errorEvent = event;
353
+ const errorMessage = errorEvent.error?.message ?? "Unknown error";
354
+ this.emitError(new Error(errorMessage));
355
+ break;
356
+ }
357
+ default:
358
+ // Log unhandled events at debug level
359
+ logger.debug(`[OpenAIRealtimeHandler] Unhandled event: ${event.type}`);
360
+ }
361
+ }
362
+ catch (err) {
363
+ logger.warn(`[OpenAIRealtimeHandler] Failed to parse message: ${err instanceof Error ? err.message : String(err)}`);
364
+ }
365
+ }
366
+ /**
367
+ * Handle function call from model
368
+ */
369
+ async handleFunctionCall(name, args, callId) {
370
+ try {
371
+ const result = await this.emitFunctionCall(name, args);
372
+ // Send function result back
373
+ if (this.ws && this.isConnected()) {
374
+ this.ws.send(JSON.stringify({
375
+ type: "conversation.item.create",
376
+ item: {
377
+ type: "function_call_output",
378
+ call_id: callId,
379
+ output: JSON.stringify(result),
380
+ },
381
+ }));
382
+ // Trigger response with function result
383
+ await this.triggerResponse();
384
+ }
385
+ }
386
+ catch (err) {
387
+ const errMessage = err instanceof Error ? err.message : String(err);
388
+ logger.error(`[OpenAIRealtimeHandler] Function call failed: ${errMessage}`);
389
+ // M6: send a function_call_output with the error so OpenAI Realtime
390
+ // can resume the turn. Without this the session stalls indefinitely
391
+ // (the model waits for a function result before continuing) and the
392
+ // user hears silence.
393
+ if (this.ws && this.isConnected()) {
394
+ try {
395
+ this.ws.send(JSON.stringify({
396
+ type: "conversation.item.create",
397
+ item: {
398
+ type: "function_call_output",
399
+ call_id: callId,
400
+ output: JSON.stringify({ error: errMessage }),
401
+ },
402
+ }));
403
+ await this.triggerResponse();
404
+ }
405
+ catch (sendErr) {
406
+ logger.error(`[OpenAIRealtimeHandler] Failed to send error result for ${callId}: ${sendErr instanceof Error ? sendErr.message : String(sendErr)}`);
407
+ }
408
+ }
409
+ }
410
+ }
411
+ }
412
+ //# sourceMappingURL=OpenAIRealtime.js.map
/**
 * OpenAI Whisper Speech-to-Text Handler
 *
 * Implementation of STT using OpenAI's Whisper model.
 *
 * @module voice/providers/OpenAISTT
 */
import type { TTSAudioFormat, STTHandler, STTLanguage, STTOptions, STTResult } from "../../types/index.js";
/**
 * OpenAI Whisper Speech-to-Text Handler
 *
 * Supports transcription and translation using OpenAI's Whisper model.
 *
 * @see https://platform.openai.com/docs/api-reference/audio
 */
export declare class OpenAISTT implements STTHandler {
    // Resolved API key; see constructor. Not mutated after construction.
    private readonly apiKey;
    // Base URL for the OpenAI audio API endpoints.
    private readonly baseUrl;
    /**
     * Maximum audio duration in seconds (25 minutes)
     */
    readonly maxAudioDuration: number;
    /**
     * Whisper does not support streaming
     */
    readonly supportsStreaming = false;
    /**
     * @param apiKey Optional explicit API key; when omitted, presumably
     *   resolved from the environment — confirm against the implementation.
     */
    constructor(apiKey?: string);
    /** Whether an API key was resolved and transcription can be attempted. */
    isConfigured(): boolean;
    /** Audio formats accepted by `transcribe`. */
    getSupportedFormats(): TTSAudioFormat[];
    /** Languages supported by the Whisper model. */
    getSupportedLanguages(): Promise<STTLanguage[]>;
    /**
     * Transcribe audio to text.
     *
     * @param audio Raw audio bytes as a Buffer or ArrayBuffer.
     * @param options Optional transcription options (language, etc.).
     * @returns The transcription result.
     */
    transcribe(audio: Buffer | ArrayBuffer, options?: STTOptions): Promise<STTResult>;
    /**
     * Get MIME type for audio format. Whisper auto-detects from headers, but
     * sending a correct MIME helps providers / proxies that sniff Content-Type.
     * Must stay aligned with `getSupportedFormats()`.
     */
    private getMimeType;
}
// Backward-compatible aliases so callers importing the older names keep working.
export { OpenAISTT as WhisperSTT };
export { OpenAISTT as WhisperSTTHandler };
export { OpenAISTT as OpenAISTTHandler };