voice-router-dev 0.1.7 → 0.1.9

This diff shows the changes between publicly released versions of voice-router-dev as they appear in their public registry. It is provided for informational purposes only.
package/dist/index.mjs CHANGED
@@ -210,24 +210,165 @@ function createVoiceRouter(config, adapters) {
210
210
  return router;
211
211
  }
212
212
 
213
+ // src/generated/deepgram/schema/listenV1EncodingParameter.ts
214
+ var ListenV1EncodingParameter = {
215
+ linear16: "linear16",
216
+ flac: "flac",
217
+ mulaw: "mulaw",
218
+ opus: "opus",
219
+ speex: "speex",
220
+ g729: "g729"
221
+ };
222
+
223
+ // src/generated/deepgram/schema/speakV1EncodingParameter.ts
224
+ var SpeakV1EncodingParameter = {
225
+ linear16: "linear16",
226
+ aac: "aac",
227
+ opus: "opus",
228
+ mp3: "mp3",
229
+ flac: "flac",
230
+ mulaw: "mulaw",
231
+ alaw: "alaw"
232
+ };
233
+
234
+ // src/generated/deepgram/schema/speakV1ContainerParameter.ts
235
+ var SpeakV1ContainerParameter = {
236
+ none: "none",
237
+ wav: "wav",
238
+ ogg: "ogg"
239
+ };
240
+
241
+ // src/generated/deepgram/schema/speakV1SampleRateParameter.ts
242
+ var SpeakV1SampleRateParameter = {
243
+ NUMBER_16000: 16e3,
244
+ NUMBER_24000: 24e3,
245
+ NUMBER_32000: 32e3,
246
+ NUMBER_48000: 48e3,
247
+ null: null,
248
+ NUMBER_8000: 8e3,
249
+ NUMBER_22050: 22050
250
+ };
251
+
252
+ // src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
253
+ var StreamingSupportedEncodingEnum = {
254
+ "wav/pcm": "wav/pcm",
255
+ "wav/alaw": "wav/alaw",
256
+ "wav/ulaw": "wav/ulaw"
257
+ };
258
+
259
+ // src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
260
+ var StreamingSupportedSampleRateEnum = {
261
+ NUMBER_8000: 8e3,
262
+ NUMBER_16000: 16e3,
263
+ NUMBER_32000: 32e3,
264
+ NUMBER_44100: 44100,
265
+ NUMBER_48000: 48e3
266
+ };
267
+
268
+ // src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
269
+ var StreamingSupportedBitDepthEnum = {
270
+ NUMBER_8: 8,
271
+ NUMBER_16: 16,
272
+ NUMBER_24: 24,
273
+ NUMBER_32: 32
274
+ };
275
+
276
+ // src/constants/defaults.ts
277
+ var DEFAULT_TIMEOUTS = {
278
+ /** Standard HTTP request timeout for API calls (60 seconds) */
279
+ HTTP_REQUEST: 6e4,
280
+ /** Audio processing timeout for long audio files (120 seconds) */
281
+ AUDIO_PROCESSING: 12e4,
282
+ /** WebSocket connection establishment timeout (10 seconds) */
283
+ WS_CONNECTION: 1e4,
284
+ /** WebSocket graceful close timeout (5 seconds) */
285
+ WS_CLOSE: 5e3
286
+ };
287
+ var DEFAULT_POLLING = {
288
+ /** Maximum number of polling attempts before timing out */
289
+ MAX_ATTEMPTS: 60,
290
+ /** Standard interval between polling attempts (2 seconds) */
291
+ INTERVAL_MS: 2e3,
292
+ /** Slower interval for long-running jobs (3 seconds) */
293
+ SLOW_INTERVAL_MS: 3e3
294
+ };
295
+
296
+ // src/utils/errors.ts
297
+ var ERROR_CODES = {
298
+ /** Failed to parse API response or WebSocket message */
299
+ PARSE_ERROR: "PARSE_ERROR",
300
+ /** WebSocket connection error */
301
+ WEBSOCKET_ERROR: "WEBSOCKET_ERROR",
302
+ /** Async transcription job did not complete within timeout */
303
+ POLLING_TIMEOUT: "POLLING_TIMEOUT",
304
+ /** Transcription processing failed on provider side */
305
+ TRANSCRIPTION_ERROR: "TRANSCRIPTION_ERROR",
306
+ /** Connection attempt timed out */
307
+ CONNECTION_TIMEOUT: "CONNECTION_TIMEOUT",
308
+ /** Invalid input provided to API */
309
+ INVALID_INPUT: "INVALID_INPUT",
310
+ /** Requested operation not supported by provider */
311
+ NOT_SUPPORTED: "NOT_SUPPORTED",
312
+ /** No transcription results available */
313
+ NO_RESULTS: "NO_RESULTS",
314
+ /** Unspecified or unknown error */
315
+ UNKNOWN_ERROR: "UNKNOWN_ERROR"
316
+ };
317
+ var ERROR_MESSAGES = {
318
+ PARSE_ERROR: "Failed to parse response data",
319
+ WEBSOCKET_ERROR: "WebSocket connection error",
320
+ POLLING_TIMEOUT: "Transcription did not complete within timeout period",
321
+ TRANSCRIPTION_ERROR: "Transcription processing failed",
322
+ CONNECTION_TIMEOUT: "Connection attempt timed out",
323
+ INVALID_INPUT: "Invalid input provided",
324
+ NOT_SUPPORTED: "Operation not supported by this provider",
325
+ NO_RESULTS: "No transcription results available",
326
+ UNKNOWN_ERROR: "An unknown error occurred"
327
+ };
328
+ function createError(code, customMessage, details) {
329
+ return {
330
+ code,
331
+ message: customMessage || ERROR_MESSAGES[code],
332
+ details
333
+ };
334
+ }
335
+
213
336
  // src/adapters/base-adapter.ts
214
337
  var BaseAdapter = class {
215
338
  initialize(config) {
216
339
  this.config = config;
217
340
  }
218
341
  /**
219
- * Helper method to create error responses
342
+ * Helper method to create error responses with stack traces
343
+ *
344
+ * @param error - Error object or unknown error
345
+ * @param statusCode - Optional HTTP status code
346
+ * @param code - Optional error code (defaults to extracted or UNKNOWN_ERROR)
220
347
  */
221
- createErrorResponse(error, statusCode) {
348
+ createErrorResponse(error, statusCode, code) {
222
349
  const err = error;
350
+ const httpStatus = statusCode || err.statusCode || err.response?.status;
351
+ const httpStatusText = err.response?.statusText;
352
+ const responseData = err.response?.data;
223
353
  return {
224
354
  success: false,
225
355
  provider: this.name,
226
356
  error: {
227
- code: err.code || "UNKNOWN_ERROR",
357
+ code: code || err.code || ERROR_CODES.UNKNOWN_ERROR,
228
358
  message: err.message || "An unknown error occurred",
229
- statusCode: statusCode || err.statusCode,
230
- details: error
359
+ statusCode: httpStatus,
360
+ details: {
361
+ // Include full error object
362
+ error,
363
+ // Include stack trace if available
364
+ stack: err.stack,
365
+ // Include HTTP response details
366
+ httpStatus,
367
+ httpStatusText,
368
+ responseData,
369
+ // Include provider name for debugging
370
+ provider: this.name
371
+ }
231
372
  }
232
373
  };
233
374
  }
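For orientation, here is a small illustrative sketch of how the new error helpers and the enriched createErrorResponse details might be consumed. The field names follow the code added in this hunk (ERROR_CODES, createError, and the details object), while the adapter call and variable names are hypothetical; the hunk also introduces the shared DEFAULT_TIMEOUTS and DEFAULT_POLLING constants used further down.

    // Illustrative only; shapes follow the helpers added above.
    const parseErr = createError(ERROR_CODES.PARSE_ERROR, undefined, { raw: rawPayload }); // rawPayload is hypothetical
    console.log(parseErr.message);            // falls back to ERROR_MESSAGES.PARSE_ERROR

    // createErrorResponse now nests HTTP context and the stack trace under error.details:
    const result = await adapter.transcribe(audio);      // hypothetical adapter call
    if (!result.success) {
      const { code, statusCode, details } = result.error;
      console.error(code, statusCode, details?.httpStatus, details?.httpStatusText, details?.stack);
    }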
@@ -242,6 +383,64 @@ var BaseAdapter = class {
242
383
  throw new Error(`API key is required for ${this.name} provider`);
243
384
  }
244
385
  }
386
+ /**
387
+ * Build axios config for generated API client functions
388
+ *
389
+ * @param authHeaderName - Header name for API key (e.g., "Authorization", "x-gladia-key")
390
+ * @param authHeaderValue - Optional function to format auth header value (defaults to raw API key)
391
+ * @returns Axios config object
392
+ */
393
+ getAxiosConfig(authHeaderName = "Authorization", authHeaderValue) {
394
+ this.validateConfig();
395
+ const authValue = authHeaderValue ? authHeaderValue(this.config.apiKey) : this.config.apiKey;
396
+ return {
397
+ baseURL: this.config.baseUrl || this.baseUrl,
398
+ timeout: this.config.timeout || DEFAULT_TIMEOUTS.HTTP_REQUEST,
399
+ headers: {
400
+ [authHeaderName]: authValue,
401
+ "Content-Type": "application/json",
402
+ ...this.config.headers
403
+ }
404
+ };
405
+ }
406
+ /**
407
+ * Generic polling helper for async transcription jobs
408
+ *
409
+ * Polls getTranscript() until job completes or times out.
410
+ *
411
+ * @param transcriptId - Job/transcript ID to poll
412
+ * @param options - Polling configuration
413
+ * @returns Final transcription result
414
+ */
415
+ async pollForCompletion(transcriptId, options) {
416
+ const { maxAttempts = DEFAULT_POLLING.MAX_ATTEMPTS, intervalMs = DEFAULT_POLLING.INTERVAL_MS } = options || {};
417
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
418
+ const result = await this.getTranscript(transcriptId);
419
+ if (!result.success) {
420
+ return result;
421
+ }
422
+ const status = result.data?.status;
423
+ if (status === "completed") {
424
+ return result;
425
+ }
426
+ if (status === "error") {
427
+ return this.createErrorResponse(
428
+ new Error("Transcription failed"),
429
+ void 0,
430
+ ERROR_CODES.TRANSCRIPTION_ERROR
431
+ );
432
+ }
433
+ await new Promise((resolve) => setTimeout(resolve, intervalMs));
434
+ }
435
+ return {
436
+ success: false,
437
+ provider: this.name,
438
+ error: {
439
+ code: ERROR_CODES.POLLING_TIMEOUT,
440
+ message: `Transcription did not complete after ${maxAttempts} attempts`
441
+ }
442
+ };
443
+ }
245
444
  };
246
445
 
247
446
  // src/adapters/gladia-adapter.ts
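The two base-class additions above (getAxiosConfig and pollForCompletion) are meant to be consumed by provider adapters rather than called directly. A rough sketch of the intended pattern, where the subclass, endpoint, and method names other than the two helpers are hypothetical:

    // Hypothetical adapter subclass showing the intended use of the new base helpers.
    class ExampleAdapter extends BaseAdapter {
      name = "example";
      baseUrl = "https://api.example.com";     // hypothetical endpoint
      getAxiosConfig() {
        // Optional second argument formats the auth header value before it is sent.
        return super.getAxiosConfig("Authorization", (apiKey) => `Bearer ${apiKey}`);
      }
      async transcribeAndWait(jobId) {
        // Repeatedly calls this.getTranscript(jobId) (which the subclass must implement)
        // until status is "completed"/"error" or the attempts run out.
        return this.pollForCompletion(jobId, { maxAttempts: 30, intervalMs: 2000 });
      }
    }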
@@ -288,6 +487,143 @@ function mapEncodingToProvider(unifiedEncoding, provider) {
288
487
  return providerEncoding;
289
488
  }
290
489
 
490
+ // src/utils/websocket-helpers.ts
491
+ function waitForWebSocketOpen(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CONNECTION) {
492
+ return new Promise((resolve, reject) => {
493
+ const timeout = setTimeout(() => {
494
+ reject(new Error("WebSocket connection timeout"));
495
+ }, timeoutMs);
496
+ ws.once("open", () => {
497
+ clearTimeout(timeout);
498
+ resolve();
499
+ });
500
+ ws.once("error", (error) => {
501
+ clearTimeout(timeout);
502
+ reject(error);
503
+ });
504
+ });
505
+ }
506
+ function closeWebSocket(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CLOSE) {
507
+ return new Promise((resolve) => {
508
+ const timeout = setTimeout(() => {
509
+ ws.terminate();
510
+ resolve();
511
+ }, timeoutMs);
512
+ ws.close();
513
+ ws.once("close", () => {
514
+ clearTimeout(timeout);
515
+ resolve();
516
+ });
517
+ });
518
+ }
519
+ function setupWebSocketHandlers(ws, callbacks, setSessionStatus) {
520
+ ws.on("open", () => {
521
+ setSessionStatus("open");
522
+ callbacks?.onOpen?.();
523
+ });
524
+ ws.on("error", (error) => {
525
+ callbacks?.onError?.(createError(ERROR_CODES.WEBSOCKET_ERROR, error.message, error));
526
+ });
527
+ ws.on("close", (code, reason) => {
528
+ setSessionStatus("closed");
529
+ callbacks?.onClose?.(code, reason.toString());
530
+ });
531
+ }
532
+ function validateSessionForAudio(sessionStatus, wsReadyState, WebSocketOpen) {
533
+ if (sessionStatus !== "open") {
534
+ throw new Error(`Cannot send audio: session is ${sessionStatus}`);
535
+ }
536
+ if (wsReadyState !== WebSocketOpen) {
537
+ throw new Error("WebSocket is not open");
538
+ }
539
+ }
540
+
541
+ // src/utils/validation.ts
542
+ function validateEnumValue(value, enumType, fieldName, provider) {
543
+ const validValues = Object.values(enumType);
544
+ const isValid = validValues.some((v) => v === value);
545
+ if (!isValid) {
546
+ throw new Error(
547
+ `${provider} does not support ${fieldName} '${value}'. Supported values (from OpenAPI spec): ${validValues.join(", ")}`
548
+ );
549
+ }
550
+ return value;
551
+ }
552
+
553
+ // src/utils/transcription-helpers.ts
554
+ function extractSpeakersFromUtterances(utterances, getSpeakerId, formatLabel) {
555
+ if (!utterances || utterances.length === 0) {
556
+ return void 0;
557
+ }
558
+ const speakerSet = /* @__PURE__ */ new Set();
559
+ utterances.forEach((utterance) => {
560
+ const speakerId = getSpeakerId(utterance);
561
+ if (speakerId !== void 0) {
562
+ speakerSet.add(String(speakerId));
563
+ }
564
+ });
565
+ if (speakerSet.size === 0) {
566
+ return void 0;
567
+ }
568
+ return Array.from(speakerSet).map((speakerId) => ({
569
+ id: speakerId,
570
+ label: formatLabel ? formatLabel(speakerId) : `Speaker ${speakerId}`
571
+ }));
572
+ }
573
+ function extractWords(words, mapper) {
574
+ if (!words || words.length === 0) {
575
+ return void 0;
576
+ }
577
+ const normalizedWords = words.map(mapper);
578
+ return normalizedWords.length > 0 ? normalizedWords : void 0;
579
+ }
580
+ var STATUS_MAPPINGS = {
581
+ gladia: {
582
+ queued: "queued",
583
+ processing: "processing",
584
+ done: "completed",
585
+ error: "error"
586
+ },
587
+ assemblyai: {
588
+ queued: "queued",
589
+ processing: "processing",
590
+ completed: "completed",
591
+ error: "error"
592
+ },
593
+ deepgram: {
594
+ queued: "queued",
595
+ processing: "processing",
596
+ completed: "completed",
597
+ error: "error"
598
+ },
599
+ azure: {
600
+ succeeded: "completed",
601
+ running: "processing",
602
+ notstarted: "queued",
603
+ failed: "error"
604
+ },
605
+ speechmatics: {
606
+ running: "processing",
607
+ done: "completed",
608
+ rejected: "error",
609
+ expired: "error"
610
+ }
611
+ };
612
+ function normalizeStatus(providerStatus, provider, defaultStatus = "queued") {
613
+ if (!providerStatus) return defaultStatus;
614
+ const mapping = STATUS_MAPPINGS[provider];
615
+ const statusKey = providerStatus.toString().toLowerCase();
616
+ if (statusKey in mapping) {
617
+ return mapping[statusKey];
618
+ }
619
+ for (const [key, value] of Object.entries(mapping)) {
620
+ if (statusKey.includes(key)) {
621
+ return value;
622
+ }
623
+ }
624
+ return defaultStatus;
625
+ }
626
+
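A quick sketch of the two validation/normalization helpers in isolation; the values come from the enums and mappings defined above, and the calls themselves are illustrative (the WebSocket helpers added alongside them are exercised later in the Gladia streaming hunk):

    // validateEnumValue throws and lists the supported values when the input is not in the enum.
    const rate = validateEnumValue(16000, StreamingSupportedSampleRateEnum, "sample rate", "Gladia");
    // validateEnumValue(11025, ...) would throw:
    //   "Gladia does not support sample rate '11025'. Supported values (from OpenAPI spec): 8000, 16000, 32000, 44100, 48000"

    // normalizeStatus maps provider-specific job states onto the unified status set.
    normalizeStatus("done", "gladia");        // "completed"
    normalizeStatus("Succeeded", "azure");    // "completed" (lookup is lowercased)
    normalizeStatus(undefined, "deepgram");   // "queued" (default)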
291
627
  // src/generated/gladia/api/gladiaControlAPI.ts
292
628
  import axios from "axios";
293
629
 
@@ -824,21 +1160,6 @@ var StreamingResponseStatus = {
824
1160
  error: "error"
825
1161
  };
826
1162
 
827
- // src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
828
- var StreamingSupportedBitDepthEnum = {
829
- NUMBER_8: 8,
830
- NUMBER_16: 16,
831
- NUMBER_24: 24,
832
- NUMBER_32: 32
833
- };
834
-
835
- // src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
836
- var StreamingSupportedEncodingEnum = {
837
- "wav/pcm": "wav/pcm",
838
- "wav/alaw": "wav/alaw",
839
- "wav/ulaw": "wav/ulaw"
840
- };
841
-
842
1163
  // src/generated/gladia/schema/streamingSupportedModels.ts
843
1164
  var StreamingSupportedModels = {
844
1165
  "solaria-1": "solaria-1"
@@ -850,15 +1171,6 @@ var StreamingSupportedRegions = {
850
1171
  "eu-west": "eu-west"
851
1172
  };
852
1173
 
853
- // src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
854
- var StreamingSupportedSampleRateEnum = {
855
- NUMBER_8000: 8e3,
856
- NUMBER_16000: 16e3,
857
- NUMBER_32000: 32e3,
858
- NUMBER_44100: 44100,
859
- NUMBER_48000: 48e3
860
- };
861
-
862
1174
  // src/generated/gladia/schema/subtitlesFormatEnum.ts
863
1175
  var SubtitlesFormatEnum = {
864
1176
  srt: "srt",
@@ -1408,21 +1720,10 @@ var GladiaAdapter = class extends BaseAdapter {
1408
1720
  }
1409
1721
  /**
1410
1722
  * Get axios config for generated API client functions
1411
- * Configures headers and base URL
1723
+ * Configures headers and base URL using Gladia's x-gladia-key header
1412
1724
  */
1413
1725
  getAxiosConfig() {
1414
- if (!this.config) {
1415
- throw new Error("Adapter not initialized. Call initialize() first.");
1416
- }
1417
- return {
1418
- baseURL: this.config.baseUrl || this.baseUrl,
1419
- timeout: this.config.timeout || 6e4,
1420
- headers: {
1421
- "x-gladia-key": this.config.apiKey,
1422
- "Content-Type": "application/json",
1423
- ...this.config.headers
1424
- }
1425
- };
1726
+ return super.getAxiosConfig("x-gladia-key");
1426
1727
  }
1427
1728
  /**
1428
1729
  * Submit audio for transcription
@@ -1589,29 +1890,13 @@ var GladiaAdapter = class extends BaseAdapter {
1589
1890
  * Normalize Gladia response to unified format
1590
1891
  */
1591
1892
  normalizeResponse(response) {
1592
- let status;
1593
- switch (response.status) {
1594
- case "queued":
1595
- status = "queued";
1596
- break;
1597
- case "processing":
1598
- status = "processing";
1599
- break;
1600
- case "done":
1601
- status = "completed";
1602
- break;
1603
- case "error":
1604
- status = "error";
1605
- break;
1606
- default:
1607
- status = "queued";
1608
- }
1893
+ const status = normalizeStatus(response.status, "gladia");
1609
1894
  if (response.status === "error") {
1610
1895
  return {
1611
1896
  success: false,
1612
1897
  provider: this.name,
1613
1898
  error: {
1614
- code: response.error_code?.toString() || "TRANSCRIPTION_ERROR",
1899
+ code: response.error_code?.toString() || ERROR_CODES.TRANSCRIPTION_ERROR,
1615
1900
  message: "Transcription failed",
1616
1901
  statusCode: response.error_code || void 0
1617
1902
  },
@@ -1651,22 +1936,11 @@ var GladiaAdapter = class extends BaseAdapter {
1651
1936
  * Extract speaker information from Gladia response
1652
1937
  */
1653
1938
  extractSpeakers(transcription) {
1654
- if (!transcription?.utterances) {
1655
- return void 0;
1656
- }
1657
- const speakerSet = /* @__PURE__ */ new Set();
1658
- transcription.utterances.forEach((utterance) => {
1659
- if (utterance.speaker !== void 0) {
1660
- speakerSet.add(utterance.speaker);
1661
- }
1662
- });
1663
- if (speakerSet.size === 0) {
1664
- return void 0;
1665
- }
1666
- return Array.from(speakerSet).map((speakerId) => ({
1667
- id: speakerId.toString(),
1668
- label: `Speaker ${speakerId}`
1669
- }));
1939
+ return extractSpeakersFromUtterances(
1940
+ transcription?.utterances,
1941
+ (utterance) => utterance.speaker,
1942
+ (id) => `Speaker ${id}`
1943
+ );
1670
1944
  }
1671
1945
  /**
1672
1946
  * Extract word timestamps from Gladia response
@@ -1677,14 +1951,17 @@ var GladiaAdapter = class extends BaseAdapter {
1677
1951
  }
1678
1952
  const allWords = transcription.utterances.flatMap(
1679
1953
  (utterance) => utterance.words.map((word) => ({
1680
- text: word.word,
1681
- start: word.start,
1682
- end: word.end,
1683
- confidence: word.confidence,
1684
- speaker: utterance.speaker?.toString()
1954
+ word,
1955
+ speaker: utterance.speaker
1685
1956
  }))
1686
1957
  );
1687
- return allWords.length > 0 ? allWords : void 0;
1958
+ return extractWords(allWords, (item) => ({
1959
+ text: item.word.word,
1960
+ start: item.word.start,
1961
+ end: item.word.end,
1962
+ confidence: item.word.confidence,
1963
+ speaker: item.speaker?.toString()
1964
+ }));
1688
1965
  }
1689
1966
  /**
1690
1967
  * Extract utterances from Gladia response
@@ -1710,38 +1987,6 @@ var GladiaAdapter = class extends BaseAdapter {
1710
1987
  /**
1711
1988
  * Poll for transcription completion
1712
1989
  */
1713
- async pollForCompletion(jobId, maxAttempts = 60, intervalMs = 2e3) {
1714
- for (let attempt = 0; attempt < maxAttempts; attempt++) {
1715
- const result = await this.getTranscript(jobId);
1716
- if (!result.success) {
1717
- return result;
1718
- }
1719
- const status = result.data?.status;
1720
- if (status === "completed") {
1721
- return result;
1722
- }
1723
- if (status === "error") {
1724
- return {
1725
- success: false,
1726
- provider: this.name,
1727
- error: {
1728
- code: "TRANSCRIPTION_ERROR",
1729
- message: "Transcription failed"
1730
- },
1731
- raw: result.raw
1732
- };
1733
- }
1734
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
1735
- }
1736
- return {
1737
- success: false,
1738
- provider: this.name,
1739
- error: {
1740
- code: "POLLING_TIMEOUT",
1741
- message: `Transcription did not complete after ${maxAttempts} attempts`
1742
- }
1743
- };
1744
- }
1745
1990
  /**
1746
1991
  * Stream audio for real-time transcription
1747
1992
  *
@@ -1785,14 +2030,12 @@ var GladiaAdapter = class extends BaseAdapter {
1785
2030
  this.validateConfig();
1786
2031
  let validatedSampleRate;
1787
2032
  if (options?.sampleRate) {
1788
- const validRates = Object.values(StreamingSupportedSampleRateEnum);
1789
- const isValidRate = validRates.some((rate) => rate === options.sampleRate);
1790
- if (!isValidRate) {
1791
- throw new Error(
1792
- `Gladia does not support sample rate ${options.sampleRate} Hz. Supported rates (from OpenAPI spec): ${validRates.join(", ")} Hz`
1793
- );
1794
- }
1795
- validatedSampleRate = options.sampleRate;
2033
+ validatedSampleRate = validateEnumValue(
2034
+ options.sampleRate,
2035
+ StreamingSupportedSampleRateEnum,
2036
+ "sample rate",
2037
+ "Gladia"
2038
+ );
1796
2039
  }
1797
2040
  const streamingRequest = {
1798
2041
  encoding: options?.encoding ? mapEncodingToProvider(options.encoding, "gladia") : void 0,
@@ -1814,9 +2057,8 @@ var GladiaAdapter = class extends BaseAdapter {
1814
2057
  const { id, url: wsUrl } = initResponse.data;
1815
2058
  const ws = new WebSocket(wsUrl);
1816
2059
  let sessionStatus = "connecting";
1817
- ws.on("open", () => {
1818
- sessionStatus = "open";
1819
- callbacks?.onOpen?.();
2060
+ setupWebSocketHandlers(ws, callbacks, (status) => {
2061
+ sessionStatus = status;
1820
2062
  });
1821
2063
  ws.on("message", (data) => {
1822
2064
  try {
@@ -1861,48 +2103,20 @@ var GladiaAdapter = class extends BaseAdapter {
1861
2103
  }
1862
2104
  } catch (error) {
1863
2105
  callbacks?.onError?.({
1864
- code: "PARSE_ERROR",
2106
+ code: ERROR_CODES.PARSE_ERROR,
1865
2107
  message: "Failed to parse WebSocket message",
1866
2108
  details: error
1867
2109
  });
1868
2110
  }
1869
2111
  });
1870
- ws.on("error", (error) => {
1871
- callbacks?.onError?.({
1872
- code: "WEBSOCKET_ERROR",
1873
- message: error.message,
1874
- details: error
1875
- });
1876
- });
1877
- ws.on("close", (code, reason) => {
1878
- sessionStatus = "closed";
1879
- callbacks?.onClose?.(code, reason.toString());
1880
- });
1881
- await new Promise((resolve, reject) => {
1882
- const timeout = setTimeout(() => {
1883
- reject(new Error("WebSocket connection timeout"));
1884
- }, 1e4);
1885
- ws.once("open", () => {
1886
- clearTimeout(timeout);
1887
- resolve();
1888
- });
1889
- ws.once("error", (error) => {
1890
- clearTimeout(timeout);
1891
- reject(error);
1892
- });
1893
- });
2112
+ await waitForWebSocketOpen(ws);
1894
2113
  return {
1895
2114
  id,
1896
2115
  provider: this.name,
1897
2116
  createdAt: /* @__PURE__ */ new Date(),
1898
2117
  getStatus: () => sessionStatus,
1899
2118
  sendAudio: async (chunk) => {
1900
- if (sessionStatus !== "open") {
1901
- throw new Error(`Cannot send audio: session is ${sessionStatus}`);
1902
- }
1903
- if (ws.readyState !== WebSocket.OPEN) {
1904
- throw new Error("WebSocket is not open");
1905
- }
2119
+ validateSessionForAudio(sessionStatus, ws.readyState, WebSocket.OPEN);
1906
2120
  ws.send(chunk.data);
1907
2121
  if (chunk.isLast) {
1908
2122
  ws.send(
@@ -1924,18 +2138,8 @@ var GladiaAdapter = class extends BaseAdapter {
1924
2138
  })
1925
2139
  );
1926
2140
  }
1927
- return new Promise((resolve) => {
1928
- const timeout = setTimeout(() => {
1929
- ws.terminate();
1930
- resolve();
1931
- }, 5e3);
1932
- ws.close();
1933
- ws.once("close", () => {
1934
- clearTimeout(timeout);
1935
- sessionStatus = "closed";
1936
- resolve();
1937
- });
1938
- });
2141
+ await closeWebSocket(ws);
2142
+ sessionStatus = "closed";
1939
2143
  }
1940
2144
  };
1941
2145
  }
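The refactor swaps the inline WebSocket plumbing for the shared helpers but leaves the session surface shown here intact. A minimal sketch of driving it, where the adapter instance, callbacks, and audio chunks are illustrative:

    // Illustrative driver for the streaming session returned by transcribeStream.
    const session = await gladia.transcribeStream({ sampleRate: 16000 }, callbacks);
    console.log(session.getStatus());                      // "connecting" | "open" | "closed"
    await session.sendAudio({ data: pcmChunk, isLast: false });
    // The final chunk also triggers the end-of-stream control message sent in the unchanged code.
    await session.sendAudio({ data: lastChunk, isLast: true });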
@@ -2269,9 +2473,6 @@ var createTranscript = (transcriptParams, options) => {
2269
2473
  var getTranscript = (transcriptId, options) => {
2270
2474
  return axios2.get(`/v2/transcript/${transcriptId}`, options);
2271
2475
  };
2272
- var createTemporaryToken = (createRealtimeTemporaryTokenParams, options) => {
2273
- return axios2.post("/v2/realtime/token", createRealtimeTemporaryTokenParams, options);
2274
- };
2275
2476
 
2276
2477
  // src/adapters/assemblyai-adapter.ts
2277
2478
  var AssemblyAIAdapter = class extends BaseAdapter {
@@ -2289,26 +2490,17 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2289
2490
  entityDetection: true,
2290
2491
  piiRedaction: true
2291
2492
  };
2292
- this.baseUrl = "https://api.assemblyai.com/v2";
2293
- this.wsBaseUrl = "wss://api.assemblyai.com/v2/realtime/ws";
2493
+ this.baseUrl = "https://api.assemblyai.com";
2494
+ // Generated functions already include /v2 path
2495
+ this.wsBaseUrl = "wss://streaming.assemblyai.com/v3/ws";
2294
2496
  }
2497
+ // v3 Universal Streaming endpoint
2295
2498
  /**
2296
2499
  * Get axios config for generated API client functions
2297
- * Configures headers and base URL
2500
+ * Configures headers and base URL using authorization header
2298
2501
  */
2299
2502
  getAxiosConfig() {
2300
- if (!this.config) {
2301
- throw new Error("Adapter not initialized. Call initialize() first.");
2302
- }
2303
- return {
2304
- baseURL: this.config.baseUrl || this.baseUrl,
2305
- timeout: this.config.timeout || 6e4,
2306
- headers: {
2307
- authorization: this.config.apiKey,
2308
- "Content-Type": "application/json",
2309
- ...this.config.headers
2310
- }
2311
- };
2503
+ return super.getAxiosConfig("authorization");
2312
2504
  }
2313
2505
  /**
2314
2506
  * Submit audio for transcription
@@ -2586,41 +2778,6 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2586
2778
  }))
2587
2779
  }));
2588
2780
  }
2589
- /**
2590
- * Poll for transcription completion
2591
- */
2592
- async pollForCompletion(transcriptId, maxAttempts = 60, intervalMs = 3e3) {
2593
- for (let attempt = 0; attempt < maxAttempts; attempt++) {
2594
- const result = await this.getTranscript(transcriptId);
2595
- if (!result.success) {
2596
- return result;
2597
- }
2598
- const status = result.data?.status;
2599
- if (status === "completed") {
2600
- return result;
2601
- }
2602
- if (status === "error") {
2603
- return {
2604
- success: false,
2605
- provider: this.name,
2606
- error: {
2607
- code: "TRANSCRIPTION_ERROR",
2608
- message: "Transcription failed"
2609
- },
2610
- raw: result.raw
2611
- };
2612
- }
2613
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
2614
- }
2615
- return {
2616
- success: false,
2617
- provider: this.name,
2618
- error: {
2619
- code: "POLLING_TIMEOUT",
2620
- message: `Transcription did not complete after ${maxAttempts} attempts`
2621
- }
2622
- };
2623
- }
2624
2781
  /**
2625
2782
  * Stream audio for real-time transcription
2626
2783
  *
@@ -2661,14 +2818,17 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2661
2818
  */
2662
2819
  async transcribeStream(options, callbacks) {
2663
2820
  this.validateConfig();
2664
- const tokenResponse = await createTemporaryToken(
2665
- { expires_in: 3600 },
2666
- // Token expires in 1 hour
2667
- this.getAxiosConfig()
2668
- );
2669
- const token = tokenResponse.data.token;
2670
- const wsUrl = `${this.wsBaseUrl}?sample_rate=${options?.sampleRate || 16e3}&token=${token}`;
2671
- const ws = new WebSocket2(wsUrl);
2821
+ if (!this.config?.apiKey) {
2822
+ throw new Error("API key is required for streaming");
2823
+ }
2824
+ const sampleRate = options?.sampleRate || 16e3;
2825
+ const encoding = options?.encoding || "pcm_s16le";
2826
+ const wsUrl = `${this.wsBaseUrl}?sample_rate=${sampleRate}&encoding=${encoding}`;
2827
+ const ws = new WebSocket2(wsUrl, {
2828
+ headers: {
2829
+ Authorization: this.config.apiKey
2830
+ }
2831
+ });
2672
2832
  let sessionStatus = "connecting";
2673
2833
  const sessionId = `assemblyai-${Date.now()}-${Math.random().toString(36).substring(7)}`;
2674
2834
  ws.on("open", () => {
@@ -2678,41 +2838,42 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2678
2838
  ws.on("message", (data) => {
2679
2839
  try {
2680
2840
  const message = JSON.parse(data.toString());
2681
- if (message.message_type === "SessionBegins") {
2841
+ if ("error" in message) {
2842
+ callbacks?.onError?.({
2843
+ code: "API_ERROR",
2844
+ message: message.error
2845
+ });
2846
+ return;
2847
+ }
2848
+ if (message.type === "Begin") {
2849
+ const beginMsg = message;
2682
2850
  callbacks?.onMetadata?.({
2683
- sessionId: message.session_id,
2684
- expiresAt: message.expires_at
2851
+ sessionId: beginMsg.id,
2852
+ expiresAt: new Date(beginMsg.expires_at).toISOString()
2685
2853
  });
2686
- } else if (message.message_type === "PartialTranscript") {
2854
+ } else if (message.type === "Turn") {
2855
+ const turnMsg = message;
2687
2856
  callbacks?.onTranscript?.({
2688
2857
  type: "transcript",
2689
- text: message.text,
2690
- isFinal: false,
2691
- confidence: message.confidence,
2692
- words: message.words.map((word) => ({
2858
+ text: turnMsg.transcript,
2859
+ isFinal: turnMsg.end_of_turn,
2860
+ confidence: turnMsg.end_of_turn_confidence,
2861
+ words: turnMsg.words.map((word) => ({
2693
2862
  text: word.text,
2694
2863
  start: word.start / 1e3,
2864
+ // Convert ms to seconds
2695
2865
  end: word.end / 1e3,
2696
2866
  confidence: word.confidence
2697
2867
  })),
2698
- data: message
2868
+ data: turnMsg
2699
2869
  });
2700
- } else if (message.message_type === "FinalTranscript") {
2701
- callbacks?.onTranscript?.({
2702
- type: "transcript",
2703
- text: message.text,
2704
- isFinal: true,
2705
- confidence: message.confidence,
2706
- words: message.words.map((word) => ({
2707
- text: word.text,
2708
- start: word.start / 1e3,
2709
- end: word.end / 1e3,
2710
- confidence: word.confidence
2711
- })),
2712
- data: message
2870
+ } else if (message.type === "Termination") {
2871
+ const termMsg = message;
2872
+ callbacks?.onMetadata?.({
2873
+ terminated: true,
2874
+ audioDurationSeconds: termMsg.audio_duration_seconds,
2875
+ sessionDurationSeconds: termMsg.session_duration_seconds
2713
2876
  });
2714
- } else if (message.message_type === "SessionTerminated") {
2715
- callbacks?.onMetadata?.({ terminated: true });
2716
2877
  }
2717
2878
  } catch (error) {
2718
2879
  callbacks?.onError?.({
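For context, the v3 Universal Streaming protocol replaces the old PartialTranscript/FinalTranscript messages with Begin/Turn/Termination and authenticates with an Authorization header instead of a temporary token. An illustrative caller-side sketch; the adapter instance and option values are placeholders, while the callback names and defaults mirror this hunk:

    // Illustrative call into the updated AssemblyAI adapter (defaults: 16000 Hz, pcm_s16le).
    await assemblyai.transcribeStream(
      { sampleRate: 16000, encoding: "pcm_s16le" },
      {
        onMetadata: (m) => console.log("session metadata", m),   // Begin / Termination messages
        onTranscript: (t) => {
          // Turn messages: isFinal mirrors end_of_turn; word times arrive in ms and are converted to seconds.
          if (t.isFinal) console.log(t.text, t.confidence);
        },
        onError: (e) => console.error(e.code, e.message),
      }
    );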
@@ -3262,7 +3423,24 @@ function createDeepgramAdapter(config) {
3262
3423
  }
3263
3424
 
3264
3425
  // src/adapters/azure-stt-adapter.ts
3426
+ import axios5 from "axios";
3427
+
3428
+ // src/generated/azure/api/speechServicesAPIV31.ts
3265
3429
  import axios4 from "axios";
3430
+ var transcriptionsCreate = (transcription, options) => {
3431
+ return axios4.post("/transcriptions", transcription, options);
3432
+ };
3433
+ var transcriptionsGet = (id, options) => {
3434
+ return axios4.get(`/transcriptions/${id}`, options);
3435
+ };
3436
+ var transcriptionsListFiles = (id, params, options) => {
3437
+ return axios4.get(`/transcriptions/${id}/files`, {
3438
+ ...options,
3439
+ params: { ...params, ...options?.params }
3440
+ });
3441
+ };
3442
+
3443
+ // src/adapters/azure-stt-adapter.ts
3266
3444
  var AzureSTTAdapter = class extends BaseAdapter {
3267
3445
  constructor() {
3268
3446
  super(...arguments);
@@ -3279,20 +3457,20 @@ var AzureSTTAdapter = class extends BaseAdapter {
3279
3457
  entityDetection: false,
3280
3458
  piiRedaction: false
3281
3459
  };
3460
+ this.baseUrl = "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.1";
3282
3461
  }
3462
+ // Default, overridden in initialize()
3283
3463
  initialize(config) {
3284
3464
  super.initialize(config);
3285
3465
  this.region = config.region || "eastus";
3286
3466
  this.baseUrl = config.baseUrl || `https://${this.region}.api.cognitive.microsoft.com/speechtotext/v3.1`;
3287
- this.client = axios4.create({
3288
- baseURL: this.baseUrl,
3289
- timeout: config.timeout || 6e4,
3290
- headers: {
3291
- "Ocp-Apim-Subscription-Key": config.apiKey,
3292
- "Content-Type": "application/json",
3293
- ...config.headers
3294
- }
3295
- });
3467
+ }
3468
+ /**
3469
+ * Get axios config for generated API client functions
3470
+ * Configures headers and base URL using Azure subscription key
3471
+ */
3472
+ getAxiosConfig() {
3473
+ return super.getAxiosConfig("Ocp-Apim-Subscription-Key");
3296
3474
  }
3297
3475
  /**
3298
3476
  * Submit audio for transcription
@@ -3324,9 +3502,9 @@ var AzureSTTAdapter = class extends BaseAdapter {
3324
3502
  contentUrls: [audio.url],
3325
3503
  properties: this.buildTranscriptionProperties(options)
3326
3504
  };
3327
- const response = await this.client.post(
3328
- "/transcriptions",
3329
- transcriptionRequest
3505
+ const response = await transcriptionsCreate(
3506
+ transcriptionRequest,
3507
+ this.getAxiosConfig()
3330
3508
  );
3331
3509
  const transcription = response.data;
3332
3510
  return {
@@ -3357,9 +3535,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
3357
3535
  async getTranscript(transcriptId) {
3358
3536
  this.validateConfig();
3359
3537
  try {
3360
- const statusResponse = await this.client.get(
3361
- `/transcriptions/${transcriptId}`
3362
- );
3538
+ const statusResponse = await transcriptionsGet(transcriptId, this.getAxiosConfig());
3363
3539
  const transcription = statusResponse.data;
3364
3540
  const status = this.normalizeStatus(transcription.status);
3365
3541
  if (status !== "completed") {
@@ -3387,7 +3563,11 @@ var AzureSTTAdapter = class extends BaseAdapter {
3387
3563
  raw: transcription
3388
3564
  };
3389
3565
  }
3390
- const filesResponse = await this.client.get(transcription.links.files);
3566
+ const filesResponse = await transcriptionsListFiles(
3567
+ transcriptId,
3568
+ void 0,
3569
+ this.getAxiosConfig()
3570
+ );
3391
3571
  const files = filesResponse.data?.values || [];
3392
3572
  const resultFile = files.find((file) => file.kind === "Transcription");
3393
3573
  if (!resultFile?.links?.contentUrl) {
@@ -3401,7 +3581,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
3401
3581
  raw: transcription
3402
3582
  };
3403
3583
  }
3404
- const contentResponse = await axios4.get(resultFile.links.contentUrl);
3584
+ const contentResponse = await axios5.get(resultFile.links.contentUrl);
3405
3585
  const transcriptionData = contentResponse.data;
3406
3586
  return this.normalizeResponse(transcription, transcriptionData);
3407
3587
  } catch (error) {
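Taken together, the Azure adapter now reaches the v3.1 batch API through the generated client functions plus the shared axios config. A simplified sketch of that flow; the IDs and URLs are placeholders, and request fields beyond those visible in the diff are omitted:

    // Simplified sketch of the batch flow the adapter performs above.
    const cfg = azureAdapter.getAxiosConfig();              // sets the Ocp-Apim-Subscription-Key header
    const created = await transcriptionsCreate(
      { contentUrls: ["https://example.com/audio.wav"], properties: {} },
      cfg
    );
    // ...poll until the job reports Succeeded, then fetch the result files:
    const status = await transcriptionsGet("<transcription-id>", cfg);
    const files = await transcriptionsListFiles("<transcription-id>", undefined, cfg);
    const resultFile = files.data?.values?.find((f) => f.kind === "Transcription");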
@@ -3500,7 +3680,57 @@ function createAzureSTTAdapter(config) {
3500
3680
  }
3501
3681
 
3502
3682
  // src/adapters/openai-whisper-adapter.ts
3503
- import axios5 from "axios";
3683
+ import axios7 from "axios";
3684
+
3685
+ // src/generated/openai/api/openAIAPI.ts
3686
+ import axios6 from "axios";
3687
+ var createTranscription = (createTranscriptionRequest, options) => {
3688
+ const formData = new FormData();
3689
+ formData.append("file", createTranscriptionRequest.file);
3690
+ formData.append("model", createTranscriptionRequest.model);
3691
+ if (createTranscriptionRequest.language !== void 0) {
3692
+ formData.append("language", createTranscriptionRequest.language);
3693
+ }
3694
+ if (createTranscriptionRequest.prompt !== void 0) {
3695
+ formData.append("prompt", createTranscriptionRequest.prompt);
3696
+ }
3697
+ if (createTranscriptionRequest.response_format !== void 0) {
3698
+ formData.append("response_format", createTranscriptionRequest.response_format);
3699
+ }
3700
+ if (createTranscriptionRequest.temperature !== void 0) {
3701
+ formData.append("temperature", createTranscriptionRequest.temperature.toString());
3702
+ }
3703
+ if (createTranscriptionRequest.include !== void 0) {
3704
+ createTranscriptionRequest.include.forEach((value) => formData.append("include", value));
3705
+ }
3706
+ if (createTranscriptionRequest.timestamp_granularities !== void 0) {
3707
+ createTranscriptionRequest.timestamp_granularities.forEach(
3708
+ (value) => formData.append("timestamp_granularities", value)
3709
+ );
3710
+ }
3711
+ if (createTranscriptionRequest.stream !== void 0 && createTranscriptionRequest.stream !== null) {
3712
+ formData.append("stream", createTranscriptionRequest.stream.toString());
3713
+ }
3714
+ if (createTranscriptionRequest.chunking_strategy !== void 0 && createTranscriptionRequest.chunking_strategy !== null) {
3715
+ formData.append(
3716
+ "chunking_strategy",
3717
+ typeof createTranscriptionRequest.chunking_strategy === "object" ? JSON.stringify(createTranscriptionRequest.chunking_strategy) : createTranscriptionRequest.chunking_strategy
3718
+ );
3719
+ }
3720
+ if (createTranscriptionRequest.known_speaker_names !== void 0) {
3721
+ createTranscriptionRequest.known_speaker_names.forEach(
3722
+ (value) => formData.append("known_speaker_names", value)
3723
+ );
3724
+ }
3725
+ if (createTranscriptionRequest.known_speaker_references !== void 0) {
3726
+ createTranscriptionRequest.known_speaker_references.forEach(
3727
+ (value) => formData.append("known_speaker_references", value)
3728
+ );
3729
+ }
3730
+ return axios6.post("/audio/transcriptions", formData, options);
3731
+ };
3732
+
3733
+ // src/adapters/openai-whisper-adapter.ts
3504
3734
  var OpenAIWhisperAdapter = class extends BaseAdapter {
3505
3735
  constructor() {
3506
3736
  super(...arguments);
@@ -3522,19 +3752,12 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
3522
3752
  };
3523
3753
  this.baseUrl = "https://api.openai.com/v1";
3524
3754
  }
3525
- initialize(config) {
3526
- super.initialize(config);
3527
- this.baseUrl = config.baseUrl || this.baseUrl;
3528
- this.client = axios5.create({
3529
- baseURL: this.baseUrl,
3530
- timeout: config.timeout || 12e4,
3531
- // 2 minutes default (audio processing can take time)
3532
- headers: {
3533
- Authorization: `Bearer ${config.apiKey}`,
3534
- "Content-Type": "multipart/form-data",
3535
- ...config.headers
3536
- }
3537
- });
3755
+ /**
3756
+ * Get axios config for generated API client functions
3757
+ * Configures headers and base URL using Bearer token authorization
3758
+ */
3759
+ getAxiosConfig() {
3760
+ return super.getAxiosConfig("Authorization", (apiKey) => `Bearer ${apiKey}`);
3538
3761
  }
3539
3762
  /**
3540
3763
  * Submit audio for transcription
@@ -3556,7 +3779,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
3556
3779
  let audioData;
3557
3780
  let fileName = "audio.mp3";
3558
3781
  if (audio.type === "url") {
3559
- const response2 = await axios5.get(audio.url, {
3782
+ const response2 = await axios7.get(audio.url, {
3560
3783
  responseType: "arraybuffer"
3561
3784
  });
3562
3785
  audioData = Buffer.from(response2.data);
@@ -3581,40 +3804,37 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
3581
3804
  const model = this.selectModel(options);
3582
3805
  const isDiarization = model === "gpt-4o-transcribe-diarize";
3583
3806
  const needsWords = options?.wordTimestamps === true;
3584
- const requestBody = {
3807
+ const request = {
3585
3808
  file: audioData,
3809
+ // Generated type expects Blob
3586
3810
  model
3587
3811
  };
3588
3812
  if (options?.language) {
3589
- requestBody.language = options.language;
3813
+ request.language = options.language;
3590
3814
  }
3591
3815
  if (options?.metadata?.prompt) {
3592
- requestBody.prompt = options.metadata.prompt;
3816
+ request.prompt = options.metadata.prompt;
3593
3817
  }
3594
3818
  if (options?.metadata?.temperature !== void 0) {
3595
- requestBody.temperature = options.metadata.temperature;
3819
+ request.temperature = options.metadata.temperature;
3596
3820
  }
3597
3821
  if (isDiarization) {
3598
- requestBody.response_format = "diarized_json";
3822
+ request.response_format = "diarized_json";
3599
3823
  if (options?.metadata?.knownSpeakerNames) {
3600
- requestBody["known_speaker_names"] = options.metadata.knownSpeakerNames;
3824
+ request.known_speaker_names = options.metadata.knownSpeakerNames;
3601
3825
  }
3602
3826
  if (options?.metadata?.knownSpeakerReferences) {
3603
- requestBody["known_speaker_references"] = options.metadata.knownSpeakerReferences;
3827
+ request.known_speaker_references = options.metadata.knownSpeakerReferences;
3604
3828
  }
3605
3829
  } else if (needsWords || options?.diarization) {
3606
- requestBody.response_format = "verbose_json";
3830
+ request.response_format = "verbose_json";
3607
3831
  if (needsWords) {
3608
- requestBody.timestamp_granularities = ["word", "segment"];
3832
+ request.timestamp_granularities = ["word", "segment"];
3609
3833
  }
3610
3834
  } else {
3611
- requestBody.response_format = "json";
3835
+ request.response_format = "json";
3612
3836
  }
3613
- const response = await this.client.post("/audio/transcriptions", requestBody, {
3614
- headers: {
3615
- "Content-Type": "multipart/form-data"
3616
- }
3617
- });
3837
+ const response = await createTranscription(request, this.getAxiosConfig());
3618
3838
  return this.normalizeResponse(response.data, model, isDiarization);
3619
3839
  } catch (error) {
3620
3840
  return this.createErrorResponse(error);
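The Whisper adapter now delegates to the generated createTranscription helper instead of a hand-built multipart client. A hedged sketch of calling it directly; the audio buffer and model name are illustrative, the parameter names match the generated function above:

    // Direct use of the generated helper; the adapter supplies the same config via getAxiosConfig().
    const response = await createTranscription(
      {
        file: audioBuffer,                                 // audio payload (Buffer/Blob), illustrative
        model: "whisper-1",                                // illustrative model name
        response_format: "verbose_json",
        timestamp_granularities: ["word", "segment"],
      },
      whisperAdapter.getAxiosConfig()                      // Authorization: Bearer <key>; DEFAULT_TIMEOUTS.HTTP_REQUEST unless config.timeout is set
    );
    console.log(response.data);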
@@ -3735,7 +3955,7 @@ function createOpenAIWhisperAdapter(config) {
3735
3955
  }
3736
3956
 
3737
3957
  // src/adapters/speechmatics-adapter.ts
3738
- import axios6 from "axios";
3958
+ import axios8 from "axios";
3739
3959
  var SpeechmaticsAdapter = class extends BaseAdapter {
3740
3960
  constructor() {
3741
3961
  super(...arguments);
@@ -3757,7 +3977,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
3757
3977
  initialize(config) {
3758
3978
  super.initialize(config);
3759
3979
  this.baseUrl = config.baseUrl || this.baseUrl;
3760
- this.client = axios6.create({
3980
+ this.client = axios8.create({
3761
3981
  baseURL: this.baseUrl,
3762
3982
  timeout: config.timeout || 12e4,
3763
3983
  headers: {
@@ -4892,9 +5112,16 @@ export {
4892
5112
  GladiaAdapter,
4893
5113
  schema_exports as GladiaTypes,
4894
5114
  GladiaWebhookHandler,
5115
+ ListenV1EncodingParameter,
4895
5116
  OpenAIWhisperAdapter,
5117
+ SpeakV1ContainerParameter,
5118
+ SpeakV1EncodingParameter,
5119
+ SpeakV1SampleRateParameter,
4896
5120
  SpeechmaticsAdapter,
4897
5121
  SpeechmaticsWebhookHandler,
5122
+ StreamingSupportedBitDepthEnum,
5123
+ StreamingSupportedEncodingEnum,
5124
+ StreamingSupportedSampleRateEnum,
4898
5125
  VoiceRouter,
4899
5126
  WebhookRouter,
4900
5127
  createAssemblyAIAdapter,