voice-router-dev 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -268,24 +268,102 @@ function createVoiceRouter(config, adapters) {
    return router;
  }

+ // src/constants/defaults.ts
+ var DEFAULT_TIMEOUTS = {
+   /** Standard HTTP request timeout for API calls (60 seconds) */
+   HTTP_REQUEST: 6e4,
+   /** Audio processing timeout for long audio files (120 seconds) */
+   AUDIO_PROCESSING: 12e4,
+   /** WebSocket connection establishment timeout (10 seconds) */
+   WS_CONNECTION: 1e4,
+   /** WebSocket graceful close timeout (5 seconds) */
+   WS_CLOSE: 5e3
+ };
+ var DEFAULT_POLLING = {
+   /** Maximum number of polling attempts before timing out */
+   MAX_ATTEMPTS: 60,
+   /** Standard interval between polling attempts (2 seconds) */
+   INTERVAL_MS: 2e3,
+   /** Slower interval for long-running jobs (3 seconds) */
+   SLOW_INTERVAL_MS: 3e3
+ };
+
+ // src/utils/errors.ts
+ var ERROR_CODES = {
+   /** Failed to parse API response or WebSocket message */
+   PARSE_ERROR: "PARSE_ERROR",
+   /** WebSocket connection error */
+   WEBSOCKET_ERROR: "WEBSOCKET_ERROR",
+   /** Async transcription job did not complete within timeout */
+   POLLING_TIMEOUT: "POLLING_TIMEOUT",
+   /** Transcription processing failed on provider side */
+   TRANSCRIPTION_ERROR: "TRANSCRIPTION_ERROR",
+   /** Connection attempt timed out */
+   CONNECTION_TIMEOUT: "CONNECTION_TIMEOUT",
+   /** Invalid input provided to API */
+   INVALID_INPUT: "INVALID_INPUT",
+   /** Requested operation not supported by provider */
+   NOT_SUPPORTED: "NOT_SUPPORTED",
+   /** No transcription results available */
+   NO_RESULTS: "NO_RESULTS",
+   /** Unspecified or unknown error */
+   UNKNOWN_ERROR: "UNKNOWN_ERROR"
+ };
+ var ERROR_MESSAGES = {
+   PARSE_ERROR: "Failed to parse response data",
+   WEBSOCKET_ERROR: "WebSocket connection error",
+   POLLING_TIMEOUT: "Transcription did not complete within timeout period",
+   TRANSCRIPTION_ERROR: "Transcription processing failed",
+   CONNECTION_TIMEOUT: "Connection attempt timed out",
+   INVALID_INPUT: "Invalid input provided",
+   NOT_SUPPORTED: "Operation not supported by this provider",
+   NO_RESULTS: "No transcription results available",
+   UNKNOWN_ERROR: "An unknown error occurred"
+ };
+ function createError(code, customMessage, details) {
+   return {
+     code,
+     message: customMessage || ERROR_MESSAGES[code],
+     details
+   };
+ }
+
  // src/adapters/base-adapter.ts
  var BaseAdapter = class {
    initialize(config) {
      this.config = config;
    }
    /**
-    * Helper method to create error responses
+    * Helper method to create error responses with stack traces
+    *
+    * @param error - Error object or unknown error
+    * @param statusCode - Optional HTTP status code
+    * @param code - Optional error code (defaults to extracted or UNKNOWN_ERROR)
     */
-   createErrorResponse(error, statusCode) {
+   createErrorResponse(error, statusCode, code) {
      const err = error;
+     const httpStatus = statusCode || err.statusCode || err.response?.status;
+     const httpStatusText = err.response?.statusText;
+     const responseData = err.response?.data;
      return {
        success: false,
        provider: this.name,
        error: {
-         code: err.code || "UNKNOWN_ERROR",
+         code: code || err.code || ERROR_CODES.UNKNOWN_ERROR,
          message: err.message || "An unknown error occurred",
-         statusCode: statusCode || err.statusCode,
-         details: error
+         statusCode: httpStatus,
+         details: {
+           // Include full error object
+           error,
+           // Include stack trace if available
+           stack: err.stack,
+           // Include HTTP response details
+           httpStatus,
+           httpStatusText,
+           responseData,
+           // Include provider name for debugging
+           provider: this.name
+         }
        }
      };
    }
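
The expanded details payload changes what callers can usefully log on failure. A minimal consumption sketch (illustrative only: the adapter instance and audio URL are hypothetical; the result shape is exactly what createErrorResponse above constructs):

const result = await adapter.submitTranscription({ type: "url", url: "https://example.com/audio.mp3" });
if (!result.success) {
  // error.code is now one of the shared ERROR_CODES constants
  console.error(`[${result.provider}] ${result.error.code}: ${result.error.message}`);
  // details now bundles the stack trace and HTTP response for debugging
  const { stack, httpStatus, httpStatusText, responseData } = result.error.details;
  if (httpStatus) console.error(`HTTP ${httpStatus} ${httpStatusText || ""}`, responseData);
}
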
@@ -300,6 +378,64 @@ var BaseAdapter = class {
        throw new Error(`API key is required for ${this.name} provider`);
      }
    }
+   /**
+    * Build axios config for generated API client functions
+    *
+    * @param authHeaderName - Header name for API key (e.g., "Authorization", "x-gladia-key")
+    * @param authHeaderValue - Optional function to format auth header value (defaults to raw API key)
+    * @returns Axios config object
+    */
+   getAxiosConfig(authHeaderName = "Authorization", authHeaderValue) {
+     this.validateConfig();
+     const authValue = authHeaderValue ? authHeaderValue(this.config.apiKey) : this.config.apiKey;
+     return {
+       baseURL: this.config.baseUrl || this.baseUrl,
+       timeout: this.config.timeout || DEFAULT_TIMEOUTS.HTTP_REQUEST,
+       headers: {
+         [authHeaderName]: authValue,
+         "Content-Type": "application/json",
+         ...this.config.headers
+       }
+     };
+   }
+   /**
+    * Generic polling helper for async transcription jobs
+    *
+    * Polls getTranscript() until job completes or times out.
+    *
+    * @param transcriptId - Job/transcript ID to poll
+    * @param options - Polling configuration
+    * @returns Final transcription result
+    */
+   async pollForCompletion(transcriptId, options) {
+     const { maxAttempts = DEFAULT_POLLING.MAX_ATTEMPTS, intervalMs = DEFAULT_POLLING.INTERVAL_MS } = options || {};
+     for (let attempt = 0; attempt < maxAttempts; attempt++) {
+       const result = await this.getTranscript(transcriptId);
+       if (!result.success) {
+         return result;
+       }
+       const status = result.data?.status;
+       if (status === "completed") {
+         return result;
+       }
+       if (status === "error") {
+         return this.createErrorResponse(
+           new Error("Transcription failed"),
+           void 0,
+           ERROR_CODES.TRANSCRIPTION_ERROR
+         );
+       }
+       await new Promise((resolve) => setTimeout(resolve, intervalMs));
+     }
+     return {
+       success: false,
+       provider: this.name,
+       error: {
+         code: ERROR_CODES.POLLING_TIMEOUT,
+         message: `Transcription did not complete after ${maxAttempts} attempts`
+       }
+     };
+   }
  };

  // src/adapters/gladia-adapter.ts
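
These two base-class additions are the extraction points for the per-adapter duplication removed later in this diff: each subclass supplies only its auth header name (and optionally a value formatter), and the polling loop lives in one place. A rough sketch of the intended subclass wiring (hypothetical ExampleAdapter; submitTranscription and the data.id field mirror the result shape the real adapters use):

class ExampleAdapter extends BaseAdapter {
  getAxiosConfig() {
    // Only the header name differs per provider; the base method adds baseURL and timeout
    return super.getAxiosConfig("x-example-key");
  }
  async transcribeAndWait(audio) {
    const submitted = await this.submitTranscription(audio);
    if (!submitted.success) return submitted;
    // Omitted options fall back to DEFAULT_POLLING.MAX_ATTEMPTS and INTERVAL_MS
    return this.pollForCompletion(submitted.data.id, { intervalMs: DEFAULT_POLLING.SLOW_INTERVAL_MS });
  }
}
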
@@ -346,6 +482,143 @@ function mapEncodingToProvider(unifiedEncoding, provider) {
    return providerEncoding;
  }

+ // src/utils/websocket-helpers.ts
+ function waitForWebSocketOpen(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CONNECTION) {
+   return new Promise((resolve, reject) => {
+     const timeout = setTimeout(() => {
+       reject(new Error("WebSocket connection timeout"));
+     }, timeoutMs);
+     ws.once("open", () => {
+       clearTimeout(timeout);
+       resolve();
+     });
+     ws.once("error", (error) => {
+       clearTimeout(timeout);
+       reject(error);
+     });
+   });
+ }
+ function closeWebSocket(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CLOSE) {
+   return new Promise((resolve) => {
+     const timeout = setTimeout(() => {
+       ws.terminate();
+       resolve();
+     }, timeoutMs);
+     ws.close();
+     ws.once("close", () => {
+       clearTimeout(timeout);
+       resolve();
+     });
+   });
+ }
+ function setupWebSocketHandlers(ws, callbacks, setSessionStatus) {
+   ws.on("open", () => {
+     setSessionStatus("open");
+     callbacks?.onOpen?.();
+   });
+   ws.on("error", (error) => {
+     callbacks?.onError?.(createError(ERROR_CODES.WEBSOCKET_ERROR, error.message, error));
+   });
+   ws.on("close", (code, reason) => {
+     setSessionStatus("closed");
+     callbacks?.onClose?.(code, reason.toString());
+   });
+ }
+ function validateSessionForAudio(sessionStatus, wsReadyState, WebSocketOpen) {
+   if (sessionStatus !== "open") {
+     throw new Error(`Cannot send audio: session is ${sessionStatus}`);
+   }
+   if (wsReadyState !== WebSocketOpen) {
+     throw new Error("WebSocket is not open");
+   }
+ }
+
+ // src/utils/validation.ts
+ function validateEnumValue(value, enumType, fieldName, provider) {
+   const validValues = Object.values(enumType);
+   const isValid = validValues.some((v) => v === value);
+   if (!isValid) {
+     throw new Error(
+       `${provider} does not support ${fieldName} '${value}'. Supported values (from OpenAPI spec): ${validValues.join(", ")}`
+     );
+   }
+   return value;
+ }
+
+ // src/utils/transcription-helpers.ts
+ function extractSpeakersFromUtterances(utterances, getSpeakerId, formatLabel) {
+   if (!utterances || utterances.length === 0) {
+     return void 0;
+   }
+   const speakerSet = /* @__PURE__ */ new Set();
+   utterances.forEach((utterance) => {
+     const speakerId = getSpeakerId(utterance);
+     if (speakerId !== void 0) {
+       speakerSet.add(String(speakerId));
+     }
+   });
+   if (speakerSet.size === 0) {
+     return void 0;
+   }
+   return Array.from(speakerSet).map((speakerId) => ({
+     id: speakerId,
+     label: formatLabel ? formatLabel(speakerId) : `Speaker ${speakerId}`
+   }));
+ }
+ function extractWords(words, mapper) {
+   if (!words || words.length === 0) {
+     return void 0;
+   }
+   const normalizedWords = words.map(mapper);
+   return normalizedWords.length > 0 ? normalizedWords : void 0;
+ }
+ var STATUS_MAPPINGS = {
+   gladia: {
+     queued: "queued",
+     processing: "processing",
+     done: "completed",
+     error: "error"
+   },
+   assemblyai: {
+     queued: "queued",
+     processing: "processing",
+     completed: "completed",
+     error: "error"
+   },
+   deepgram: {
+     queued: "queued",
+     processing: "processing",
+     completed: "completed",
+     error: "error"
+   },
+   azure: {
+     succeeded: "completed",
+     running: "processing",
+     notstarted: "queued",
+     failed: "error"
+   },
+   speechmatics: {
+     running: "processing",
+     done: "completed",
+     rejected: "error",
+     expired: "error"
+   }
+ };
+ function normalizeStatus(providerStatus, provider, defaultStatus = "queued") {
+   if (!providerStatus) return defaultStatus;
+   const mapping = STATUS_MAPPINGS[provider];
+   const statusKey = providerStatus.toString().toLowerCase();
+   if (statusKey in mapping) {
+     return mapping[statusKey];
+   }
+   for (const [key, value] of Object.entries(mapping)) {
+     if (statusKey.includes(key)) {
+       return value;
+     }
+   }
+   return defaultStatus;
+ }
+
  // src/generated/gladia/api/gladiaControlAPI.ts
  var import_axios = __toESM(require("axios"));

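normalizeStatus resolves a provider status in three steps: exact match on the lowercased status, then a substring pass over the mapping keys, then the default. Expected results, derived from STATUS_MAPPINGS above:

normalizeStatus("done", "gladia");                 // "completed" (exact key match)
normalizeStatus("NotStarted", "azure");            // "queued" (lowercased to "notstarted")
normalizeStatus("running:stage2", "speechmatics"); // "processing" (substring match on "running")
normalizeStatus(void 0, "assemblyai");             // "queued" (falsy input returns defaultStatus)
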
@@ -1466,21 +1739,10 @@ var GladiaAdapter = class extends BaseAdapter {
    }
    /**
     * Get axios config for generated API client functions
-    * Configures headers and base URL
+    * Configures headers and base URL using Gladia's x-gladia-key header
     */
    getAxiosConfig() {
-     if (!this.config) {
-       throw new Error("Adapter not initialized. Call initialize() first.");
-     }
-     return {
-       baseURL: this.config.baseUrl || this.baseUrl,
-       timeout: this.config.timeout || 6e4,
-       headers: {
-         "x-gladia-key": this.config.apiKey,
-         "Content-Type": "application/json",
-         ...this.config.headers
-       }
-     };
+     return super.getAxiosConfig("x-gladia-key");
    }
    /**
     * Submit audio for transcription
@@ -1647,29 +1909,13 @@ var GladiaAdapter = class extends BaseAdapter {
     * Normalize Gladia response to unified format
     */
    normalizeResponse(response) {
-     let status;
-     switch (response.status) {
-       case "queued":
-         status = "queued";
-         break;
-       case "processing":
-         status = "processing";
-         break;
-       case "done":
-         status = "completed";
-         break;
-       case "error":
-         status = "error";
-         break;
-       default:
-         status = "queued";
-     }
+     const status = normalizeStatus(response.status, "gladia");
      if (response.status === "error") {
        return {
          success: false,
          provider: this.name,
          error: {
-           code: response.error_code?.toString() || "TRANSCRIPTION_ERROR",
+           code: response.error_code?.toString() || ERROR_CODES.TRANSCRIPTION_ERROR,
            message: "Transcription failed",
            statusCode: response.error_code || void 0
          },
@@ -1709,22 +1955,11 @@ var GladiaAdapter = class extends BaseAdapter {
     * Extract speaker information from Gladia response
     */
    extractSpeakers(transcription) {
-     if (!transcription?.utterances) {
-       return void 0;
-     }
-     const speakerSet = /* @__PURE__ */ new Set();
-     transcription.utterances.forEach((utterance) => {
-       if (utterance.speaker !== void 0) {
-         speakerSet.add(utterance.speaker);
-       }
-     });
-     if (speakerSet.size === 0) {
-       return void 0;
-     }
-     return Array.from(speakerSet).map((speakerId) => ({
-       id: speakerId.toString(),
-       label: `Speaker ${speakerId}`
-     }));
+     return extractSpeakersFromUtterances(
+       transcription?.utterances,
+       (utterance) => utterance.speaker,
+       (id) => `Speaker ${id}`
+     );
    }
    /**
     * Extract word timestamps from Gladia response
@@ -1735,14 +1970,17 @@ var GladiaAdapter = class extends BaseAdapter {
      }
      const allWords = transcription.utterances.flatMap(
        (utterance) => utterance.words.map((word) => ({
-         text: word.word,
-         start: word.start,
-         end: word.end,
-         confidence: word.confidence,
-         speaker: utterance.speaker?.toString()
+         word,
+         speaker: utterance.speaker
        }))
      );
-     return allWords.length > 0 ? allWords : void 0;
+     return extractWords(allWords, (item) => ({
+       text: item.word.word,
+       start: item.word.start,
+       end: item.word.end,
+       confidence: item.word.confidence,
+       speaker: item.speaker?.toString()
+     }));
    }
    /**
     * Extract utterances from Gladia response
@@ -1768,38 +2006,6 @@ var GladiaAdapter = class extends BaseAdapter {
    /**
     * Poll for transcription completion
     */
-   async pollForCompletion(jobId, maxAttempts = 60, intervalMs = 2e3) {
-     for (let attempt = 0; attempt < maxAttempts; attempt++) {
-       const result = await this.getTranscript(jobId);
-       if (!result.success) {
-         return result;
-       }
-       const status = result.data?.status;
-       if (status === "completed") {
-         return result;
-       }
-       if (status === "error") {
-         return {
-           success: false,
-           provider: this.name,
-           error: {
-             code: "TRANSCRIPTION_ERROR",
-             message: "Transcription failed"
-           },
-           raw: result.raw
-         };
-       }
-       await new Promise((resolve) => setTimeout(resolve, intervalMs));
-     }
-     return {
-       success: false,
-       provider: this.name,
-       error: {
-         code: "POLLING_TIMEOUT",
-         message: `Transcription did not complete after ${maxAttempts} attempts`
-       }
-     };
-   }
    /**
     * Stream audio for real-time transcription
     *
@@ -1843,14 +2049,12 @@ var GladiaAdapter = class extends BaseAdapter {
      this.validateConfig();
      let validatedSampleRate;
      if (options?.sampleRate) {
-       const validRates = Object.values(StreamingSupportedSampleRateEnum);
-       const isValidRate = validRates.some((rate) => rate === options.sampleRate);
-       if (!isValidRate) {
-         throw new Error(
-           `Gladia does not support sample rate ${options.sampleRate} Hz. Supported rates (from OpenAPI spec): ${validRates.join(", ")} Hz`
-         );
-       }
-       validatedSampleRate = options.sampleRate;
+       validatedSampleRate = validateEnumValue(
+         options.sampleRate,
+         StreamingSupportedSampleRateEnum,
+         "sample rate",
+         "Gladia"
+       );
      }
      const streamingRequest = {
        encoding: options?.encoding ? mapEncodingToProvider(options.encoding, "gladia") : void 0,
@@ -1872,9 +2076,8 @@ var GladiaAdapter = class extends BaseAdapter {
      const { id, url: wsUrl } = initResponse.data;
      const ws = new import_ws.default(wsUrl);
      let sessionStatus = "connecting";
-     ws.on("open", () => {
-       sessionStatus = "open";
-       callbacks?.onOpen?.();
+     setupWebSocketHandlers(ws, callbacks, (status) => {
+       sessionStatus = status;
      });
      ws.on("message", (data) => {
        try {
@@ -1919,48 +2122,20 @@ var GladiaAdapter = class extends BaseAdapter {
          }
        } catch (error) {
          callbacks?.onError?.({
-           code: "PARSE_ERROR",
+           code: ERROR_CODES.PARSE_ERROR,
            message: "Failed to parse WebSocket message",
            details: error
          });
        }
      });
-     ws.on("error", (error) => {
-       callbacks?.onError?.({
-         code: "WEBSOCKET_ERROR",
-         message: error.message,
-         details: error
-       });
-     });
-     ws.on("close", (code, reason) => {
-       sessionStatus = "closed";
-       callbacks?.onClose?.(code, reason.toString());
-     });
-     await new Promise((resolve, reject) => {
-       const timeout = setTimeout(() => {
-         reject(new Error("WebSocket connection timeout"));
-       }, 1e4);
-       ws.once("open", () => {
-         clearTimeout(timeout);
-         resolve();
-       });
-       ws.once("error", (error) => {
-         clearTimeout(timeout);
-         reject(error);
-       });
-     });
+     await waitForWebSocketOpen(ws);
      return {
        id,
        provider: this.name,
        createdAt: /* @__PURE__ */ new Date(),
        getStatus: () => sessionStatus,
        sendAudio: async (chunk) => {
-         if (sessionStatus !== "open") {
-           throw new Error(`Cannot send audio: session is ${sessionStatus}`);
-         }
-         if (ws.readyState !== import_ws.default.OPEN) {
-           throw new Error("WebSocket is not open");
-         }
+         validateSessionForAudio(sessionStatus, ws.readyState, import_ws.default.OPEN);
          ws.send(chunk.data);
          if (chunk.isLast) {
            ws.send(
@@ -1982,18 +2157,8 @@ var GladiaAdapter = class extends BaseAdapter {
              })
            );
          }
-         return new Promise((resolve) => {
-           const timeout = setTimeout(() => {
-             ws.terminate();
-             resolve();
-           }, 5e3);
-           ws.close();
-           ws.once("close", () => {
-             clearTimeout(timeout);
-             sessionStatus = "closed";
-             resolve();
-           });
-         });
+         await closeWebSocket(ws);
+         sessionStatus = "closed";
        }
      };
    }
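
With connection, handler, guard, and close logic factored into websocket-helpers, every streaming session follows the same skeleton. A condensed sketch of the flow the Gladia adapter now implements (not a public API; wsUrl, callbacks, and audioChunks are placeholders):

const WebSocket = require("ws");

const ws = new WebSocket(wsUrl);
let sessionStatus = "connecting";
setupWebSocketHandlers(ws, callbacks, (status) => {
  sessionStatus = status;
});
await waitForWebSocketOpen(ws); // rejects after DEFAULT_TIMEOUTS.WS_CONNECTION (10 s)
for (const chunk of audioChunks) {
  // throws unless both the session and the underlying socket are open
  validateSessionForAudio(sessionStatus, ws.readyState, WebSocket.OPEN);
  ws.send(chunk.data);
}
await closeWebSocket(ws); // falls back to terminate() after DEFAULT_TIMEOUTS.WS_CLOSE (5 s)
sessionStatus = "closed";
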
@@ -2347,26 +2512,16 @@ var AssemblyAIAdapter = class extends BaseAdapter {
        entityDetection: true,
        piiRedaction: true
      };
-     this.baseUrl = "https://api.assemblyai.com/v2";
+     this.baseUrl = "https://api.assemblyai.com";
+     // Generated functions already include /v2 path
      this.wsBaseUrl = "wss://api.assemblyai.com/v2/realtime/ws";
    }
    /**
     * Get axios config for generated API client functions
-    * Configures headers and base URL
+    * Configures headers and base URL using authorization header
     */
    getAxiosConfig() {
-     if (!this.config) {
-       throw new Error("Adapter not initialized. Call initialize() first.");
-     }
-     return {
-       baseURL: this.config.baseUrl || this.baseUrl,
-       timeout: this.config.timeout || 6e4,
-       headers: {
-         authorization: this.config.apiKey,
-         "Content-Type": "application/json",
-         ...this.config.headers
-       }
-     };
+     return super.getAxiosConfig("authorization");
    }
    /**
     * Submit audio for transcription
@@ -2644,41 +2799,6 @@ var AssemblyAIAdapter = class extends BaseAdapter {
        }))
      }));
    }
-   /**
-    * Poll for transcription completion
-    */
-   async pollForCompletion(transcriptId, maxAttempts = 60, intervalMs = 3e3) {
-     for (let attempt = 0; attempt < maxAttempts; attempt++) {
-       const result = await this.getTranscript(transcriptId);
-       if (!result.success) {
-         return result;
-       }
-       const status = result.data?.status;
-       if (status === "completed") {
-         return result;
-       }
-       if (status === "error") {
-         return {
-           success: false,
-           provider: this.name,
-           error: {
-             code: "TRANSCRIPTION_ERROR",
-             message: "Transcription failed"
-           },
-           raw: result.raw
-         };
-       }
-       await new Promise((resolve) => setTimeout(resolve, intervalMs));
-     }
-     return {
-       success: false,
-       provider: this.name,
-       error: {
-         code: "POLLING_TIMEOUT",
-         message: `Transcription did not complete after ${maxAttempts} attempts`
-       }
-     };
-   }
    /**
     * Stream audio for real-time transcription
     *
@@ -3320,7 +3440,24 @@ function createDeepgramAdapter(config) {
  }

  // src/adapters/azure-stt-adapter.ts
+ var import_axios5 = __toESM(require("axios"));
+
+ // src/generated/azure/api/speechServicesAPIV31.ts
  var import_axios4 = __toESM(require("axios"));
+ var transcriptionsCreate = (transcription, options) => {
+   return import_axios4.default.post("/transcriptions", transcription, options);
+ };
+ var transcriptionsGet = (id, options) => {
+   return import_axios4.default.get(`/transcriptions/${id}`, options);
+ };
+ var transcriptionsListFiles = (id, params, options) => {
+   return import_axios4.default.get(`/transcriptions/${id}/files`, {
+     ...options,
+     params: { ...params, ...options?.params }
+   });
+ };
+
+ // src/adapters/azure-stt-adapter.ts
  var AzureSTTAdapter = class extends BaseAdapter {
    constructor() {
      super(...arguments);
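
Note that these Azure wrappers call the global axios instance rather than a cached client, so every request must carry its own config. The getTranscript change further down reduces to a call of this form (illustrative; id is a transcription ID on an initialized AzureSTTAdapter):

// Replaces the old this.client.get(`/transcriptions/${id}`)
const statusResponse = await transcriptionsGet(id, this.getAxiosConfig());
// e.g. statusResponse.data.status === "Running", normalized elsewhere to "processing"
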
@@ -3337,20 +3474,20 @@ var AzureSTTAdapter = class extends BaseAdapter {
        entityDetection: false,
        piiRedaction: false
      };
+     this.baseUrl = "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.1";
    }
+   // Default, overridden in initialize()
    initialize(config) {
      super.initialize(config);
      this.region = config.region || "eastus";
      this.baseUrl = config.baseUrl || `https://${this.region}.api.cognitive.microsoft.com/speechtotext/v3.1`;
-     this.client = import_axios4.default.create({
-       baseURL: this.baseUrl,
-       timeout: config.timeout || 6e4,
-       headers: {
-         "Ocp-Apim-Subscription-Key": config.apiKey,
-         "Content-Type": "application/json",
-         ...config.headers
-       }
-     });
+   }
+   /**
+    * Get axios config for generated API client functions
+    * Configures headers and base URL using Azure subscription key
+    */
+   getAxiosConfig() {
+     return super.getAxiosConfig("Ocp-Apim-Subscription-Key");
    }
    /**
     * Submit audio for transcription
@@ -3382,9 +3519,9 @@ var AzureSTTAdapter = class extends BaseAdapter {
        contentUrls: [audio.url],
        properties: this.buildTranscriptionProperties(options)
      };
-     const response = await this.client.post(
-       "/transcriptions",
-       transcriptionRequest
+     const response = await transcriptionsCreate(
+       transcriptionRequest,
+       this.getAxiosConfig()
      );
      const transcription = response.data;
      return {
@@ -3415,9 +3552,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
    async getTranscript(transcriptId) {
      this.validateConfig();
      try {
-       const statusResponse = await this.client.get(
-         `/transcriptions/${transcriptId}`
-       );
+       const statusResponse = await transcriptionsGet(transcriptId, this.getAxiosConfig());
        const transcription = statusResponse.data;
        const status = this.normalizeStatus(transcription.status);
        if (status !== "completed") {
@@ -3445,7 +3580,11 @@ var AzureSTTAdapter = class extends BaseAdapter {
            raw: transcription
          };
        }
-       const filesResponse = await this.client.get(transcription.links.files);
+       const filesResponse = await transcriptionsListFiles(
+         transcriptId,
+         void 0,
+         this.getAxiosConfig()
+       );
        const files = filesResponse.data?.values || [];
        const resultFile = files.find((file) => file.kind === "Transcription");
        if (!resultFile?.links?.contentUrl) {
@@ -3459,7 +3598,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
            raw: transcription
          };
        }
-       const contentResponse = await import_axios4.default.get(resultFile.links.contentUrl);
+       const contentResponse = await import_axios5.default.get(resultFile.links.contentUrl);
        const transcriptionData = contentResponse.data;
        return this.normalizeResponse(transcription, transcriptionData);
      } catch (error) {
@@ -3558,7 +3697,57 @@ function createAzureSTTAdapter(config) {
  }

  // src/adapters/openai-whisper-adapter.ts
- var import_axios5 = __toESM(require("axios"));
+ var import_axios7 = __toESM(require("axios"));
+
+ // src/generated/openai/api/openAIAPI.ts
+ var import_axios6 = __toESM(require("axios"));
+ var createTranscription = (createTranscriptionRequest, options) => {
+   const formData = new FormData();
+   formData.append("file", createTranscriptionRequest.file);
+   formData.append("model", createTranscriptionRequest.model);
+   if (createTranscriptionRequest.language !== void 0) {
+     formData.append("language", createTranscriptionRequest.language);
+   }
+   if (createTranscriptionRequest.prompt !== void 0) {
+     formData.append("prompt", createTranscriptionRequest.prompt);
+   }
+   if (createTranscriptionRequest.response_format !== void 0) {
+     formData.append("response_format", createTranscriptionRequest.response_format);
+   }
+   if (createTranscriptionRequest.temperature !== void 0) {
+     formData.append("temperature", createTranscriptionRequest.temperature.toString());
+   }
+   if (createTranscriptionRequest.include !== void 0) {
+     createTranscriptionRequest.include.forEach((value) => formData.append("include", value));
+   }
+   if (createTranscriptionRequest.timestamp_granularities !== void 0) {
+     createTranscriptionRequest.timestamp_granularities.forEach(
+       (value) => formData.append("timestamp_granularities", value)
+     );
+   }
+   if (createTranscriptionRequest.stream !== void 0 && createTranscriptionRequest.stream !== null) {
+     formData.append("stream", createTranscriptionRequest.stream.toString());
+   }
+   if (createTranscriptionRequest.chunking_strategy !== void 0 && createTranscriptionRequest.chunking_strategy !== null) {
+     formData.append(
+       "chunking_strategy",
+       typeof createTranscriptionRequest.chunking_strategy === "object" ? JSON.stringify(createTranscriptionRequest.chunking_strategy) : createTranscriptionRequest.chunking_strategy
+     );
+   }
+   if (createTranscriptionRequest.known_speaker_names !== void 0) {
+     createTranscriptionRequest.known_speaker_names.forEach(
+       (value) => formData.append("known_speaker_names", value)
+     );
+   }
+   if (createTranscriptionRequest.known_speaker_references !== void 0) {
+     createTranscriptionRequest.known_speaker_references.forEach(
+       (value) => formData.append("known_speaker_references", value)
+     );
+   }
+   return import_axios6.default.post("/audio/transcriptions", formData, options);
+ };
+
+ // src/adapters/openai-whisper-adapter.ts
  var OpenAIWhisperAdapter = class extends BaseAdapter {
    constructor() {
      super(...arguments);
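
The generated wrapper assembles the multipart body field by field: optional scalars are appended only when defined, array fields once per element, and chunking_strategy is JSON-stringified when passed as an object. A minimal call sketch (assumes a runtime with global FormData and Blob, e.g. Node 18+; the file path and model name are illustrative):

const fs = require("fs");

async function transcribeFile(adapter) {
  const file = new Blob([fs.readFileSync("./audio.mp3")]);
  return createTranscription(
    { file, model: "whisper-1", response_format: "verbose_json", timestamp_granularities: ["word", "segment"] },
    adapter.getAxiosConfig() // supplies baseURL, timeout, and the Authorization header
  );
}
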
@@ -3580,19 +3769,12 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
      };
      this.baseUrl = "https://api.openai.com/v1";
    }
-   initialize(config) {
-     super.initialize(config);
-     this.baseUrl = config.baseUrl || this.baseUrl;
-     this.client = import_axios5.default.create({
-       baseURL: this.baseUrl,
-       timeout: config.timeout || 12e4,
-       // 2 minutes default (audio processing can take time)
-       headers: {
-         Authorization: `Bearer ${config.apiKey}`,
-         "Content-Type": "multipart/form-data",
-         ...config.headers
-       }
-     });
+   /**
+    * Get axios config for generated API client functions
+    * Configures headers and base URL using Bearer token authorization
+    */
+   getAxiosConfig() {
+     return super.getAxiosConfig("Authorization", (apiKey) => `Bearer ${apiKey}`);
    }
    /**
     * Submit audio for transcription
@@ -3614,7 +3796,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
      let audioData;
      let fileName = "audio.mp3";
      if (audio.type === "url") {
-       const response2 = await import_axios5.default.get(audio.url, {
+       const response2 = await import_axios7.default.get(audio.url, {
          responseType: "arraybuffer"
        });
        audioData = Buffer.from(response2.data);
@@ -3639,40 +3821,37 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
      const model = this.selectModel(options);
      const isDiarization = model === "gpt-4o-transcribe-diarize";
      const needsWords = options?.wordTimestamps === true;
-     const requestBody = {
+     const request = {
        file: audioData,
+       // Generated type expects Blob
        model
      };
      if (options?.language) {
-       requestBody.language = options.language;
+       request.language = options.language;
      }
      if (options?.metadata?.prompt) {
-       requestBody.prompt = options.metadata.prompt;
+       request.prompt = options.metadata.prompt;
      }
      if (options?.metadata?.temperature !== void 0) {
-       requestBody.temperature = options.metadata.temperature;
+       request.temperature = options.metadata.temperature;
      }
      if (isDiarization) {
-       requestBody.response_format = "diarized_json";
+       request.response_format = "diarized_json";
        if (options?.metadata?.knownSpeakerNames) {
-         requestBody["known_speaker_names"] = options.metadata.knownSpeakerNames;
+         request.known_speaker_names = options.metadata.knownSpeakerNames;
        }
        if (options?.metadata?.knownSpeakerReferences) {
-         requestBody["known_speaker_references"] = options.metadata.knownSpeakerReferences;
+         request.known_speaker_references = options.metadata.knownSpeakerReferences;
        }
      } else if (needsWords || options?.diarization) {
-       requestBody.response_format = "verbose_json";
+       request.response_format = "verbose_json";
        if (needsWords) {
-         requestBody.timestamp_granularities = ["word", "segment"];
+         request.timestamp_granularities = ["word", "segment"];
        }
      } else {
-       requestBody.response_format = "json";
+       request.response_format = "json";
      }
-     const response = await this.client.post("/audio/transcriptions", requestBody, {
-       headers: {
-         "Content-Type": "multipart/form-data"
-       }
-     });
+     const response = await createTranscription(request, this.getAxiosConfig());
      return this.normalizeResponse(response.data, model, isDiarization);
    } catch (error) {
      return this.createErrorResponse(error);
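
This adapter is also the first consumer of the optional value formatter on the shared getAxiosConfig, since OpenAI expects "Authorization: Bearer <key>" rather than a bare key header. One behavioral note grounded in the deleted lines above: the old client defaulted to a 120-second timeout (12e4) for uploads, while the shared helper defaults to DEFAULT_TIMEOUTS.HTTP_REQUEST (60 seconds), so long uploads now depend on callers setting config.timeout. The produced config looks roughly like:

// super.getAxiosConfig("Authorization", (apiKey) => `Bearer ${apiKey}`) yields:
{
  baseURL: "https://api.openai.com/v1",
  timeout: 6e4, // unless config.timeout is provided
  headers: {
    Authorization: "Bearer sk-...", // formatted by the callback; key shown is a placeholder
    "Content-Type": "application/json"
  }
}
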
@@ -3793,7 +3972,7 @@ function createOpenAIWhisperAdapter(config) {
  }

  // src/adapters/speechmatics-adapter.ts
- var import_axios6 = __toESM(require("axios"));
+ var import_axios8 = __toESM(require("axios"));
  var SpeechmaticsAdapter = class extends BaseAdapter {
    constructor() {
      super(...arguments);
@@ -3815,7 +3994,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
    initialize(config) {
      super.initialize(config);
      this.baseUrl = config.baseUrl || this.baseUrl;
-     this.client = import_axios6.default.create({
+     this.client = import_axios8.default.create({
        baseURL: this.baseUrl,
        timeout: config.timeout || 12e4,
        headers: {