voice-router-dev 0.1.7 → 0.1.8

This diff compares the contents of publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
package/dist/index.mjs CHANGED
@@ -210,24 +210,102 @@ function createVoiceRouter(config, adapters) {
  return router;
  }

+ // src/constants/defaults.ts
+ var DEFAULT_TIMEOUTS = {
+ /** Standard HTTP request timeout for API calls (60 seconds) */
+ HTTP_REQUEST: 6e4,
+ /** Audio processing timeout for long audio files (120 seconds) */
+ AUDIO_PROCESSING: 12e4,
+ /** WebSocket connection establishment timeout (10 seconds) */
+ WS_CONNECTION: 1e4,
+ /** WebSocket graceful close timeout (5 seconds) */
+ WS_CLOSE: 5e3
+ };
+ var DEFAULT_POLLING = {
+ /** Maximum number of polling attempts before timing out */
+ MAX_ATTEMPTS: 60,
+ /** Standard interval between polling attempts (2 seconds) */
+ INTERVAL_MS: 2e3,
+ /** Slower interval for long-running jobs (3 seconds) */
+ SLOW_INTERVAL_MS: 3e3
+ };
+
+ // src/utils/errors.ts
+ var ERROR_CODES = {
+ /** Failed to parse API response or WebSocket message */
+ PARSE_ERROR: "PARSE_ERROR",
+ /** WebSocket connection error */
+ WEBSOCKET_ERROR: "WEBSOCKET_ERROR",
+ /** Async transcription job did not complete within timeout */
+ POLLING_TIMEOUT: "POLLING_TIMEOUT",
+ /** Transcription processing failed on provider side */
+ TRANSCRIPTION_ERROR: "TRANSCRIPTION_ERROR",
+ /** Connection attempt timed out */
+ CONNECTION_TIMEOUT: "CONNECTION_TIMEOUT",
+ /** Invalid input provided to API */
+ INVALID_INPUT: "INVALID_INPUT",
+ /** Requested operation not supported by provider */
+ NOT_SUPPORTED: "NOT_SUPPORTED",
+ /** No transcription results available */
+ NO_RESULTS: "NO_RESULTS",
+ /** Unspecified or unknown error */
+ UNKNOWN_ERROR: "UNKNOWN_ERROR"
+ };
+ var ERROR_MESSAGES = {
+ PARSE_ERROR: "Failed to parse response data",
+ WEBSOCKET_ERROR: "WebSocket connection error",
+ POLLING_TIMEOUT: "Transcription did not complete within timeout period",
+ TRANSCRIPTION_ERROR: "Transcription processing failed",
+ CONNECTION_TIMEOUT: "Connection attempt timed out",
+ INVALID_INPUT: "Invalid input provided",
+ NOT_SUPPORTED: "Operation not supported by this provider",
+ NO_RESULTS: "No transcription results available",
+ UNKNOWN_ERROR: "An unknown error occurred"
+ };
+ function createError(code, customMessage, details) {
+ return {
+ code,
+ message: customMessage || ERROR_MESSAGES[code],
+ details
+ };
+ }
+
  // src/adapters/base-adapter.ts
  var BaseAdapter = class {
  initialize(config) {
  this.config = config;
  }
  /**
- * Helper method to create error responses
+ * Helper method to create error responses with stack traces
+ *
+ * @param error - Error object or unknown error
+ * @param statusCode - Optional HTTP status code
+ * @param code - Optional error code (defaults to extracted or UNKNOWN_ERROR)
  */
- createErrorResponse(error, statusCode) {
+ createErrorResponse(error, statusCode, code) {
  const err = error;
+ const httpStatus = statusCode || err.statusCode || err.response?.status;
+ const httpStatusText = err.response?.statusText;
+ const responseData = err.response?.data;
  return {
  success: false,
  provider: this.name,
  error: {
- code: err.code || "UNKNOWN_ERROR",
+ code: code || err.code || ERROR_CODES.UNKNOWN_ERROR,
  message: err.message || "An unknown error occurred",
- statusCode: statusCode || err.statusCode,
- details: error
+ statusCode: httpStatus,
+ details: {
+ // Include full error object
+ error,
+ // Include stack trace if available
+ stack: err.stack,
+ // Include HTTP response details
+ httpStatus,
+ httpStatusText,
+ responseData,
+ // Include provider name for debugging
+ provider: this.name
+ }
  }
  };
  }
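
A quick sketch of how the new createError factory resolves messages, traced against the code above; the result values in the comments follow directly from ERROR_MESSAGES:

    // Falls back to the canonical message for the code:
    createError(ERROR_CODES.POLLING_TIMEOUT);
    // => { code: "POLLING_TIMEOUT", message: "Transcription did not complete within timeout period", details: undefined }

    // A custom message overrides the canonical one; details pass through untouched:
    createError(ERROR_CODES.PARSE_ERROR, "Bad JSON frame", { raw: "<binary>" });
    // => { code: "PARSE_ERROR", message: "Bad JSON frame", details: { raw: "<binary>" } }
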
@@ -242,6 +320,64 @@ var BaseAdapter = class {
  throw new Error(`API key is required for ${this.name} provider`);
  }
  }
+ /**
+ * Build axios config for generated API client functions
+ *
+ * @param authHeaderName - Header name for API key (e.g., "Authorization", "x-gladia-key")
+ * @param authHeaderValue - Optional function to format auth header value (defaults to raw API key)
+ * @returns Axios config object
+ */
+ getAxiosConfig(authHeaderName = "Authorization", authHeaderValue) {
+ this.validateConfig();
+ const authValue = authHeaderValue ? authHeaderValue(this.config.apiKey) : this.config.apiKey;
+ return {
+ baseURL: this.config.baseUrl || this.baseUrl,
+ timeout: this.config.timeout || DEFAULT_TIMEOUTS.HTTP_REQUEST,
+ headers: {
+ [authHeaderName]: authValue,
+ "Content-Type": "application/json",
+ ...this.config.headers
+ }
+ };
+ }
+ /**
+ * Generic polling helper for async transcription jobs
+ *
+ * Polls getTranscript() until job completes or times out.
+ *
+ * @param transcriptId - Job/transcript ID to poll
+ * @param options - Polling configuration
+ * @returns Final transcription result
+ */
+ async pollForCompletion(transcriptId, options) {
+ const { maxAttempts = DEFAULT_POLLING.MAX_ATTEMPTS, intervalMs = DEFAULT_POLLING.INTERVAL_MS } = options || {};
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
+ const result = await this.getTranscript(transcriptId);
+ if (!result.success) {
+ return result;
+ }
+ const status = result.data?.status;
+ if (status === "completed") {
+ return result;
+ }
+ if (status === "error") {
+ return this.createErrorResponse(
+ new Error("Transcription failed"),
+ void 0,
+ ERROR_CODES.TRANSCRIPTION_ERROR
+ );
+ }
+ await new Promise((resolve) => setTimeout(resolve, intervalMs));
+ }
+ return {
+ success: false,
+ provider: this.name,
+ error: {
+ code: ERROR_CODES.POLLING_TIMEOUT,
+ message: `Transcription did not complete after ${maxAttempts} attempts`
+ }
+ };
+ }
  };

  // src/adapters/gladia-adapter.ts
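
Both new base-class helpers are inherited by every adapter below. A minimal sketch of how they compose, assuming a hypothetical ExampleAdapter (the name, endpoint, and stubbed getTranscript are illustrative, not part of the package):

    class ExampleAdapter extends BaseAdapter {
      name = "example";                         // hypothetical provider name
      baseUrl = "https://api.example.com";      // hypothetical endpoint
      async getTranscript(id) {
        // A real adapter would call its provider API here, passing this.getAxiosConfig()
        // to the generated client functions; stubbed so the sketch is self-contained.
        return { success: true, provider: this.name, data: { id, status: "completed" } };
      }
    }

    const adapter = new ExampleAdapter();
    adapter.initialize({ apiKey: "sk-..." });
    // Inherited: polls getTranscript() every intervalMs until "completed"/"error" or maxAttempts.
    const result = await adapter.pollForCompletion("job-123", { maxAttempts: 5, intervalMs: 500 });
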
@@ -288,6 +424,143 @@ function mapEncodingToProvider(unifiedEncoding, provider) {
  return providerEncoding;
  }

+ // src/utils/websocket-helpers.ts
+ function waitForWebSocketOpen(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CONNECTION) {
+ return new Promise((resolve, reject) => {
+ const timeout = setTimeout(() => {
+ reject(new Error("WebSocket connection timeout"));
+ }, timeoutMs);
+ ws.once("open", () => {
+ clearTimeout(timeout);
+ resolve();
+ });
+ ws.once("error", (error) => {
+ clearTimeout(timeout);
+ reject(error);
+ });
+ });
+ }
+ function closeWebSocket(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CLOSE) {
+ return new Promise((resolve) => {
+ const timeout = setTimeout(() => {
+ ws.terminate();
+ resolve();
+ }, timeoutMs);
+ ws.close();
+ ws.once("close", () => {
+ clearTimeout(timeout);
+ resolve();
+ });
+ });
+ }
+ function setupWebSocketHandlers(ws, callbacks, setSessionStatus) {
+ ws.on("open", () => {
+ setSessionStatus("open");
+ callbacks?.onOpen?.();
+ });
+ ws.on("error", (error) => {
+ callbacks?.onError?.(createError(ERROR_CODES.WEBSOCKET_ERROR, error.message, error));
+ });
+ ws.on("close", (code, reason) => {
+ setSessionStatus("closed");
+ callbacks?.onClose?.(code, reason.toString());
+ });
+ }
+ function validateSessionForAudio(sessionStatus, wsReadyState, WebSocketOpen) {
+ if (sessionStatus !== "open") {
+ throw new Error(`Cannot send audio: session is ${sessionStatus}`);
+ }
+ if (wsReadyState !== WebSocketOpen) {
+ throw new Error("WebSocket is not open");
+ }
+ }
+
+ // src/utils/validation.ts
+ function validateEnumValue(value, enumType, fieldName, provider) {
+ const validValues = Object.values(enumType);
+ const isValid = validValues.some((v) => v === value);
+ if (!isValid) {
+ throw new Error(
+ `${provider} does not support ${fieldName} '${value}'. Supported values (from OpenAPI spec): ${validValues.join(", ")}`
+ );
+ }
+ return value;
+ }
+
+ // src/utils/transcription-helpers.ts
+ function extractSpeakersFromUtterances(utterances, getSpeakerId, formatLabel) {
+ if (!utterances || utterances.length === 0) {
+ return void 0;
+ }
+ const speakerSet = /* @__PURE__ */ new Set();
+ utterances.forEach((utterance) => {
+ const speakerId = getSpeakerId(utterance);
+ if (speakerId !== void 0) {
+ speakerSet.add(String(speakerId));
+ }
+ });
+ if (speakerSet.size === 0) {
+ return void 0;
+ }
+ return Array.from(speakerSet).map((speakerId) => ({
+ id: speakerId,
+ label: formatLabel ? formatLabel(speakerId) : `Speaker ${speakerId}`
+ }));
+ }
+ function extractWords(words, mapper) {
+ if (!words || words.length === 0) {
+ return void 0;
+ }
+ const normalizedWords = words.map(mapper);
+ return normalizedWords.length > 0 ? normalizedWords : void 0;
+ }
+ var STATUS_MAPPINGS = {
+ gladia: {
+ queued: "queued",
+ processing: "processing",
+ done: "completed",
+ error: "error"
+ },
+ assemblyai: {
+ queued: "queued",
+ processing: "processing",
+ completed: "completed",
+ error: "error"
+ },
+ deepgram: {
+ queued: "queued",
+ processing: "processing",
+ completed: "completed",
+ error: "error"
+ },
+ azure: {
+ succeeded: "completed",
+ running: "processing",
+ notstarted: "queued",
+ failed: "error"
+ },
+ speechmatics: {
+ running: "processing",
+ done: "completed",
+ rejected: "error",
+ expired: "error"
+ }
+ };
+ function normalizeStatus(providerStatus, provider, defaultStatus = "queued") {
+ if (!providerStatus) return defaultStatus;
+ const mapping = STATUS_MAPPINGS[provider];
+ const statusKey = providerStatus.toString().toLowerCase();
+ if (statusKey in mapping) {
+ return mapping[statusKey];
+ }
+ for (const [key, value] of Object.entries(mapping)) {
+ if (statusKey.includes(key)) {
+ return value;
+ }
+ }
+ return defaultStatus;
+ }
+
  // src/generated/gladia/api/gladiaControlAPI.ts
  import axios from "axios";

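normalizeStatus tries an exact (lowercased) key match first, then falls back to substring matching, then to defaultStatus. A few calls traced against STATUS_MAPPINGS above:

    normalizeStatus("done", "gladia");               // => "completed" (exact match)
    normalizeStatus("NotStarted", "azure");          // => "queued"    (lowercased to "notstarted")
    normalizeStatus("job.rejected", "speechmatics"); // => "error"     (substring fallback)
    normalizeStatus(void 0, "deepgram");             // => "queued"    (defaultStatus)
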
@@ -1408,21 +1681,10 @@ var GladiaAdapter = class extends BaseAdapter {
  }
  /**
  * Get axios config for generated API client functions
- * Configures headers and base URL
+ * Configures headers and base URL using Gladia's x-gladia-key header
  */
  getAxiosConfig() {
- if (!this.config) {
- throw new Error("Adapter not initialized. Call initialize() first.");
- }
- return {
- baseURL: this.config.baseUrl || this.baseUrl,
- timeout: this.config.timeout || 6e4,
- headers: {
- "x-gladia-key": this.config.apiKey,
- "Content-Type": "application/json",
- ...this.config.headers
- }
- };
+ return super.getAxiosConfig("x-gladia-key");
  }
  /**
  * Submit audio for transcription
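
The optional formatter argument to the base getAxiosConfig covers both header styles used in this diff: a raw key for Gladia here, and a formatted value for the OpenAI adapter further down:

    super.getAxiosConfig("x-gladia-key");                                   // raw API key header
    super.getAxiosConfig("Authorization", (apiKey) => `Bearer ${apiKey}`);  // formatted header value
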
@@ -1589,29 +1851,13 @@ var GladiaAdapter = class extends BaseAdapter {
  * Normalize Gladia response to unified format
  */
  normalizeResponse(response) {
- let status;
- switch (response.status) {
- case "queued":
- status = "queued";
- break;
- case "processing":
- status = "processing";
- break;
- case "done":
- status = "completed";
- break;
- case "error":
- status = "error";
- break;
- default:
- status = "queued";
- }
+ const status = normalizeStatus(response.status, "gladia");
  if (response.status === "error") {
  return {
  success: false,
  provider: this.name,
  error: {
- code: response.error_code?.toString() || "TRANSCRIPTION_ERROR",
+ code: response.error_code?.toString() || ERROR_CODES.TRANSCRIPTION_ERROR,
  message: "Transcription failed",
  statusCode: response.error_code || void 0
  },
@@ -1651,22 +1897,11 @@ var GladiaAdapter = class extends BaseAdapter {
  * Extract speaker information from Gladia response
  */
  extractSpeakers(transcription) {
- if (!transcription?.utterances) {
- return void 0;
- }
- const speakerSet = /* @__PURE__ */ new Set();
- transcription.utterances.forEach((utterance) => {
- if (utterance.speaker !== void 0) {
- speakerSet.add(utterance.speaker);
- }
- });
- if (speakerSet.size === 0) {
- return void 0;
- }
- return Array.from(speakerSet).map((speakerId) => ({
- id: speakerId.toString(),
- label: `Speaker ${speakerId}`
- }));
+ return extractSpeakersFromUtterances(
+ transcription?.utterances,
+ (utterance) => utterance.speaker,
+ (id) => `Speaker ${id}`
+ );
  }
  /**
  * Extract word timestamps from Gladia response
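
A sketch of the shared helper on illustrative utterance data (the field names follow the call above; the data itself is made up):

    const utterances = [
      { speaker: 0, text: "hi" },
      { speaker: 1, text: "hello" },
      { speaker: 0, text: "bye" }
    ];
    extractSpeakersFromUtterances(utterances, (u) => u.speaker, (id) => `Speaker ${id}`);
    // => [{ id: "0", label: "Speaker 0" }, { id: "1", label: "Speaker 1" }]
    extractSpeakersFromUtterances([], (u) => u.speaker);
    // => undefined (no utterances)
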
@@ -1677,14 +1912,17 @@ var GladiaAdapter = class extends BaseAdapter {
  }
  const allWords = transcription.utterances.flatMap(
  (utterance) => utterance.words.map((word) => ({
- text: word.word,
- start: word.start,
- end: word.end,
- confidence: word.confidence,
- speaker: utterance.speaker?.toString()
+ word,
+ speaker: utterance.speaker
  }))
  );
- return allWords.length > 0 ? allWords : void 0;
+ return extractWords(allWords, (item) => ({
+ text: item.word.word,
+ start: item.word.start,
+ end: item.word.end,
+ confidence: item.word.confidence,
+ speaker: item.speaker?.toString()
+ }));
  }
  /**
  * Extract utterances from Gladia response
@@ -1710,38 +1948,6 @@ var GladiaAdapter = class extends BaseAdapter {
  /**
  * Poll for transcription completion
  */
- async pollForCompletion(jobId, maxAttempts = 60, intervalMs = 2e3) {
- for (let attempt = 0; attempt < maxAttempts; attempt++) {
- const result = await this.getTranscript(jobId);
- if (!result.success) {
- return result;
- }
- const status = result.data?.status;
- if (status === "completed") {
- return result;
- }
- if (status === "error") {
- return {
- success: false,
- provider: this.name,
- error: {
- code: "TRANSCRIPTION_ERROR",
- message: "Transcription failed"
- },
- raw: result.raw
- };
- }
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
- }
- return {
- success: false,
- provider: this.name,
- error: {
- code: "POLLING_TIMEOUT",
- message: `Transcription did not complete after ${maxAttempts} attempts`
- }
- };
- }
  /**
  * Stream audio for real-time transcription
  *
@@ -1785,14 +1991,12 @@ var GladiaAdapter = class extends BaseAdapter {
  this.validateConfig();
  let validatedSampleRate;
  if (options?.sampleRate) {
- const validRates = Object.values(StreamingSupportedSampleRateEnum);
- const isValidRate = validRates.some((rate) => rate === options.sampleRate);
- if (!isValidRate) {
- throw new Error(
- `Gladia does not support sample rate ${options.sampleRate} Hz. Supported rates (from OpenAPI spec): ${validRates.join(", ")} Hz`
- );
- }
- validatedSampleRate = options.sampleRate;
+ validatedSampleRate = validateEnumValue(
+ options.sampleRate,
+ StreamingSupportedSampleRateEnum,
+ "sample rate",
+ "Gladia"
+ );
  }
  const streamingRequest = {
  encoding: options?.encoding ? mapEncodingToProvider(options.encoding, "gladia") : void 0,
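
The extracted validator keeps the same error shape the inline check produced. A sketch with a hypothetical enum object (real values come from the generated OpenAPI types):

    const SampleRateEnum = { NUMBER_8000: 8000, NUMBER_16000: 16000, NUMBER_44100: 44100 };
    validateEnumValue(16000, SampleRateEnum, "sample rate", "Gladia"); // => 16000
    validateEnumValue(22050, SampleRateEnum, "sample rate", "Gladia");
    // throws: Gladia does not support sample rate '22050'. Supported values (from OpenAPI spec): 8000, 16000, 44100
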
@@ -1814,9 +2018,8 @@ var GladiaAdapter = class extends BaseAdapter {
  const { id, url: wsUrl } = initResponse.data;
  const ws = new WebSocket(wsUrl);
  let sessionStatus = "connecting";
- ws.on("open", () => {
- sessionStatus = "open";
- callbacks?.onOpen?.();
+ setupWebSocketHandlers(ws, callbacks, (status) => {
+ sessionStatus = status;
  });
  ws.on("message", (data) => {
  try {
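
A minimal sketch of the extracted WebSocket lifecycle helpers working together, assuming the ws package this bundle already uses (the URL is illustrative):

    import WebSocket from "ws";

    const ws = new WebSocket("wss://example.invalid/stream"); // illustrative URL
    let sessionStatus = "connecting";
    setupWebSocketHandlers(ws, {
      onOpen: () => console.log("session open"),
      onError: (err) => console.error(err.code, err.message), // createError() shape
      onClose: (code, reason) => console.log("closed", code, reason)
    }, (status) => { sessionStatus = status; });
    await waitForWebSocketOpen(ws); // rejects after WS_CONNECTION (10 s) without an "open" event
    // ... send audio frames ...
    await closeWebSocket(ws);       // falls back to terminate() after WS_CLOSE (5 s) if close hangs
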
@@ -1861,48 +2064,20 @@ var GladiaAdapter = class extends BaseAdapter {
  }
  } catch (error) {
  callbacks?.onError?.({
- code: "PARSE_ERROR",
+ code: ERROR_CODES.PARSE_ERROR,
  message: "Failed to parse WebSocket message",
  details: error
  });
  }
  });
- ws.on("error", (error) => {
- callbacks?.onError?.({
- code: "WEBSOCKET_ERROR",
- message: error.message,
- details: error
- });
- });
- ws.on("close", (code, reason) => {
- sessionStatus = "closed";
- callbacks?.onClose?.(code, reason.toString());
- });
- await new Promise((resolve, reject) => {
- const timeout = setTimeout(() => {
- reject(new Error("WebSocket connection timeout"));
- }, 1e4);
- ws.once("open", () => {
- clearTimeout(timeout);
- resolve();
- });
- ws.once("error", (error) => {
- clearTimeout(timeout);
- reject(error);
- });
- });
+ await waitForWebSocketOpen(ws);
  return {
  id,
  provider: this.name,
  createdAt: /* @__PURE__ */ new Date(),
  getStatus: () => sessionStatus,
  sendAudio: async (chunk) => {
- if (sessionStatus !== "open") {
- throw new Error(`Cannot send audio: session is ${sessionStatus}`);
- }
- if (ws.readyState !== WebSocket.OPEN) {
- throw new Error("WebSocket is not open");
- }
+ validateSessionForAudio(sessionStatus, ws.readyState, WebSocket.OPEN);
  ws.send(chunk.data);
  if (chunk.isLast) {
  ws.send(
@@ -1924,18 +2099,8 @@ var GladiaAdapter = class extends BaseAdapter {
  })
  );
  }
- return new Promise((resolve) => {
- const timeout = setTimeout(() => {
- ws.terminate();
- resolve();
- }, 5e3);
- ws.close();
- ws.once("close", () => {
- clearTimeout(timeout);
- sessionStatus = "closed";
- resolve();
- });
- });
+ await closeWebSocket(ws);
+ sessionStatus = "closed";
  }
  };
  }
@@ -2289,26 +2454,16 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  entityDetection: true,
  piiRedaction: true
  };
- this.baseUrl = "https://api.assemblyai.com/v2";
+ this.baseUrl = "https://api.assemblyai.com";
+ // Generated functions already include /v2 path
  this.wsBaseUrl = "wss://api.assemblyai.com/v2/realtime/ws";
  }
  /**
  * Get axios config for generated API client functions
- * Configures headers and base URL
+ * Configures headers and base URL using authorization header
  */
  getAxiosConfig() {
- if (!this.config) {
- throw new Error("Adapter not initialized. Call initialize() first.");
- }
- return {
- baseURL: this.config.baseUrl || this.baseUrl,
- timeout: this.config.timeout || 6e4,
- headers: {
- authorization: this.config.apiKey,
- "Content-Type": "application/json",
- ...this.config.headers
- }
- };
+ return super.getAxiosConfig("authorization");
  }
  /**
  * Submit audio for transcription
@@ -2586,41 +2741,6 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  }))
  }));
  }
- /**
- * Poll for transcription completion
- */
- async pollForCompletion(transcriptId, maxAttempts = 60, intervalMs = 3e3) {
- for (let attempt = 0; attempt < maxAttempts; attempt++) {
- const result = await this.getTranscript(transcriptId);
- if (!result.success) {
- return result;
- }
- const status = result.data?.status;
- if (status === "completed") {
- return result;
- }
- if (status === "error") {
- return {
- success: false,
- provider: this.name,
- error: {
- code: "TRANSCRIPTION_ERROR",
- message: "Transcription failed"
- },
- raw: result.raw
- };
- }
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
- }
- return {
- success: false,
- provider: this.name,
- error: {
- code: "POLLING_TIMEOUT",
- message: `Transcription did not complete after ${maxAttempts} attempts`
- }
- };
- }
  /**
  * Stream audio for real-time transcription
  *
@@ -3262,7 +3382,24 @@ function createDeepgramAdapter(config) {
  }

  // src/adapters/azure-stt-adapter.ts
+ import axios5 from "axios";
+
+ // src/generated/azure/api/speechServicesAPIV31.ts
  import axios4 from "axios";
+ var transcriptionsCreate = (transcription, options) => {
+ return axios4.post("/transcriptions", transcription, options);
+ };
+ var transcriptionsGet = (id, options) => {
+ return axios4.get(`/transcriptions/${id}`, options);
+ };
+ var transcriptionsListFiles = (id, params, options) => {
+ return axios4.get(`/transcriptions/${id}/files`, {
+ ...options,
+ params: { ...params, ...options?.params }
+ });
+ };
+
+ // src/adapters/azure-stt-adapter.ts
  var AzureSTTAdapter = class extends BaseAdapter {
  constructor() {
  super(...arguments);
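
A sketch of the new generated Azure client functions driven by an adapter's config; the request body fields are illustrative, and adapter stands for an initialized AzureSTTAdapter:

    const cfg = adapter.getAxiosConfig(); // baseURL + Ocp-Apim-Subscription-Key header (set up below)
    await transcriptionsCreate(
      { displayName: "demo", locale: "en-US", contentUrls: ["https://example.com/a.wav"] },
      cfg
    );
    const status = await transcriptionsGet("some-transcription-id", cfg);           // illustrative id
    const files = await transcriptionsListFiles("some-transcription-id", void 0, cfg);
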
@@ -3279,20 +3416,20 @@ var AzureSTTAdapter = class extends BaseAdapter {
  entityDetection: false,
  piiRedaction: false
  };
+ this.baseUrl = "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.1";
  }
+ // Default, overridden in initialize()
  initialize(config) {
  super.initialize(config);
  this.region = config.region || "eastus";
  this.baseUrl = config.baseUrl || `https://${this.region}.api.cognitive.microsoft.com/speechtotext/v3.1`;
- this.client = axios4.create({
- baseURL: this.baseUrl,
- timeout: config.timeout || 6e4,
- headers: {
- "Ocp-Apim-Subscription-Key": config.apiKey,
- "Content-Type": "application/json",
- ...config.headers
- }
- });
+ }
+ /**
+ * Get axios config for generated API client functions
+ * Configures headers and base URL using Azure subscription key
+ */
+ getAxiosConfig() {
+ return super.getAxiosConfig("Ocp-Apim-Subscription-Key");
  }
  /**
  * Submit audio for transcription
@@ -3324,9 +3461,9 @@ var AzureSTTAdapter = class extends BaseAdapter {
  contentUrls: [audio.url],
  properties: this.buildTranscriptionProperties(options)
  };
- const response = await this.client.post(
- "/transcriptions",
- transcriptionRequest
+ const response = await transcriptionsCreate(
+ transcriptionRequest,
+ this.getAxiosConfig()
  );
  const transcription = response.data;
  return {
@@ -3357,9 +3494,7 @@
  async getTranscript(transcriptId) {
  this.validateConfig();
  try {
- const statusResponse = await this.client.get(
- `/transcriptions/${transcriptId}`
- );
+ const statusResponse = await transcriptionsGet(transcriptId, this.getAxiosConfig());
  const transcription = statusResponse.data;
  const status = this.normalizeStatus(transcription.status);
  if (status !== "completed") {
@@ -3387,7 +3522,11 @@
  raw: transcription
  };
  }
- const filesResponse = await this.client.get(transcription.links.files);
+ const filesResponse = await transcriptionsListFiles(
+ transcriptId,
+ void 0,
+ this.getAxiosConfig()
+ );
  const files = filesResponse.data?.values || [];
  const resultFile = files.find((file) => file.kind === "Transcription");
  if (!resultFile?.links?.contentUrl) {
@@ -3401,7 +3540,7 @@
  raw: transcription
  };
  }
- const contentResponse = await axios4.get(resultFile.links.contentUrl);
+ const contentResponse = await axios5.get(resultFile.links.contentUrl);
  const transcriptionData = contentResponse.data;
  return this.normalizeResponse(transcription, transcriptionData);
  } catch (error) {
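
Condensed, the completed-transcript path above chains three requests; the final content fetch goes through plain axios5 without the subscription-key config, presumably because the file link is a pre-signed URL:

    const statusResponse = await transcriptionsGet(id, this.getAxiosConfig());
    const files = (await transcriptionsListFiles(id, void 0, this.getAxiosConfig())).data?.values || [];
    const resultFile = files.find((f) => f.kind === "Transcription");
    const content = await axios5.get(resultFile.links.contentUrl); // error handling omitted
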
@@ -3500,7 +3639,57 @@ function createAzureSTTAdapter(config) {
  }

  // src/adapters/openai-whisper-adapter.ts
- import axios5 from "axios";
+ import axios7 from "axios";
+
+ // src/generated/openai/api/openAIAPI.ts
+ import axios6 from "axios";
+ var createTranscription = (createTranscriptionRequest, options) => {
+ const formData = new FormData();
+ formData.append("file", createTranscriptionRequest.file);
+ formData.append("model", createTranscriptionRequest.model);
+ if (createTranscriptionRequest.language !== void 0) {
+ formData.append("language", createTranscriptionRequest.language);
+ }
+ if (createTranscriptionRequest.prompt !== void 0) {
+ formData.append("prompt", createTranscriptionRequest.prompt);
+ }
+ if (createTranscriptionRequest.response_format !== void 0) {
+ formData.append("response_format", createTranscriptionRequest.response_format);
+ }
+ if (createTranscriptionRequest.temperature !== void 0) {
+ formData.append("temperature", createTranscriptionRequest.temperature.toString());
+ }
+ if (createTranscriptionRequest.include !== void 0) {
+ createTranscriptionRequest.include.forEach((value) => formData.append("include", value));
+ }
+ if (createTranscriptionRequest.timestamp_granularities !== void 0) {
+ createTranscriptionRequest.timestamp_granularities.forEach(
+ (value) => formData.append("timestamp_granularities", value)
+ );
+ }
+ if (createTranscriptionRequest.stream !== void 0 && createTranscriptionRequest.stream !== null) {
+ formData.append("stream", createTranscriptionRequest.stream.toString());
+ }
+ if (createTranscriptionRequest.chunking_strategy !== void 0 && createTranscriptionRequest.chunking_strategy !== null) {
+ formData.append(
+ "chunking_strategy",
+ typeof createTranscriptionRequest.chunking_strategy === "object" ? JSON.stringify(createTranscriptionRequest.chunking_strategy) : createTranscriptionRequest.chunking_strategy
+ );
+ }
+ if (createTranscriptionRequest.known_speaker_names !== void 0) {
+ createTranscriptionRequest.known_speaker_names.forEach(
+ (value) => formData.append("known_speaker_names", value)
+ );
+ }
+ if (createTranscriptionRequest.known_speaker_references !== void 0) {
+ createTranscriptionRequest.known_speaker_references.forEach(
+ (value) => formData.append("known_speaker_references", value)
+ );
+ }
+ return axios6.post("/audio/transcriptions", formData, options);
+ };
+
+ // src/adapters/openai-whisper-adapter.ts
  var OpenAIWhisperAdapter = class extends BaseAdapter {
  constructor() {
  super(...arguments);
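
A minimal sketch of calling the generated helper directly; "whisper-1", the file name, and the adapter variable are illustrative (the Bearer-token getAxiosConfig it relies on is defined just below):

    import { readFile } from "node:fs/promises";

    const bytes = await readFile("meeting.mp3");
    const response = await createTranscription(
      {
        file: new Blob([bytes]), // the generated type expects a Blob (see the adapter note below)
        model: "whisper-1",
        response_format: "verbose_json",
        timestamp_granularities: ["word", "segment"]
      },
      adapter.getAxiosConfig()   // Authorization: Bearer <apiKey>
    );
    console.log(response.data.text);
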
@@ -3522,19 +3711,12 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
  };
  this.baseUrl = "https://api.openai.com/v1";
  }
- initialize(config) {
- super.initialize(config);
- this.baseUrl = config.baseUrl || this.baseUrl;
- this.client = axios5.create({
- baseURL: this.baseUrl,
- timeout: config.timeout || 12e4,
- // 2 minutes default (audio processing can take time)
- headers: {
- Authorization: `Bearer ${config.apiKey}`,
- "Content-Type": "multipart/form-data",
- ...config.headers
- }
- });
+ /**
+ * Get axios config for generated API client functions
+ * Configures headers and base URL using Bearer token authorization
+ */
+ getAxiosConfig() {
+ return super.getAxiosConfig("Authorization", (apiKey) => `Bearer ${apiKey}`);
  }
  /**
  * Submit audio for transcription
@@ -3556,7 +3738,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
  let audioData;
  let fileName = "audio.mp3";
  if (audio.type === "url") {
- const response2 = await axios5.get(audio.url, {
+ const response2 = await axios7.get(audio.url, {
  responseType: "arraybuffer"
  });
  audioData = Buffer.from(response2.data);
@@ -3581,40 +3763,37 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
  const model = this.selectModel(options);
  const isDiarization = model === "gpt-4o-transcribe-diarize";
  const needsWords = options?.wordTimestamps === true;
- const requestBody = {
+ const request = {
  file: audioData,
+ // Generated type expects Blob
  model
  };
  if (options?.language) {
- requestBody.language = options.language;
+ request.language = options.language;
  }
  if (options?.metadata?.prompt) {
- requestBody.prompt = options.metadata.prompt;
+ request.prompt = options.metadata.prompt;
  }
  if (options?.metadata?.temperature !== void 0) {
- requestBody.temperature = options.metadata.temperature;
+ request.temperature = options.metadata.temperature;
  }
  if (isDiarization) {
- requestBody.response_format = "diarized_json";
+ request.response_format = "diarized_json";
  if (options?.metadata?.knownSpeakerNames) {
- requestBody["known_speaker_names"] = options.metadata.knownSpeakerNames;
+ request.known_speaker_names = options.metadata.knownSpeakerNames;
  }
  if (options?.metadata?.knownSpeakerReferences) {
- requestBody["known_speaker_references"] = options.metadata.knownSpeakerReferences;
+ request.known_speaker_references = options.metadata.knownSpeakerReferences;
  }
  } else if (needsWords || options?.diarization) {
- requestBody.response_format = "verbose_json";
+ request.response_format = "verbose_json";
  if (needsWords) {
- requestBody.timestamp_granularities = ["word", "segment"];
+ request.timestamp_granularities = ["word", "segment"];
  }
  } else {
- requestBody.response_format = "json";
+ request.response_format = "json";
  }
- const response = await this.client.post("/audio/transcriptions", requestBody, {
- headers: {
- "Content-Type": "multipart/form-data"
- }
- });
+ const response = await createTranscription(request, this.getAxiosConfig());
  return this.normalizeResponse(response.data, model, isDiarization);
  } catch (error) {
  return this.createErrorResponse(error);
@@ -3735,7 +3914,7 @@ function createOpenAIWhisperAdapter(config) {
  }

  // src/adapters/speechmatics-adapter.ts
- import axios6 from "axios";
+ import axios8 from "axios";
  var SpeechmaticsAdapter = class extends BaseAdapter {
  constructor() {
  super(...arguments);
@@ -3757,7 +3936,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
  initialize(config) {
  super.initialize(config);
  this.baseUrl = config.baseUrl || this.baseUrl;
- this.client = axios6.create({
+ this.client = axios8.create({
  baseURL: this.baseUrl,
  timeout: config.timeout || 12e4,
  headers: {