voice-router-dev 0.1.6 → 0.1.8

package/dist/index.js CHANGED
@@ -202,39 +202,7 @@ var VoiceRouter = class {
  const adapter = this.getAdapter(provider);
  return adapter.getTranscript(transcriptId);
  }
- /**
- * Stream audio for real-time transcription
- * Only works with providers that support streaming
- *
- * @param options - Streaming options including provider selection
- * @param callbacks - Event callbacks for transcription results
- * @returns Promise that resolves with a StreamingSession
- *
- * @example
- * ```typescript
- * import { VoiceRouter } from '@meeting-baas/sdk';
- *
- * const router = new VoiceRouter();
- * router.initialize({
- * gladia: { apiKey: process.env.GLADIA_KEY },
- * deepgram: { apiKey: process.env.DEEPGRAM_KEY }
- * });
- *
- * const session = await router.transcribeStream({
- * provider: 'deepgram',
- * encoding: 'linear16',
- * sampleRate: 16000,
- * language: 'en'
- * }, {
- * onTranscript: (event) => console.log(event.text),
- * onError: (error) => console.error(error)
- * });
- *
- * // Send audio chunks
- * await session.sendAudio({ data: audioBuffer });
- * await session.close();
- * ```
- */
+ // Implementation
  async transcribeStream(options, callbacks) {
  const provider = this.selectProvider(options?.provider);
  const adapter = this.getAdapter(provider);
@@ -300,24 +268,102 @@ function createVoiceRouter(config, adapters) {
  return router;
  }

+ // src/constants/defaults.ts
+ var DEFAULT_TIMEOUTS = {
+ /** Standard HTTP request timeout for API calls (60 seconds) */
+ HTTP_REQUEST: 6e4,
+ /** Audio processing timeout for long audio files (120 seconds) */
+ AUDIO_PROCESSING: 12e4,
+ /** WebSocket connection establishment timeout (10 seconds) */
+ WS_CONNECTION: 1e4,
+ /** WebSocket graceful close timeout (5 seconds) */
+ WS_CLOSE: 5e3
+ };
+ var DEFAULT_POLLING = {
+ /** Maximum number of polling attempts before timing out */
+ MAX_ATTEMPTS: 60,
+ /** Standard interval between polling attempts (2 seconds) */
+ INTERVAL_MS: 2e3,
+ /** Slower interval for long-running jobs (3 seconds) */
+ SLOW_INTERVAL_MS: 3e3
+ };
+
+ // src/utils/errors.ts
+ var ERROR_CODES = {
+ /** Failed to parse API response or WebSocket message */
+ PARSE_ERROR: "PARSE_ERROR",
+ /** WebSocket connection error */
+ WEBSOCKET_ERROR: "WEBSOCKET_ERROR",
+ /** Async transcription job did not complete within timeout */
+ POLLING_TIMEOUT: "POLLING_TIMEOUT",
+ /** Transcription processing failed on provider side */
+ TRANSCRIPTION_ERROR: "TRANSCRIPTION_ERROR",
+ /** Connection attempt timed out */
+ CONNECTION_TIMEOUT: "CONNECTION_TIMEOUT",
+ /** Invalid input provided to API */
+ INVALID_INPUT: "INVALID_INPUT",
+ /** Requested operation not supported by provider */
+ NOT_SUPPORTED: "NOT_SUPPORTED",
+ /** No transcription results available */
+ NO_RESULTS: "NO_RESULTS",
+ /** Unspecified or unknown error */
+ UNKNOWN_ERROR: "UNKNOWN_ERROR"
+ };
+ var ERROR_MESSAGES = {
+ PARSE_ERROR: "Failed to parse response data",
+ WEBSOCKET_ERROR: "WebSocket connection error",
+ POLLING_TIMEOUT: "Transcription did not complete within timeout period",
+ TRANSCRIPTION_ERROR: "Transcription processing failed",
+ CONNECTION_TIMEOUT: "Connection attempt timed out",
+ INVALID_INPUT: "Invalid input provided",
+ NOT_SUPPORTED: "Operation not supported by this provider",
+ NO_RESULTS: "No transcription results available",
+ UNKNOWN_ERROR: "An unknown error occurred"
+ };
+ function createError(code, customMessage, details) {
+ return {
+ code,
+ message: customMessage || ERROR_MESSAGES[code],
+ details
+ };
+ }
+
  // src/adapters/base-adapter.ts
  var BaseAdapter = class {
  initialize(config) {
  this.config = config;
  }
  /**
- * Helper method to create error responses
+ * Helper method to create error responses with stack traces
+ *
+ * @param error - Error object or unknown error
+ * @param statusCode - Optional HTTP status code
+ * @param code - Optional error code (defaults to extracted or UNKNOWN_ERROR)
  */
- createErrorResponse(error, statusCode) {
+ createErrorResponse(error, statusCode, code) {
  const err = error;
+ const httpStatus = statusCode || err.statusCode || err.response?.status;
+ const httpStatusText = err.response?.statusText;
+ const responseData = err.response?.data;
  return {
  success: false,
  provider: this.name,
  error: {
- code: err.code || "UNKNOWN_ERROR",
+ code: code || err.code || ERROR_CODES.UNKNOWN_ERROR,
  message: err.message || "An unknown error occurred",
- statusCode: statusCode || err.statusCode,
- details: error
+ statusCode: httpStatus,
+ details: {
+ // Include full error object
+ error,
+ // Include stack trace if available
+ stack: err.stack,
+ // Include HTTP response details
+ httpStatus,
+ httpStatusText,
+ responseData,
+ // Include provider name for debugging
+ provider: this.name
+ }
  }
  };
  }
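
The new src/utils/errors.ts module becomes the single source of truth for error codes and their default messages. A minimal TypeScript sketch of how createError resolves messages (the UnifiedError shape and the ErrorCode union are assumptions inferred from the bundle above):

```ts
// Hypothetical UnifiedError shape inferred from the bundle above.
type ErrorCode =
  | "PARSE_ERROR" | "WEBSOCKET_ERROR" | "POLLING_TIMEOUT"
  | "TRANSCRIPTION_ERROR" | "CONNECTION_TIMEOUT" | "INVALID_INPUT"
  | "NOT_SUPPORTED" | "NO_RESULTS" | "UNKNOWN_ERROR";

interface UnifiedError {
  code: ErrorCode;
  message: string;
  details?: unknown;
}

const ERROR_MESSAGES: Record<ErrorCode, string> = {
  PARSE_ERROR: "Failed to parse response data",
  WEBSOCKET_ERROR: "WebSocket connection error",
  POLLING_TIMEOUT: "Transcription did not complete within timeout period",
  TRANSCRIPTION_ERROR: "Transcription processing failed",
  CONNECTION_TIMEOUT: "Connection attempt timed out",
  INVALID_INPUT: "Invalid input provided",
  NOT_SUPPORTED: "Operation not supported by this provider",
  NO_RESULTS: "No transcription results available",
  UNKNOWN_ERROR: "An unknown error occurred",
};

// Same contract as the bundled createError: a custom message wins,
// otherwise the canonical message for the code is used.
function createError(code: ErrorCode, customMessage?: string, details?: unknown): UnifiedError {
  return { code, message: customMessage ?? ERROR_MESSAGES[code], details };
}

console.log(createError("POLLING_TIMEOUT").message);
// -> "Transcription did not complete within timeout period"
```

Call sites that previously passed bare strings like "PARSE_ERROR" now reference ERROR_CODES, so a mistyped code fails at compile time in the TypeScript source instead of shipping as a silently unknown string.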
@@ -332,6 +378,64 @@ var BaseAdapter = class {
  throw new Error(`API key is required for ${this.name} provider`);
  }
  }
+ /**
+ * Build axios config for generated API client functions
+ *
+ * @param authHeaderName - Header name for API key (e.g., "Authorization", "x-gladia-key")
+ * @param authHeaderValue - Optional function to format auth header value (defaults to raw API key)
+ * @returns Axios config object
+ */
+ getAxiosConfig(authHeaderName = "Authorization", authHeaderValue) {
+ this.validateConfig();
+ const authValue = authHeaderValue ? authHeaderValue(this.config.apiKey) : this.config.apiKey;
+ return {
+ baseURL: this.config.baseUrl || this.baseUrl,
+ timeout: this.config.timeout || DEFAULT_TIMEOUTS.HTTP_REQUEST,
+ headers: {
+ [authHeaderName]: authValue,
+ "Content-Type": "application/json",
+ ...this.config.headers
+ }
+ };
+ }
+ /**
+ * Generic polling helper for async transcription jobs
+ *
+ * Polls getTranscript() until job completes or times out.
+ *
+ * @param transcriptId - Job/transcript ID to poll
+ * @param options - Polling configuration
+ * @returns Final transcription result
+ */
+ async pollForCompletion(transcriptId, options) {
+ const { maxAttempts = DEFAULT_POLLING.MAX_ATTEMPTS, intervalMs = DEFAULT_POLLING.INTERVAL_MS } = options || {};
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
+ const result = await this.getTranscript(transcriptId);
+ if (!result.success) {
+ return result;
+ }
+ const status = result.data?.status;
+ if (status === "completed") {
+ return result;
+ }
+ if (status === "error") {
+ return this.createErrorResponse(
+ new Error("Transcription failed"),
+ void 0,
+ ERROR_CODES.TRANSCRIPTION_ERROR
+ );
+ }
+ await new Promise((resolve) => setTimeout(resolve, intervalMs));
+ }
+ return {
+ success: false,
+ provider: this.name,
+ error: {
+ code: ERROR_CODES.POLLING_TIMEOUT,
+ message: `Transcription did not complete after ${maxAttempts} attempts`
+ }
+ };
+ }
  };

  // src/adapters/gladia-adapter.ts
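
getAxiosConfig and pollForCompletion hoist logic that 0.1.6 duplicated in each adapter; the per-adapter copies are deleted in the hunks below. A hedged TypeScript sketch of the resulting subclass contract (AdapterConfig and the placeholder ExampleAdapter are illustrative assumptions; only the fields the bundle actually reads are modeled):

```ts
import type { AxiosRequestConfig } from "axios";

// Assumed config shape, inferred from the fields the bundle reads.
interface AdapterConfig {
  apiKey: string;
  baseUrl?: string;
  timeout?: number;
  headers?: Record<string, string>;
}

abstract class BaseAdapter {
  abstract readonly name: string;
  protected abstract baseUrl: string;
  protected config!: AdapterConfig;

  initialize(config: AdapterConfig): void {
    this.config = config;
  }

  // One implementation of auth/timeout/header wiring for every provider;
  // subclasses pass only what differs: the header name and an optional
  // formatter for the header value.
  getAxiosConfig(
    authHeaderName = "Authorization",
    authHeaderValue?: (apiKey: string) => string
  ): AxiosRequestConfig {
    const authValue = authHeaderValue ? authHeaderValue(this.config.apiKey) : this.config.apiKey;
    return {
      baseURL: this.config.baseUrl || this.baseUrl,
      timeout: this.config.timeout || 60_000, // DEFAULT_TIMEOUTS.HTTP_REQUEST
      headers: {
        [authHeaderName]: authValue,
        "Content-Type": "application/json",
        ...this.config.headers,
      },
    };
  }
}

// Each provider override collapses to a one-liner, as the Gladia,
// AssemblyAI, Azure, and OpenAI hunks below show:
class ExampleAdapter extends BaseAdapter {
  readonly name = "example";
  protected baseUrl = "https://api.example.invalid"; // placeholder
  getAxiosConfig() {
    return super.getAxiosConfig("x-example-key");
  }
}
```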
@@ -378,6 +482,143 @@ function mapEncodingToProvider(unifiedEncoding, provider) {
  return providerEncoding;
  }

+ // src/utils/websocket-helpers.ts
+ function waitForWebSocketOpen(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CONNECTION) {
+ return new Promise((resolve, reject) => {
+ const timeout = setTimeout(() => {
+ reject(new Error("WebSocket connection timeout"));
+ }, timeoutMs);
+ ws.once("open", () => {
+ clearTimeout(timeout);
+ resolve();
+ });
+ ws.once("error", (error) => {
+ clearTimeout(timeout);
+ reject(error);
+ });
+ });
+ }
+ function closeWebSocket(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CLOSE) {
+ return new Promise((resolve) => {
+ const timeout = setTimeout(() => {
+ ws.terminate();
+ resolve();
+ }, timeoutMs);
+ ws.close();
+ ws.once("close", () => {
+ clearTimeout(timeout);
+ resolve();
+ });
+ });
+ }
+ function setupWebSocketHandlers(ws, callbacks, setSessionStatus) {
+ ws.on("open", () => {
+ setSessionStatus("open");
+ callbacks?.onOpen?.();
+ });
+ ws.on("error", (error) => {
+ callbacks?.onError?.(createError(ERROR_CODES.WEBSOCKET_ERROR, error.message, error));
+ });
+ ws.on("close", (code, reason) => {
+ setSessionStatus("closed");
+ callbacks?.onClose?.(code, reason.toString());
+ });
+ }
+ function validateSessionForAudio(sessionStatus, wsReadyState, WebSocketOpen) {
+ if (sessionStatus !== "open") {
+ throw new Error(`Cannot send audio: session is ${sessionStatus}`);
+ }
+ if (wsReadyState !== WebSocketOpen) {
+ throw new Error("WebSocket is not open");
+ }
+ }
+
+ // src/utils/validation.ts
+ function validateEnumValue(value, enumType, fieldName, provider) {
+ const validValues = Object.values(enumType);
+ const isValid = validValues.some((v) => v === value);
+ if (!isValid) {
+ throw new Error(
+ `${provider} does not support ${fieldName} '${value}'. Supported values (from OpenAPI spec): ${validValues.join(", ")}`
+ );
+ }
+ return value;
+ }
+
+ // src/utils/transcription-helpers.ts
+ function extractSpeakersFromUtterances(utterances, getSpeakerId, formatLabel) {
+ if (!utterances || utterances.length === 0) {
+ return void 0;
+ }
+ const speakerSet = /* @__PURE__ */ new Set();
+ utterances.forEach((utterance) => {
+ const speakerId = getSpeakerId(utterance);
+ if (speakerId !== void 0) {
+ speakerSet.add(String(speakerId));
+ }
+ });
+ if (speakerSet.size === 0) {
+ return void 0;
+ }
+ return Array.from(speakerSet).map((speakerId) => ({
+ id: speakerId,
+ label: formatLabel ? formatLabel(speakerId) : `Speaker ${speakerId}`
+ }));
+ }
+ function extractWords(words, mapper) {
+ if (!words || words.length === 0) {
+ return void 0;
+ }
+ const normalizedWords = words.map(mapper);
+ return normalizedWords.length > 0 ? normalizedWords : void 0;
+ }
+ var STATUS_MAPPINGS = {
+ gladia: {
+ queued: "queued",
+ processing: "processing",
+ done: "completed",
+ error: "error"
+ },
+ assemblyai: {
+ queued: "queued",
+ processing: "processing",
+ completed: "completed",
+ error: "error"
+ },
+ deepgram: {
+ queued: "queued",
+ processing: "processing",
+ completed: "completed",
+ error: "error"
+ },
+ azure: {
+ succeeded: "completed",
+ running: "processing",
+ notstarted: "queued",
+ failed: "error"
+ },
+ speechmatics: {
+ running: "processing",
+ done: "completed",
+ rejected: "error",
+ expired: "error"
+ }
+ };
+ function normalizeStatus(providerStatus, provider, defaultStatus = "queued") {
+ if (!providerStatus) return defaultStatus;
+ const mapping = STATUS_MAPPINGS[provider];
+ const statusKey = providerStatus.toString().toLowerCase();
+ if (statusKey in mapping) {
+ return mapping[statusKey];
+ }
+ for (const [key, value] of Object.entries(mapping)) {
+ if (statusKey.includes(key)) {
+ return value;
+ }
+ }
+ return defaultStatus;
+ }
+
  // src/generated/gladia/api/gladiaControlAPI.ts
  var import_axios = __toESM(require("axios"));

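normalizeStatus replaces each adapter's status switch with one table lookup plus a substring fallback. A runnable sketch using two entries from STATUS_MAPPINGS above (the UnifiedStatus type name is an assumption):

```ts
type UnifiedStatus = "queued" | "processing" | "completed" | "error";

// Two entries copied from STATUS_MAPPINGS above; the full table covers
// gladia, assemblyai, deepgram, azure, and speechmatics.
const STATUS_MAPPINGS: Record<string, Record<string, UnifiedStatus>> = {
  gladia: { queued: "queued", processing: "processing", done: "completed", error: "error" },
  azure: { succeeded: "completed", running: "processing", notstarted: "queued", failed: "error" },
};

function normalizeStatus(
  providerStatus: unknown,
  provider: keyof typeof STATUS_MAPPINGS,
  defaultStatus: UnifiedStatus = "queued"
): UnifiedStatus {
  if (!providerStatus) return defaultStatus;
  const mapping = STATUS_MAPPINGS[provider];
  const statusKey = String(providerStatus).toLowerCase();
  if (statusKey in mapping) return mapping[statusKey];
  // Substring fallback tolerates decorated statuses that embed a known key.
  for (const [key, value] of Object.entries(mapping)) {
    if (statusKey.includes(key)) return value;
  }
  return defaultStatus;
}

console.log(normalizeStatus("Succeeded", "azure")); // "completed"
console.log(normalizeStatus("done", "gladia"));     // "completed"
```
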
@@ -1498,21 +1739,10 @@ var GladiaAdapter = class extends BaseAdapter {
  }
  /**
  * Get axios config for generated API client functions
- * Configures headers and base URL
+ * Configures headers and base URL using Gladia's x-gladia-key header
  */
  getAxiosConfig() {
- if (!this.config) {
- throw new Error("Adapter not initialized. Call initialize() first.");
- }
- return {
- baseURL: this.config.baseUrl || this.baseUrl,
- timeout: this.config.timeout || 6e4,
- headers: {
- "x-gladia-key": this.config.apiKey,
- "Content-Type": "application/json",
- ...this.config.headers
- }
- };
+ return super.getAxiosConfig("x-gladia-key");
  }
  /**
  * Submit audio for transcription
@@ -1679,29 +1909,13 @@ var GladiaAdapter = class extends BaseAdapter {
  * Normalize Gladia response to unified format
  */
  normalizeResponse(response) {
- let status;
- switch (response.status) {
- case "queued":
- status = "queued";
- break;
- case "processing":
- status = "processing";
- break;
- case "done":
- status = "completed";
- break;
- case "error":
- status = "error";
- break;
- default:
- status = "queued";
- }
+ const status = normalizeStatus(response.status, "gladia");
  if (response.status === "error") {
  return {
  success: false,
  provider: this.name,
  error: {
- code: response.error_code?.toString() || "TRANSCRIPTION_ERROR",
+ code: response.error_code?.toString() || ERROR_CODES.TRANSCRIPTION_ERROR,
  message: "Transcription failed",
  statusCode: response.error_code || void 0
  },
@@ -1741,22 +1955,11 @@ var GladiaAdapter = class extends BaseAdapter {
  * Extract speaker information from Gladia response
  */
  extractSpeakers(transcription) {
- if (!transcription?.utterances) {
- return void 0;
- }
- const speakerSet = /* @__PURE__ */ new Set();
- transcription.utterances.forEach((utterance) => {
- if (utterance.speaker !== void 0) {
- speakerSet.add(utterance.speaker);
- }
- });
- if (speakerSet.size === 0) {
- return void 0;
- }
- return Array.from(speakerSet).map((speakerId) => ({
- id: speakerId.toString(),
- label: `Speaker ${speakerId}`
- }));
+ return extractSpeakersFromUtterances(
+ transcription?.utterances,
+ (utterance) => utterance.speaker,
+ (id) => `Speaker ${id}`
+ );
  }
  /**
  * Extract word timestamps from Gladia response
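
extractSpeakersFromUtterances is generic over the provider's utterance shape: each adapter passes an accessor for its speaker id and an optional label formatter. A sketch with an assumed Speaker shape inferred from the return value:

```ts
interface Speaker {
  id: string;
  label: string;
}

// Generic over U so each adapter can keep its own utterance type.
function extractSpeakersFromUtterances<U>(
  utterances: U[] | undefined,
  getSpeakerId: (utterance: U) => string | number | undefined,
  formatLabel?: (id: string) => string
): Speaker[] | undefined {
  if (!utterances || utterances.length === 0) return undefined;
  const speakerSet = new Set<string>();
  for (const utterance of utterances) {
    const speakerId = getSpeakerId(utterance);
    if (speakerId !== undefined) speakerSet.add(String(speakerId));
  }
  if (speakerSet.size === 0) return undefined;
  return Array.from(speakerSet).map((id) => ({
    id,
    label: formatLabel ? formatLabel(id) : `Speaker ${id}`,
  }));
}

// Usage mirroring the Gladia call site above:
const speakers = extractSpeakersFromUtterances(
  [{ speaker: 0 }, { speaker: 1 }, { speaker: 0 }],
  (u) => u.speaker,
  (id) => `Speaker ${id}`
);
console.log(speakers); // [{ id: "0", label: "Speaker 0" }, { id: "1", label: "Speaker 1" }]
```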
@@ -1767,14 +1970,17 @@ var GladiaAdapter = class extends BaseAdapter {
  }
  const allWords = transcription.utterances.flatMap(
  (utterance) => utterance.words.map((word) => ({
- text: word.word,
- start: word.start,
- end: word.end,
- confidence: word.confidence,
- speaker: utterance.speaker?.toString()
+ word,
+ speaker: utterance.speaker
  }))
  );
- return allWords.length > 0 ? allWords : void 0;
+ return extractWords(allWords, (item) => ({
+ text: item.word.word,
+ start: item.word.start,
+ end: item.word.end,
+ confidence: item.word.confidence,
+ speaker: item.speaker?.toString()
+ }));
  }
  /**
  * Extract utterances from Gladia response
@@ -1800,38 +2006,6 @@ var GladiaAdapter = class extends BaseAdapter {
  /**
  * Poll for transcription completion
  */
- async pollForCompletion(jobId, maxAttempts = 60, intervalMs = 2e3) {
- for (let attempt = 0; attempt < maxAttempts; attempt++) {
- const result = await this.getTranscript(jobId);
- if (!result.success) {
- return result;
- }
- const status = result.data?.status;
- if (status === "completed") {
- return result;
- }
- if (status === "error") {
- return {
- success: false,
- provider: this.name,
- error: {
- code: "TRANSCRIPTION_ERROR",
- message: "Transcription failed"
- },
- raw: result.raw
- };
- }
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
- }
- return {
- success: false,
- provider: this.name,
- error: {
- code: "POLLING_TIMEOUT",
- message: `Transcription did not complete after ${maxAttempts} attempts`
- }
- };
- }
  /**
  * Stream audio for real-time transcription
  *
@@ -1875,14 +2049,12 @@ var GladiaAdapter = class extends BaseAdapter {
  this.validateConfig();
  let validatedSampleRate;
  if (options?.sampleRate) {
- const validRates = Object.values(StreamingSupportedSampleRateEnum);
- const isValidRate = validRates.some((rate) => rate === options.sampleRate);
- if (!isValidRate) {
- throw new Error(
- `Gladia does not support sample rate ${options.sampleRate} Hz. Supported rates (from OpenAPI spec): ${validRates.join(", ")} Hz`
- );
- }
- validatedSampleRate = options.sampleRate;
+ validatedSampleRate = validateEnumValue(
+ options.sampleRate,
+ StreamingSupportedSampleRateEnum,
+ "sample rate",
+ "Gladia"
+ );
  }
  const streamingRequest = {
  encoding: options?.encoding ? mapEncodingToProvider(options.encoding, "gladia") : void 0,
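
validateEnumValue centralizes the check that a caller-supplied value appears in a generated OpenAPI enum, along with the error message format. A sketch with a stand-in enum object (the real StreamingSupportedSampleRateEnum values come from Gladia's generated spec and are not reproduced here):

```ts
// Stand-in for a generated enum object; real values come from the spec.
const SampleRateEnum = {
  hz8000: 8000,
  hz16000: 16000,
  hz32000: 32000,
} as const;

function validateEnumValue<T>(
  value: T,
  enumType: Record<string, T>,
  fieldName: string,
  provider: string
): T {
  const validValues = Object.values(enumType);
  if (!validValues.some((v) => v === value)) {
    throw new Error(
      `${provider} does not support ${fieldName} '${value}'. ` +
        `Supported values (from OpenAPI spec): ${validValues.join(", ")}`
    );
  }
  return value;
}

validateEnumValue(16000, SampleRateEnum, "sample rate", "Gladia"); // ok
try {
  validateEnumValue(44100, SampleRateEnum, "sample rate", "Gladia"); // throws
} catch (err) {
  console.error((err as Error).message);
}
```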
@@ -1904,9 +2076,8 @@ var GladiaAdapter = class extends BaseAdapter {
  const { id, url: wsUrl } = initResponse.data;
  const ws = new import_ws.default(wsUrl);
  let sessionStatus = "connecting";
- ws.on("open", () => {
- sessionStatus = "open";
- callbacks?.onOpen?.();
+ setupWebSocketHandlers(ws, callbacks, (status) => {
+ sessionStatus = status;
  });
  ws.on("message", (data) => {
  try {
@@ -1951,48 +2122,20 @@ var GladiaAdapter = class extends BaseAdapter {
  }
  } catch (error) {
  callbacks?.onError?.({
- code: "PARSE_ERROR",
+ code: ERROR_CODES.PARSE_ERROR,
  message: "Failed to parse WebSocket message",
  details: error
  });
  }
  });
- ws.on("error", (error) => {
- callbacks?.onError?.({
- code: "WEBSOCKET_ERROR",
- message: error.message,
- details: error
- });
- });
- ws.on("close", (code, reason) => {
- sessionStatus = "closed";
- callbacks?.onClose?.(code, reason.toString());
- });
- await new Promise((resolve, reject) => {
- const timeout = setTimeout(() => {
- reject(new Error("WebSocket connection timeout"));
- }, 1e4);
- ws.once("open", () => {
- clearTimeout(timeout);
- resolve();
- });
- ws.once("error", (error) => {
- clearTimeout(timeout);
- reject(error);
- });
- });
+ await waitForWebSocketOpen(ws);
  return {
  id,
  provider: this.name,
  createdAt: /* @__PURE__ */ new Date(),
  getStatus: () => sessionStatus,
  sendAudio: async (chunk) => {
- if (sessionStatus !== "open") {
- throw new Error(`Cannot send audio: session is ${sessionStatus}`);
- }
- if (ws.readyState !== import_ws.default.OPEN) {
- throw new Error("WebSocket is not open");
- }
+ validateSessionForAudio(sessionStatus, ws.readyState, import_ws.default.OPEN);
  ws.send(chunk.data);
  if (chunk.isLast) {
  ws.send(
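
The sendAudio guard in the hunk above now delegates to validateSessionForAudio, which checks both the SDK-tracked session status and the socket's live readyState. A hedged sketch in isolation, assuming the Node ws package the bundle imports as import_ws:

```ts
import WebSocket from "ws";

type SessionStatus = "connecting" | "open" | "closed";

// Same two checks the Gladia sendAudio path performs before ws.send().
function validateSessionForAudio(
  sessionStatus: SessionStatus,
  wsReadyState: number,
  webSocketOpen: number
): void {
  if (sessionStatus !== "open") {
    throw new Error(`Cannot send audio: session is ${sessionStatus}`);
  }
  if (wsReadyState !== webSocketOpen) {
    throw new Error("WebSocket is not open");
  }
}

// At the call site the adapter passes its own tracked status plus the
// socket's readyState, so both levels of state must agree before sending:
function sendAudio(ws: WebSocket, sessionStatus: SessionStatus, data: Buffer): void {
  validateSessionForAudio(sessionStatus, ws.readyState, WebSocket.OPEN);
  ws.send(data);
}
```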
@@ -2014,18 +2157,8 @@ var GladiaAdapter = class extends BaseAdapter {
  })
  );
  }
- return new Promise((resolve) => {
- const timeout = setTimeout(() => {
- ws.terminate();
- resolve();
- }, 5e3);
- ws.close();
- ws.once("close", () => {
- clearTimeout(timeout);
- sessionStatus = "closed";
- resolve();
- });
- });
+ await closeWebSocket(ws);
+ sessionStatus = "closed";
  }
  };
  }
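
closeWebSocket encodes the close protocol that was previously inlined: request a graceful close, then terminate() if no close frame arrives within the timeout, so session.close() cannot hang. A sketch against the Node ws package:

```ts
import WebSocket from "ws";

function closeWebSocket(ws: WebSocket, timeoutMs = 5_000): Promise<void> {
  return new Promise((resolve) => {
    // Fallback: hard-kill the socket if no close frame arrives in time.
    const timeout = setTimeout(() => {
      ws.terminate();
      resolve();
    }, timeoutMs);
    ws.close(); // request a graceful close
    ws.once("close", () => {
      clearTimeout(timeout);
      resolve();
    });
  });
}
```

One subtle behavior change: 0.1.6 set sessionStatus = "closed" only on a graceful close, while 0.1.8 sets it after closeWebSocket resolves, covering the forced-termination path as well.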
@@ -2379,26 +2512,16 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  entityDetection: true,
  piiRedaction: true
  };
- this.baseUrl = "https://api.assemblyai.com/v2";
+ this.baseUrl = "https://api.assemblyai.com";
+ // Generated functions already include /v2 path
  this.wsBaseUrl = "wss://api.assemblyai.com/v2/realtime/ws";
  }
  /**
  * Get axios config for generated API client functions
- * Configures headers and base URL
+ * Configures headers and base URL using authorization header
  */
  getAxiosConfig() {
- if (!this.config) {
- throw new Error("Adapter not initialized. Call initialize() first.");
- }
- return {
- baseURL: this.config.baseUrl || this.baseUrl,
- timeout: this.config.timeout || 6e4,
- headers: {
- authorization: this.config.apiKey,
- "Content-Type": "application/json",
- ...this.config.headers
- }
- };
+ return super.getAxiosConfig("authorization");
  }
  /**
  * Submit audio for transcription
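
Note the AssemblyAI base URL change above: the generated client functions now append /v2 themselves, so a custom baseUrl must drop the suffix when upgrading from 0.1.6. A sketch reusing the initialization shape from the JSDoc example removed earlier (the assemblyai config key is an assumption by analogy with the gladia and deepgram keys shown there):

```ts
import { VoiceRouter } from "@meeting-baas/sdk";

const router = new VoiceRouter();
router.initialize({
  assemblyai: {
    apiKey: process.env.ASSEMBLYAI_KEY ?? "",
    // 0.1.6 expected "https://api.assemblyai.com/v2" here; in 0.1.8 the
    // generated functions add /v2, so pass the bare origin:
    baseUrl: "https://api.assemblyai.com",
  },
});
```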
@@ -2676,41 +2799,6 @@ var AssemblyAIAdapter = class extends BaseAdapter {
  }))
  }));
  }
- /**
- * Poll for transcription completion
- */
- async pollForCompletion(transcriptId, maxAttempts = 60, intervalMs = 3e3) {
- for (let attempt = 0; attempt < maxAttempts; attempt++) {
- const result = await this.getTranscript(transcriptId);
- if (!result.success) {
- return result;
- }
- const status = result.data?.status;
- if (status === "completed") {
- return result;
- }
- if (status === "error") {
- return {
- success: false,
- provider: this.name,
- error: {
- code: "TRANSCRIPTION_ERROR",
- message: "Transcription failed"
- },
- raw: result.raw
- };
- }
- await new Promise((resolve) => setTimeout(resolve, intervalMs));
- }
- return {
- success: false,
- provider: this.name,
- error: {
- code: "POLLING_TIMEOUT",
- message: `Transcription did not complete after ${maxAttempts} attempts`
- }
- };
- }
  /**
  * Stream audio for real-time transcription
  *
@@ -3352,7 +3440,24 @@ function createDeepgramAdapter(config) {
  }

  // src/adapters/azure-stt-adapter.ts
+ var import_axios5 = __toESM(require("axios"));
+
+ // src/generated/azure/api/speechServicesAPIV31.ts
  var import_axios4 = __toESM(require("axios"));
+ var transcriptionsCreate = (transcription, options) => {
+ return import_axios4.default.post("/transcriptions", transcription, options);
+ };
+ var transcriptionsGet = (id, options) => {
+ return import_axios4.default.get(`/transcriptions/${id}`, options);
+ };
+ var transcriptionsListFiles = (id, params, options) => {
+ return import_axios4.default.get(`/transcriptions/${id}/files`, {
+ ...options,
+ params: { ...params, ...options?.params }
+ });
+ };
+
+ // src/adapters/azure-stt-adapter.ts
  var AzureSTTAdapter = class extends BaseAdapter {
  constructor() {
  super(...arguments);
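
The hand-rolled this.client axios instance is gone: the adapter now calls generated wrapper functions and passes the shared config on every call. A hedged standalone sketch of the same pattern (the endpoint, key variable, and response shape are placeholders):

```ts
import axios, { type AxiosRequestConfig } from "axios";

// Generated-style wrapper: path logic lives here, auth/baseURL arrive per call.
const transcriptionsGet = (id: string, options?: AxiosRequestConfig) =>
  axios.get(`/transcriptions/${id}`, options);

const azureConfig: AxiosRequestConfig = {
  baseURL: "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.1",
  timeout: 60_000,
  headers: { "Ocp-Apim-Subscription-Key": process.env.AZURE_SPEECH_KEY ?? "" },
};

async function checkStatus(id: string): Promise<void> {
  const { data } = await transcriptionsGet(id, azureConfig);
  console.log(data.status); // e.g. "Running", "Succeeded"
}
```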
@@ -3369,20 +3474,20 @@ var AzureSTTAdapter = class extends BaseAdapter {
  entityDetection: false,
  piiRedaction: false
  };
+ this.baseUrl = "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.1";
  }
+ // Default, overridden in initialize()
  initialize(config) {
  super.initialize(config);
  this.region = config.region || "eastus";
  this.baseUrl = config.baseUrl || `https://${this.region}.api.cognitive.microsoft.com/speechtotext/v3.1`;
- this.client = import_axios4.default.create({
- baseURL: this.baseUrl,
- timeout: config.timeout || 6e4,
- headers: {
- "Ocp-Apim-Subscription-Key": config.apiKey,
- "Content-Type": "application/json",
- ...config.headers
- }
- });
+ }
+ /**
+ * Get axios config for generated API client functions
+ * Configures headers and base URL using Azure subscription key
+ */
+ getAxiosConfig() {
+ return super.getAxiosConfig("Ocp-Apim-Subscription-Key");
  }
  /**
  * Submit audio for transcription
@@ -3414,9 +3519,9 @@ var AzureSTTAdapter = class extends BaseAdapter {
  contentUrls: [audio.url],
  properties: this.buildTranscriptionProperties(options)
  };
- const response = await this.client.post(
- "/transcriptions",
- transcriptionRequest
+ const response = await transcriptionsCreate(
+ transcriptionRequest,
+ this.getAxiosConfig()
  );
  const transcription = response.data;
  return {
@@ -3447,9 +3552,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
  async getTranscript(transcriptId) {
  this.validateConfig();
  try {
- const statusResponse = await this.client.get(
- `/transcriptions/${transcriptId}`
- );
+ const statusResponse = await transcriptionsGet(transcriptId, this.getAxiosConfig());
  const transcription = statusResponse.data;
  const status = this.normalizeStatus(transcription.status);
  if (status !== "completed") {
@@ -3477,7 +3580,11 @@ var AzureSTTAdapter = class extends BaseAdapter {
  raw: transcription
  };
  }
- const filesResponse = await this.client.get(transcription.links.files);
+ const filesResponse = await transcriptionsListFiles(
+ transcriptId,
+ void 0,
+ this.getAxiosConfig()
+ );
  const files = filesResponse.data?.values || [];
  const resultFile = files.find((file) => file.kind === "Transcription");
  if (!resultFile?.links?.contentUrl) {
@@ -3491,7 +3598,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
  raw: transcription
  };
  }
- const contentResponse = await import_axios4.default.get(resultFile.links.contentUrl);
+ const contentResponse = await import_axios5.default.get(resultFile.links.contentUrl);
  const transcriptionData = contentResponse.data;
  return this.normalizeResponse(transcription, transcriptionData);
  } catch (error) {
@@ -3590,7 +3697,57 @@ function createAzureSTTAdapter(config) {
  }

  // src/adapters/openai-whisper-adapter.ts
- var import_axios5 = __toESM(require("axios"));
+ var import_axios7 = __toESM(require("axios"));
+
+ // src/generated/openai/api/openAIAPI.ts
+ var import_axios6 = __toESM(require("axios"));
+ var createTranscription = (createTranscriptionRequest, options) => {
+ const formData = new FormData();
+ formData.append("file", createTranscriptionRequest.file);
+ formData.append("model", createTranscriptionRequest.model);
+ if (createTranscriptionRequest.language !== void 0) {
+ formData.append("language", createTranscriptionRequest.language);
+ }
+ if (createTranscriptionRequest.prompt !== void 0) {
+ formData.append("prompt", createTranscriptionRequest.prompt);
+ }
+ if (createTranscriptionRequest.response_format !== void 0) {
+ formData.append("response_format", createTranscriptionRequest.response_format);
+ }
+ if (createTranscriptionRequest.temperature !== void 0) {
+ formData.append("temperature", createTranscriptionRequest.temperature.toString());
+ }
+ if (createTranscriptionRequest.include !== void 0) {
+ createTranscriptionRequest.include.forEach((value) => formData.append("include", value));
+ }
+ if (createTranscriptionRequest.timestamp_granularities !== void 0) {
+ createTranscriptionRequest.timestamp_granularities.forEach(
+ (value) => formData.append("timestamp_granularities", value)
+ );
+ }
+ if (createTranscriptionRequest.stream !== void 0 && createTranscriptionRequest.stream !== null) {
+ formData.append("stream", createTranscriptionRequest.stream.toString());
+ }
+ if (createTranscriptionRequest.chunking_strategy !== void 0 && createTranscriptionRequest.chunking_strategy !== null) {
+ formData.append(
+ "chunking_strategy",
+ typeof createTranscriptionRequest.chunking_strategy === "object" ? JSON.stringify(createTranscriptionRequest.chunking_strategy) : createTranscriptionRequest.chunking_strategy
+ );
+ }
+ if (createTranscriptionRequest.known_speaker_names !== void 0) {
+ createTranscriptionRequest.known_speaker_names.forEach(
+ (value) => formData.append("known_speaker_names", value)
+ );
+ }
+ if (createTranscriptionRequest.known_speaker_references !== void 0) {
+ createTranscriptionRequest.known_speaker_references.forEach(
+ (value) => formData.append("known_speaker_references", value)
+ );
+ }
+ return import_axios6.default.post("/audio/transcriptions", formData, options);
+ };
+
+ // src/adapters/openai-whisper-adapter.ts
  var OpenAIWhisperAdapter = class extends BaseAdapter {
  constructor() {
  super(...arguments);
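
The generated createTranscription wrapper replaces the hand-built multipart post: optional fields are appended only when present, and array-valued fields (include, timestamp_granularities, the speaker lists) repeat the same form key once per element. A trimmed sketch of the multipart body it produces (FormData and Blob are the Node 18+/browser globals; audioBytes is a placeholder):

```ts
// Mirrors the field handling in the generated wrapper above; audioBytes
// stands in for the real file contents.
const audioBytes = new Uint8Array([/* ... audio data ... */]);

const formData = new FormData();
formData.append("file", new Blob([audioBytes]), "audio.mp3");
formData.append("model", "whisper-1");
formData.append("response_format", "verbose_json");
// Array-valued fields repeat the key, one entry per element:
for (const granularity of ["word", "segment"]) {
  formData.append("timestamp_granularities", granularity);
}
```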
@@ -3612,19 +3769,12 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
  };
  this.baseUrl = "https://api.openai.com/v1";
  }
- initialize(config) {
- super.initialize(config);
- this.baseUrl = config.baseUrl || this.baseUrl;
- this.client = import_axios5.default.create({
- baseURL: this.baseUrl,
- timeout: config.timeout || 12e4,
- // 2 minutes default (audio processing can take time)
- headers: {
- Authorization: `Bearer ${config.apiKey}`,
- "Content-Type": "multipart/form-data",
- ...config.headers
- }
- });
+ /**
+ * Get axios config for generated API client functions
+ * Configures headers and base URL using Bearer token authorization
+ */
+ getAxiosConfig() {
+ return super.getAxiosConfig("Authorization", (apiKey) => `Bearer ${apiKey}`);
  }
  /**
  * Submit audio for transcription
@@ -3646,7 +3796,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
  let audioData;
  let fileName = "audio.mp3";
  if (audio.type === "url") {
- const response2 = await import_axios5.default.get(audio.url, {
+ const response2 = await import_axios7.default.get(audio.url, {
  responseType: "arraybuffer"
  });
  audioData = Buffer.from(response2.data);
@@ -3671,40 +3821,37 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
  const model = this.selectModel(options);
  const isDiarization = model === "gpt-4o-transcribe-diarize";
  const needsWords = options?.wordTimestamps === true;
- const requestBody = {
+ const request = {
  file: audioData,
+ // Generated type expects Blob
  model
  };
  if (options?.language) {
- requestBody.language = options.language;
+ request.language = options.language;
  }
  if (options?.metadata?.prompt) {
- requestBody.prompt = options.metadata.prompt;
+ request.prompt = options.metadata.prompt;
  }
  if (options?.metadata?.temperature !== void 0) {
- requestBody.temperature = options.metadata.temperature;
+ request.temperature = options.metadata.temperature;
  }
  if (isDiarization) {
- requestBody.response_format = "diarized_json";
+ request.response_format = "diarized_json";
  if (options?.metadata?.knownSpeakerNames) {
- requestBody["known_speaker_names"] = options.metadata.knownSpeakerNames;
+ request.known_speaker_names = options.metadata.knownSpeakerNames;
  }
  if (options?.metadata?.knownSpeakerReferences) {
- requestBody["known_speaker_references"] = options.metadata.knownSpeakerReferences;
+ request.known_speaker_references = options.metadata.knownSpeakerReferences;
  }
  } else if (needsWords || options?.diarization) {
- requestBody.response_format = "verbose_json";
+ request.response_format = "verbose_json";
  if (needsWords) {
- requestBody.timestamp_granularities = ["word", "segment"];
+ request.timestamp_granularities = ["word", "segment"];
  }
  } else {
- requestBody.response_format = "json";
+ request.response_format = "json";
  }
- const response = await this.client.post("/audio/transcriptions", requestBody, {
- headers: {
- "Content-Type": "multipart/form-data"
- }
- });
+ const response = await createTranscription(request, this.getAxiosConfig());
  return this.normalizeResponse(response.data, model, isDiarization);
  } catch (error) {
  return this.createErrorResponse(error);
@@ -3825,7 +3972,7 @@ function createOpenAIWhisperAdapter(config) {
  }

  // src/adapters/speechmatics-adapter.ts
- var import_axios6 = __toESM(require("axios"));
+ var import_axios8 = __toESM(require("axios"));
  var SpeechmaticsAdapter = class extends BaseAdapter {
  constructor() {
  super(...arguments);
@@ -3847,7 +3994,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
  initialize(config) {
  super.initialize(config);
  this.baseUrl = config.baseUrl || this.baseUrl;
- this.client = import_axios6.default.create({
+ this.client = import_axios8.default.create({
  baseURL: this.baseUrl,
  timeout: config.timeout || 12e4,
  headers: {