voice-router-dev 0.1.7 → 0.1.9
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- package/README.md +138 -4
- package/dist/index.d.mts +9269 -7817
- package/dist/index.d.ts +9269 -7817
- package/dist/index.js +536 -302
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +529 -302
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -4
package/dist/index.mjs
CHANGED
@@ -210,24 +210,165 @@ function createVoiceRouter(config, adapters) {
   return router;
 }
 
+// src/generated/deepgram/schema/listenV1EncodingParameter.ts
+var ListenV1EncodingParameter = {
+  linear16: "linear16",
+  flac: "flac",
+  mulaw: "mulaw",
+  opus: "opus",
+  speex: "speex",
+  g729: "g729"
+};
+
+// src/generated/deepgram/schema/speakV1EncodingParameter.ts
+var SpeakV1EncodingParameter = {
+  linear16: "linear16",
+  aac: "aac",
+  opus: "opus",
+  mp3: "mp3",
+  flac: "flac",
+  mulaw: "mulaw",
+  alaw: "alaw"
+};
+
+// src/generated/deepgram/schema/speakV1ContainerParameter.ts
+var SpeakV1ContainerParameter = {
+  none: "none",
+  wav: "wav",
+  ogg: "ogg"
+};
+
+// src/generated/deepgram/schema/speakV1SampleRateParameter.ts
+var SpeakV1SampleRateParameter = {
+  NUMBER_16000: 16e3,
+  NUMBER_24000: 24e3,
+  NUMBER_32000: 32e3,
+  NUMBER_48000: 48e3,
+  null: null,
+  NUMBER_8000: 8e3,
+  NUMBER_22050: 22050
+};
+
+// src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
+var StreamingSupportedEncodingEnum = {
+  "wav/pcm": "wav/pcm",
+  "wav/alaw": "wav/alaw",
+  "wav/ulaw": "wav/ulaw"
+};
+
+// src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
+var StreamingSupportedSampleRateEnum = {
+  NUMBER_8000: 8e3,
+  NUMBER_16000: 16e3,
+  NUMBER_32000: 32e3,
+  NUMBER_44100: 44100,
+  NUMBER_48000: 48e3
+};
+
+// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
+var StreamingSupportedBitDepthEnum = {
+  NUMBER_8: 8,
+  NUMBER_16: 16,
+  NUMBER_24: 24,
+  NUMBER_32: 32
+};
+
+// src/constants/defaults.ts
+var DEFAULT_TIMEOUTS = {
+  /** Standard HTTP request timeout for API calls (60 seconds) */
+  HTTP_REQUEST: 6e4,
+  /** Audio processing timeout for long audio files (120 seconds) */
+  AUDIO_PROCESSING: 12e4,
+  /** WebSocket connection establishment timeout (10 seconds) */
+  WS_CONNECTION: 1e4,
+  /** WebSocket graceful close timeout (5 seconds) */
+  WS_CLOSE: 5e3
+};
+var DEFAULT_POLLING = {
+  /** Maximum number of polling attempts before timing out */
+  MAX_ATTEMPTS: 60,
+  /** Standard interval between polling attempts (2 seconds) */
+  INTERVAL_MS: 2e3,
+  /** Slower interval for long-running jobs (3 seconds) */
+  SLOW_INTERVAL_MS: 3e3
+};
+
+// src/utils/errors.ts
+var ERROR_CODES = {
+  /** Failed to parse API response or WebSocket message */
+  PARSE_ERROR: "PARSE_ERROR",
+  /** WebSocket connection error */
+  WEBSOCKET_ERROR: "WEBSOCKET_ERROR",
+  /** Async transcription job did not complete within timeout */
+  POLLING_TIMEOUT: "POLLING_TIMEOUT",
+  /** Transcription processing failed on provider side */
+  TRANSCRIPTION_ERROR: "TRANSCRIPTION_ERROR",
+  /** Connection attempt timed out */
+  CONNECTION_TIMEOUT: "CONNECTION_TIMEOUT",
+  /** Invalid input provided to API */
+  INVALID_INPUT: "INVALID_INPUT",
+  /** Requested operation not supported by provider */
+  NOT_SUPPORTED: "NOT_SUPPORTED",
+  /** No transcription results available */
+  NO_RESULTS: "NO_RESULTS",
+  /** Unspecified or unknown error */
+  UNKNOWN_ERROR: "UNKNOWN_ERROR"
+};
+var ERROR_MESSAGES = {
+  PARSE_ERROR: "Failed to parse response data",
+  WEBSOCKET_ERROR: "WebSocket connection error",
+  POLLING_TIMEOUT: "Transcription did not complete within timeout period",
+  TRANSCRIPTION_ERROR: "Transcription processing failed",
+  CONNECTION_TIMEOUT: "Connection attempt timed out",
+  INVALID_INPUT: "Invalid input provided",
+  NOT_SUPPORTED: "Operation not supported by this provider",
+  NO_RESULTS: "No transcription results available",
+  UNKNOWN_ERROR: "An unknown error occurred"
+};
+function createError(code, customMessage, details) {
+  return {
+    code,
+    message: customMessage || ERROR_MESSAGES[code],
+    details
+  };
+}
+
 // src/adapters/base-adapter.ts
 var BaseAdapter = class {
   initialize(config) {
     this.config = config;
   }
   /**
-   * Helper method to create error responses
+   * Helper method to create error responses with stack traces
+   *
+   * @param error - Error object or unknown error
+   * @param statusCode - Optional HTTP status code
+   * @param code - Optional error code (defaults to extracted or UNKNOWN_ERROR)
    */
-  createErrorResponse(error, statusCode) {
+  createErrorResponse(error, statusCode, code) {
     const err = error;
+    const httpStatus = statusCode || err.statusCode || err.response?.status;
+    const httpStatusText = err.response?.statusText;
+    const responseData = err.response?.data;
     return {
       success: false,
       provider: this.name,
       error: {
-        code: err.code ||
+        code: code || err.code || ERROR_CODES.UNKNOWN_ERROR,
         message: err.message || "An unknown error occurred",
-        statusCode:
-        details:
+        statusCode: httpStatus,
+        details: {
+          // Include full error object
+          error,
+          // Include stack trace if available
+          stack: err.stack,
+          // Include HTTP response details
+          httpStatus,
+          httpStatusText,
+          responseData,
+          // Include provider name for debugging
+          provider: this.name
+        }
       }
     };
   }
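
The new `src/utils/errors.ts` module gives every adapter a shared error vocabulary: stable codes, default messages, and a `createError` factory. A minimal sketch of how the pieces compose (illustrative only; whether these helpers are re-exported from the package root is not visible in this diff):

```js
// createError falls back to ERROR_MESSAGES when no custom message is given.
const timeoutErr = createError(ERROR_CODES.POLLING_TIMEOUT);
// -> { code: "POLLING_TIMEOUT",
//      message: "Transcription did not complete within timeout period",
//      details: undefined }

// A custom message overrides the default; details carries arbitrary context.
const wsErr = createError(ERROR_CODES.WEBSOCKET_ERROR, "socket dropped mid-stream", {
  attempt: 3 // hypothetical field, for illustration only
});
```
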
@@ -242,6 +383,64 @@ var BaseAdapter = class {
     throw new Error(`API key is required for ${this.name} provider`);
     }
   }
+  /**
+   * Build axios config for generated API client functions
+   *
+   * @param authHeaderName - Header name for API key (e.g., "Authorization", "x-gladia-key")
+   * @param authHeaderValue - Optional function to format auth header value (defaults to raw API key)
+   * @returns Axios config object
+   */
+  getAxiosConfig(authHeaderName = "Authorization", authHeaderValue) {
+    this.validateConfig();
+    const authValue = authHeaderValue ? authHeaderValue(this.config.apiKey) : this.config.apiKey;
+    return {
+      baseURL: this.config.baseUrl || this.baseUrl,
+      timeout: this.config.timeout || DEFAULT_TIMEOUTS.HTTP_REQUEST,
+      headers: {
+        [authHeaderName]: authValue,
+        "Content-Type": "application/json",
+        ...this.config.headers
+      }
+    };
+  }
+  /**
+   * Generic polling helper for async transcription jobs
+   *
+   * Polls getTranscript() until job completes or times out.
+   *
+   * @param transcriptId - Job/transcript ID to poll
+   * @param options - Polling configuration
+   * @returns Final transcription result
+   */
+  async pollForCompletion(transcriptId, options) {
+    const { maxAttempts = DEFAULT_POLLING.MAX_ATTEMPTS, intervalMs = DEFAULT_POLLING.INTERVAL_MS } = options || {};
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+      const result = await this.getTranscript(transcriptId);
+      if (!result.success) {
+        return result;
+      }
+      const status = result.data?.status;
+      if (status === "completed") {
+        return result;
+      }
+      if (status === "error") {
+        return this.createErrorResponse(
+          new Error("Transcription failed"),
+          void 0,
+          ERROR_CODES.TRANSCRIPTION_ERROR
+        );
+      }
+      await new Promise((resolve) => setTimeout(resolve, intervalMs));
+    }
+    return {
+      success: false,
+      provider: this.name,
+      error: {
+        code: ERROR_CODES.POLLING_TIMEOUT,
+        message: `Transcription did not complete after ${maxAttempts} attempts`
+      }
+    };
+  }
 };
 
 // src/adapters/gladia-adapter.ts
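
With `getAxiosConfig` and `pollForCompletion` hoisted onto `BaseAdapter`, each provider adapter only has to name its auth header. A sketch of the pattern (the `FooAdapter` class is hypothetical; the real adapters later in this diff delegate exactly this way):

```js
class FooAdapter extends BaseAdapter {
  getAxiosConfig() {
    // Provider-specific header name plus an optional value formatter.
    return super.getAxiosConfig("x-foo-key", (apiKey) => `Key ${apiKey}`);
  }
}

// Any adapter that implements getTranscript() now inherits polling:
// await adapter.pollForCompletion("job-123", {
//   maxAttempts: 30,
//   intervalMs: DEFAULT_POLLING.SLOW_INTERVAL_MS
// });
```
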
@@ -288,6 +487,143 @@ function mapEncodingToProvider(unifiedEncoding, provider) {
   return providerEncoding;
 }
 
+// src/utils/websocket-helpers.ts
+function waitForWebSocketOpen(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CONNECTION) {
+  return new Promise((resolve, reject) => {
+    const timeout = setTimeout(() => {
+      reject(new Error("WebSocket connection timeout"));
+    }, timeoutMs);
+    ws.once("open", () => {
+      clearTimeout(timeout);
+      resolve();
+    });
+    ws.once("error", (error) => {
+      clearTimeout(timeout);
+      reject(error);
+    });
+  });
+}
+function closeWebSocket(ws, timeoutMs = DEFAULT_TIMEOUTS.WS_CLOSE) {
+  return new Promise((resolve) => {
+    const timeout = setTimeout(() => {
+      ws.terminate();
+      resolve();
+    }, timeoutMs);
+    ws.close();
+    ws.once("close", () => {
+      clearTimeout(timeout);
+      resolve();
+    });
+  });
+}
+function setupWebSocketHandlers(ws, callbacks, setSessionStatus) {
+  ws.on("open", () => {
+    setSessionStatus("open");
+    callbacks?.onOpen?.();
+  });
+  ws.on("error", (error) => {
+    callbacks?.onError?.(createError(ERROR_CODES.WEBSOCKET_ERROR, error.message, error));
+  });
+  ws.on("close", (code, reason) => {
+    setSessionStatus("closed");
+    callbacks?.onClose?.(code, reason.toString());
+  });
+}
+function validateSessionForAudio(sessionStatus, wsReadyState, WebSocketOpen) {
+  if (sessionStatus !== "open") {
+    throw new Error(`Cannot send audio: session is ${sessionStatus}`);
+  }
+  if (wsReadyState !== WebSocketOpen) {
+    throw new Error("WebSocket is not open");
+  }
+}
+
+// src/utils/validation.ts
+function validateEnumValue(value, enumType, fieldName, provider) {
+  const validValues = Object.values(enumType);
+  const isValid = validValues.some((v) => v === value);
+  if (!isValid) {
+    throw new Error(
+      `${provider} does not support ${fieldName} '${value}'. Supported values (from OpenAPI spec): ${validValues.join(", ")}`
+    );
+  }
+  return value;
+}
+
+// src/utils/transcription-helpers.ts
+function extractSpeakersFromUtterances(utterances, getSpeakerId, formatLabel) {
+  if (!utterances || utterances.length === 0) {
+    return void 0;
+  }
+  const speakerSet = /* @__PURE__ */ new Set();
+  utterances.forEach((utterance) => {
+    const speakerId = getSpeakerId(utterance);
+    if (speakerId !== void 0) {
+      speakerSet.add(String(speakerId));
+    }
+  });
+  if (speakerSet.size === 0) {
+    return void 0;
+  }
+  return Array.from(speakerSet).map((speakerId) => ({
+    id: speakerId,
+    label: formatLabel ? formatLabel(speakerId) : `Speaker ${speakerId}`
+  }));
+}
+function extractWords(words, mapper) {
+  if (!words || words.length === 0) {
+    return void 0;
+  }
+  const normalizedWords = words.map(mapper);
+  return normalizedWords.length > 0 ? normalizedWords : void 0;
+}
+var STATUS_MAPPINGS = {
+  gladia: {
+    queued: "queued",
+    processing: "processing",
+    done: "completed",
+    error: "error"
+  },
+  assemblyai: {
+    queued: "queued",
+    processing: "processing",
+    completed: "completed",
+    error: "error"
+  },
+  deepgram: {
+    queued: "queued",
+    processing: "processing",
+    completed: "completed",
+    error: "error"
+  },
+  azure: {
+    succeeded: "completed",
+    running: "processing",
+    notstarted: "queued",
+    failed: "error"
+  },
+  speechmatics: {
+    running: "processing",
+    done: "completed",
+    rejected: "error",
+    expired: "error"
+  }
+};
+function normalizeStatus(providerStatus, provider, defaultStatus = "queued") {
+  if (!providerStatus) return defaultStatus;
+  const mapping = STATUS_MAPPINGS[provider];
+  const statusKey = providerStatus.toString().toLowerCase();
+  if (statusKey in mapping) {
+    return mapping[statusKey];
+  }
+  for (const [key, value] of Object.entries(mapping)) {
+    if (statusKey.includes(key)) {
+      return value;
+    }
+  }
+  return defaultStatus;
+}
+
 // src/generated/gladia/api/gladiaControlAPI.ts
 import axios from "axios";
 
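
The helpers above replace per-adapter switch statements and ad-hoc checks. A few illustrative calls, with expected results derived from the `STATUS_MAPPINGS` table and `validateEnumValue` logic shown in this hunk:

```js
normalizeStatus("done", "gladia");      // -> "completed"
normalizeStatus("Succeeded", "azure");  // -> "completed" (lowercased before lookup)
normalizeStatus(void 0, "deepgram");    // -> "queued" (defaultStatus)

// validateEnumValue passes supported values through, or throws with the list:
validateEnumValue(44100, StreamingSupportedSampleRateEnum, "sample rate", "Gladia"); // 44100
// validateEnumValue(11025, StreamingSupportedSampleRateEnum, "sample rate", "Gladia")
// -> Error: Gladia does not support sample rate '11025'. Supported values (from OpenAPI spec): ...
```
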
@@ -824,21 +1160,6 @@ var StreamingResponseStatus = {
   error: "error"
 };
 
-// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
-var StreamingSupportedBitDepthEnum = {
-  NUMBER_8: 8,
-  NUMBER_16: 16,
-  NUMBER_24: 24,
-  NUMBER_32: 32
-};
-
-// src/generated/gladia/schema/streamingSupportedEncodingEnum.ts
-var StreamingSupportedEncodingEnum = {
-  "wav/pcm": "wav/pcm",
-  "wav/alaw": "wav/alaw",
-  "wav/ulaw": "wav/ulaw"
-};
-
 // src/generated/gladia/schema/streamingSupportedModels.ts
 var StreamingSupportedModels = {
   "solaria-1": "solaria-1"
@@ -850,15 +1171,6 @@ var StreamingSupportedRegions = {
   "eu-west": "eu-west"
 };
 
-// src/generated/gladia/schema/streamingSupportedSampleRateEnum.ts
-var StreamingSupportedSampleRateEnum = {
-  NUMBER_8000: 8e3,
-  NUMBER_16000: 16e3,
-  NUMBER_32000: 32e3,
-  NUMBER_44100: 44100,
-  NUMBER_48000: 48e3
-};
-
 // src/generated/gladia/schema/subtitlesFormatEnum.ts
 var SubtitlesFormatEnum = {
   srt: "srt",
@@ -1408,21 +1720,10 @@ var GladiaAdapter = class extends BaseAdapter {
   }
   /**
    * Get axios config for generated API client functions
-   * Configures headers and base URL
+   * Configures headers and base URL using Gladia's x-gladia-key header
    */
   getAxiosConfig() {
-
-      throw new Error("Adapter not initialized. Call initialize() first.");
-    }
-    return {
-      baseURL: this.config.baseUrl || this.baseUrl,
-      timeout: this.config.timeout || 6e4,
-      headers: {
-        "x-gladia-key": this.config.apiKey,
-        "Content-Type": "application/json",
-        ...this.config.headers
-      }
-    };
+    return super.getAxiosConfig("x-gladia-key");
   }
   /**
    * Submit audio for transcription
@@ -1589,29 +1890,13 @@ var GladiaAdapter = class extends BaseAdapter {
    * Normalize Gladia response to unified format
    */
   normalizeResponse(response) {
-
-    switch (response.status) {
-      case "queued":
-        status = "queued";
-        break;
-      case "processing":
-        status = "processing";
-        break;
-      case "done":
-        status = "completed";
-        break;
-      case "error":
-        status = "error";
-        break;
-      default:
-        status = "queued";
-    }
+    const status = normalizeStatus(response.status, "gladia");
     if (response.status === "error") {
       return {
         success: false,
         provider: this.name,
         error: {
-          code: response.error_code?.toString() ||
+          code: response.error_code?.toString() || ERROR_CODES.TRANSCRIPTION_ERROR,
           message: "Transcription failed",
           statusCode: response.error_code || void 0
         },
@@ -1651,22 +1936,11 @@ var GladiaAdapter = class extends BaseAdapter {
    * Extract speaker information from Gladia response
    */
   extractSpeakers(transcription) {
-
-
-
-
-
-      if (utterance.speaker !== void 0) {
-        speakerSet.add(utterance.speaker);
-      }
-    });
-    if (speakerSet.size === 0) {
-      return void 0;
-    }
-    return Array.from(speakerSet).map((speakerId) => ({
-      id: speakerId.toString(),
-      label: `Speaker ${speakerId}`
-    }));
+    return extractSpeakersFromUtterances(
+      transcription?.utterances,
+      (utterance) => utterance.speaker,
+      (id) => `Speaker ${id}`
+    );
   }
   /**
    * Extract word timestamps from Gladia response
@@ -1677,14 +1951,17 @@ var GladiaAdapter = class extends BaseAdapter {
     }
     const allWords = transcription.utterances.flatMap(
       (utterance) => utterance.words.map((word) => ({
-
-
-        end: word.end,
-        confidence: word.confidence,
-        speaker: utterance.speaker?.toString()
+        word,
+        speaker: utterance.speaker
       }))
     );
-    return allWords
+    return extractWords(allWords, (item) => ({
+      text: item.word.word,
+      start: item.word.start,
+      end: item.word.end,
+      confidence: item.word.confidence,
+      speaker: item.speaker?.toString()
+    }));
   }
   /**
    * Extract utterances from Gladia response
@@ -1710,38 +1987,6 @@ var GladiaAdapter = class extends BaseAdapter {
   /**
    * Poll for transcription completion
    */
-  async pollForCompletion(jobId, maxAttempts = 60, intervalMs = 2e3) {
-    for (let attempt = 0; attempt < maxAttempts; attempt++) {
-      const result = await this.getTranscript(jobId);
-      if (!result.success) {
-        return result;
-      }
-      const status = result.data?.status;
-      if (status === "completed") {
-        return result;
-      }
-      if (status === "error") {
-        return {
-          success: false,
-          provider: this.name,
-          error: {
-            code: "TRANSCRIPTION_ERROR",
-            message: "Transcription failed"
-          },
-          raw: result.raw
-        };
-      }
-      await new Promise((resolve) => setTimeout(resolve, intervalMs));
-    }
-    return {
-      success: false,
-      provider: this.name,
-      error: {
-        code: "POLLING_TIMEOUT",
-        message: `Transcription did not complete after ${maxAttempts} attempts`
-      }
-    };
-  }
   /**
    * Stream audio for real-time transcription
    *
@@ -1785,14 +2030,12 @@ var GladiaAdapter = class extends BaseAdapter {
     this.validateConfig();
     let validatedSampleRate;
     if (options?.sampleRate) {
-
-
-
-
-
-
-      }
-      validatedSampleRate = options.sampleRate;
+      validatedSampleRate = validateEnumValue(
+        options.sampleRate,
+        StreamingSupportedSampleRateEnum,
+        "sample rate",
+        "Gladia"
+      );
     }
     const streamingRequest = {
       encoding: options?.encoding ? mapEncodingToProvider(options.encoding, "gladia") : void 0,
@@ -1814,9 +2057,8 @@ var GladiaAdapter = class extends BaseAdapter {
     const { id, url: wsUrl } = initResponse.data;
     const ws = new WebSocket(wsUrl);
     let sessionStatus = "connecting";
-    ws
-      sessionStatus =
-      callbacks?.onOpen?.();
+    setupWebSocketHandlers(ws, callbacks, (status) => {
+      sessionStatus = status;
     });
     ws.on("message", (data) => {
       try {
@@ -1861,48 +2103,20 @@ var GladiaAdapter = class extends BaseAdapter {
         }
       } catch (error) {
         callbacks?.onError?.({
-          code:
+          code: ERROR_CODES.PARSE_ERROR,
           message: "Failed to parse WebSocket message",
           details: error
         });
       }
     });
-
-      callbacks?.onError?.({
-        code: "WEBSOCKET_ERROR",
-        message: error.message,
-        details: error
-      });
-    });
-    ws.on("close", (code, reason) => {
-      sessionStatus = "closed";
-      callbacks?.onClose?.(code, reason.toString());
-    });
-    await new Promise((resolve, reject) => {
-      const timeout = setTimeout(() => {
-        reject(new Error("WebSocket connection timeout"));
-      }, 1e4);
-      ws.once("open", () => {
-        clearTimeout(timeout);
-        resolve();
-      });
-      ws.once("error", (error) => {
-        clearTimeout(timeout);
-        reject(error);
-      });
-    });
+    await waitForWebSocketOpen(ws);
     return {
       id,
       provider: this.name,
       createdAt: /* @__PURE__ */ new Date(),
       getStatus: () => sessionStatus,
       sendAudio: async (chunk) => {
-
-          throw new Error(`Cannot send audio: session is ${sessionStatus}`);
-        }
-        if (ws.readyState !== WebSocket.OPEN) {
-          throw new Error("WebSocket is not open");
-        }
+        validateSessionForAudio(sessionStatus, ws.readyState, WebSocket.OPEN);
         ws.send(chunk.data);
         if (chunk.isLast) {
           ws.send(
@@ -1924,18 +2138,8 @@ var GladiaAdapter = class extends BaseAdapter {
           })
         );
         }
-
-
-          ws.terminate();
-          resolve();
-        }, 5e3);
-        ws.close();
-        ws.once("close", () => {
-          clearTimeout(timeout);
-          sessionStatus = "closed";
-          resolve();
-        });
-      });
+        await closeWebSocket(ws);
+        sessionStatus = "closed";
       }
     };
   }
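
Taken together, the hunks above reduce the Gladia streaming path to the shared WebSocket helpers. A condensed sketch of the resulting session lifecycle (control flow reconstructed from this diff, not a verbatim excerpt):

```js
const ws = new WebSocket(wsUrl);
let sessionStatus = "connecting";
setupWebSocketHandlers(ws, callbacks, (status) => {
  sessionStatus = status; // "open" on connect, "closed" on close
});
await waitForWebSocketOpen(ws); // rejects after DEFAULT_TIMEOUTS.WS_CONNECTION

// Each sendAudio() call guards the socket before writing:
// validateSessionForAudio(sessionStatus, ws.readyState, WebSocket.OPEN);
// ws.send(chunk.data);

await closeWebSocket(ws); // falls back to ws.terminate() after DEFAULT_TIMEOUTS.WS_CLOSE
sessionStatus = "closed";
```
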
@@ -2269,9 +2473,6 @@ var createTranscript = (transcriptParams, options) => {
 var getTranscript = (transcriptId, options) => {
   return axios2.get(`/v2/transcript/${transcriptId}`, options);
 };
-var createTemporaryToken = (createRealtimeTemporaryTokenParams, options) => {
-  return axios2.post("/v2/realtime/token", createRealtimeTemporaryTokenParams, options);
-};
 
 // src/adapters/assemblyai-adapter.ts
 var AssemblyAIAdapter = class extends BaseAdapter {
@@ -2289,26 +2490,17 @@ var AssemblyAIAdapter = class extends BaseAdapter {
       entityDetection: true,
       piiRedaction: true
     };
-    this.baseUrl = "https://api.assemblyai.com
-
+    this.baseUrl = "https://api.assemblyai.com";
+    // Generated functions already include /v2 path
+    this.wsBaseUrl = "wss://streaming.assemblyai.com/v3/ws";
   }
+  // v3 Universal Streaming endpoint
   /**
    * Get axios config for generated API client functions
-   * Configures headers and base URL
+   * Configures headers and base URL using authorization header
    */
   getAxiosConfig() {
-
-      throw new Error("Adapter not initialized. Call initialize() first.");
-    }
-    return {
-      baseURL: this.config.baseUrl || this.baseUrl,
-      timeout: this.config.timeout || 6e4,
-      headers: {
-        authorization: this.config.apiKey,
-        "Content-Type": "application/json",
-        ...this.config.headers
-      }
-    };
+    return super.getAxiosConfig("authorization");
   }
   /**
    * Submit audio for transcription
@@ -2586,41 +2778,6 @@ var AssemblyAIAdapter = class extends BaseAdapter {
       }))
     }));
   }
-  /**
-   * Poll for transcription completion
-   */
-  async pollForCompletion(transcriptId, maxAttempts = 60, intervalMs = 3e3) {
-    for (let attempt = 0; attempt < maxAttempts; attempt++) {
-      const result = await this.getTranscript(transcriptId);
-      if (!result.success) {
-        return result;
-      }
-      const status = result.data?.status;
-      if (status === "completed") {
-        return result;
-      }
-      if (status === "error") {
-        return {
-          success: false,
-          provider: this.name,
-          error: {
-            code: "TRANSCRIPTION_ERROR",
-            message: "Transcription failed"
-          },
-          raw: result.raw
-        };
-      }
-      await new Promise((resolve) => setTimeout(resolve, intervalMs));
-    }
-    return {
-      success: false,
-      provider: this.name,
-      error: {
-        code: "POLLING_TIMEOUT",
-        message: `Transcription did not complete after ${maxAttempts} attempts`
-      }
-    };
-  }
   /**
    * Stream audio for real-time transcription
    *
@@ -2661,14 +2818,17 @@ var AssemblyAIAdapter = class extends BaseAdapter {
    */
   async transcribeStream(options, callbacks) {
     this.validateConfig();
-
-
-
-
-
-    const
-    const
-
+    if (!this.config?.apiKey) {
+      throw new Error("API key is required for streaming");
+    }
+    const sampleRate = options?.sampleRate || 16e3;
+    const encoding = options?.encoding || "pcm_s16le";
+    const wsUrl = `${this.wsBaseUrl}?sample_rate=${sampleRate}&encoding=${encoding}`;
+    const ws = new WebSocket2(wsUrl, {
+      headers: {
+        Authorization: this.config.apiKey
+      }
+    });
     let sessionStatus = "connecting";
     const sessionId = `assemblyai-${Date.now()}-${Math.random().toString(36).substring(7)}`;
     ws.on("open", () => {
@@ -2678,41 +2838,42 @@ var AssemblyAIAdapter = class extends BaseAdapter {
     ws.on("message", (data) => {
       try {
         const message = JSON.parse(data.toString());
-        if (
+        if ("error" in message) {
+          callbacks?.onError?.({
+            code: "API_ERROR",
+            message: message.error
+          });
+          return;
+        }
+        if (message.type === "Begin") {
+          const beginMsg = message;
           callbacks?.onMetadata?.({
-            sessionId:
-            expiresAt:
+            sessionId: beginMsg.id,
+            expiresAt: new Date(beginMsg.expires_at).toISOString()
           });
-        } else if (message.
+        } else if (message.type === "Turn") {
+          const turnMsg = message;
           callbacks?.onTranscript?.({
             type: "transcript",
-            text:
-            isFinal:
-            confidence:
-            words:
+            text: turnMsg.transcript,
+            isFinal: turnMsg.end_of_turn,
+            confidence: turnMsg.end_of_turn_confidence,
+            words: turnMsg.words.map((word) => ({
               text: word.text,
               start: word.start / 1e3,
+              // Convert ms to seconds
               end: word.end / 1e3,
               confidence: word.confidence
             })),
-            data:
+            data: turnMsg
           });
-        } else if (message.
-
-
-
-
-
-            words: message.words.map((word) => ({
-              text: word.text,
-              start: word.start / 1e3,
-              end: word.end / 1e3,
-              confidence: word.confidence
-            })),
-            data: message
+        } else if (message.type === "Termination") {
+          const termMsg = message;
+          callbacks?.onMetadata?.({
+            terminated: true,
+            audioDurationSeconds: termMsg.audio_duration_seconds,
+            sessionDurationSeconds: termMsg.session_duration_seconds
           });
-        } else if (message.message_type === "SessionTerminated") {
-          callbacks?.onMetadata?.({ terminated: true });
         }
       } catch (error) {
         callbacks?.onError?.({
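
The rewritten handler targets AssemblyAI's v3 Universal Streaming protocol, which discriminates messages by a `type` field instead of v2's `message_type`. Example shapes for the three message kinds the code reads (field names taken from the hunk above; values invented for illustration):

```js
const begin = { type: "Begin", id: "sess_abc123", expires_at: 1735689600 };

const turn = {
  type: "Turn",
  transcript: "hello world",
  end_of_turn: true,
  end_of_turn_confidence: 0.94,
  words: [{ text: "hello", start: 120, end: 480, confidence: 0.99 }] // times in ms
};

const termination = {
  type: "Termination",
  audio_duration_seconds: 42,
  session_duration_seconds: 45
};
```
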
@@ -3262,7 +3423,24 @@ function createDeepgramAdapter(config) {
 }
 
 // src/adapters/azure-stt-adapter.ts
+import axios5 from "axios";
+
+// src/generated/azure/api/speechServicesAPIV31.ts
 import axios4 from "axios";
+var transcriptionsCreate = (transcription, options) => {
+  return axios4.post("/transcriptions", transcription, options);
+};
+var transcriptionsGet = (id, options) => {
+  return axios4.get(`/transcriptions/${id}`, options);
+};
+var transcriptionsListFiles = (id, params, options) => {
+  return axios4.get(`/transcriptions/${id}/files`, {
+    ...options,
+    params: { ...params, ...options?.params }
+  });
+};
+
+// src/adapters/azure-stt-adapter.ts
 var AzureSTTAdapter = class extends BaseAdapter {
   constructor() {
     super(...arguments);
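
The hand-rolled Azure HTTP calls are replaced by three generated client functions that each accept an axios config. A sketch of how the adapter wires them together (the `id` value is a placeholder):

```js
const cfg = this.getAxiosConfig(); // adds the Ocp-Apim-Subscription-Key header

await transcriptionsCreate(transcriptionRequest, cfg); // POST /transcriptions
await transcriptionsGet(id, cfg);                      // GET  /transcriptions/{id}
await transcriptionsListFiles(id, void 0, cfg);        // GET  /transcriptions/{id}/files
```
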
@@ -3279,20 +3457,20 @@ var AzureSTTAdapter = class extends BaseAdapter {
       entityDetection: false,
       piiRedaction: false
     };
+    this.baseUrl = "https://eastus.api.cognitive.microsoft.com/speechtotext/v3.1";
   }
+  // Default, overridden in initialize()
   initialize(config) {
     super.initialize(config);
     this.region = config.region || "eastus";
     this.baseUrl = config.baseUrl || `https://${this.region}.api.cognitive.microsoft.com/speechtotext/v3.1`;
-
-
-
-
-
-
-
-    }
-    });
+  }
+  /**
+   * Get axios config for generated API client functions
+   * Configures headers and base URL using Azure subscription key
+   */
+  getAxiosConfig() {
+    return super.getAxiosConfig("Ocp-Apim-Subscription-Key");
   }
   /**
    * Submit audio for transcription
@@ -3324,9 +3502,9 @@ var AzureSTTAdapter = class extends BaseAdapter {
       contentUrls: [audio.url],
       properties: this.buildTranscriptionProperties(options)
     };
-    const response = await
-
-
+    const response = await transcriptionsCreate(
+      transcriptionRequest,
+      this.getAxiosConfig()
     );
     const transcription = response.data;
     return {
@@ -3357,9 +3535,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
   async getTranscript(transcriptId) {
     this.validateConfig();
     try {
-      const statusResponse = await this.
-        `/transcriptions/${transcriptId}`
-      );
+      const statusResponse = await transcriptionsGet(transcriptId, this.getAxiosConfig());
       const transcription = statusResponse.data;
       const status = this.normalizeStatus(transcription.status);
       if (status !== "completed") {
@@ -3387,7 +3563,11 @@ var AzureSTTAdapter = class extends BaseAdapter {
           raw: transcription
         };
       }
-      const filesResponse = await
+      const filesResponse = await transcriptionsListFiles(
+        transcriptId,
+        void 0,
+        this.getAxiosConfig()
+      );
       const files = filesResponse.data?.values || [];
       const resultFile = files.find((file) => file.kind === "Transcription");
       if (!resultFile?.links?.contentUrl) {
@@ -3401,7 +3581,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
           raw: transcription
         };
       }
-      const contentResponse = await
+      const contentResponse = await axios5.get(resultFile.links.contentUrl);
       const transcriptionData = contentResponse.data;
       return this.normalizeResponse(transcription, transcriptionData);
     } catch (error) {
@@ -3500,7 +3680,57 @@ function createAzureSTTAdapter(config) {
 }
 
 // src/adapters/openai-whisper-adapter.ts
-import
+import axios7 from "axios";
+
+// src/generated/openai/api/openAIAPI.ts
+import axios6 from "axios";
+var createTranscription = (createTranscriptionRequest, options) => {
+  const formData = new FormData();
+  formData.append("file", createTranscriptionRequest.file);
+  formData.append("model", createTranscriptionRequest.model);
+  if (createTranscriptionRequest.language !== void 0) {
+    formData.append("language", createTranscriptionRequest.language);
+  }
+  if (createTranscriptionRequest.prompt !== void 0) {
+    formData.append("prompt", createTranscriptionRequest.prompt);
+  }
+  if (createTranscriptionRequest.response_format !== void 0) {
+    formData.append("response_format", createTranscriptionRequest.response_format);
+  }
+  if (createTranscriptionRequest.temperature !== void 0) {
+    formData.append("temperature", createTranscriptionRequest.temperature.toString());
+  }
+  if (createTranscriptionRequest.include !== void 0) {
+    createTranscriptionRequest.include.forEach((value) => formData.append("include", value));
+  }
+  if (createTranscriptionRequest.timestamp_granularities !== void 0) {
+    createTranscriptionRequest.timestamp_granularities.forEach(
+      (value) => formData.append("timestamp_granularities", value)
+    );
+  }
+  if (createTranscriptionRequest.stream !== void 0 && createTranscriptionRequest.stream !== null) {
+    formData.append("stream", createTranscriptionRequest.stream.toString());
+  }
+  if (createTranscriptionRequest.chunking_strategy !== void 0 && createTranscriptionRequest.chunking_strategy !== null) {
+    formData.append(
+      "chunking_strategy",
+      typeof createTranscriptionRequest.chunking_strategy === "object" ? JSON.stringify(createTranscriptionRequest.chunking_strategy) : createTranscriptionRequest.chunking_strategy
+    );
+  }
+  if (createTranscriptionRequest.known_speaker_names !== void 0) {
+    createTranscriptionRequest.known_speaker_names.forEach(
+      (value) => formData.append("known_speaker_names", value)
+    );
+  }
+  if (createTranscriptionRequest.known_speaker_references !== void 0) {
+    createTranscriptionRequest.known_speaker_references.forEach(
+      (value) => formData.append("known_speaker_references", value)
+    );
+  }
+  return axios6.post("/audio/transcriptions", formData, options);
+};
+
+// src/adapters/openai-whisper-adapter.ts
 var OpenAIWhisperAdapter = class extends BaseAdapter {
   constructor() {
     super(...arguments);
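
The generated `createTranscription` builder assembles the multipart form for OpenAI's `/audio/transcriptions` endpoint field by field. A sketch of calling it directly (model name and options are illustrative; the adapter below drives it the same way):

```js
const response = await createTranscription(
  {
    file: audioData,                          // Buffer in practice; the generated type expects Blob
    model: "whisper-1",
    response_format: "verbose_json",
    timestamp_granularities: ["word", "segment"]
  },
  adapter.getAxiosConfig()                    // Authorization: Bearer <apiKey>
);
```
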
@@ -3522,19 +3752,12 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
     };
     this.baseUrl = "https://api.openai.com/v1";
   }
-
-
-
-
-
-
-    // 2 minutes default (audio processing can take time)
-      headers: {
-        Authorization: `Bearer ${config.apiKey}`,
-        "Content-Type": "multipart/form-data",
-        ...config.headers
-      }
-    });
+  /**
+   * Get axios config for generated API client functions
+   * Configures headers and base URL using Bearer token authorization
+   */
+  getAxiosConfig() {
+    return super.getAxiosConfig("Authorization", (apiKey) => `Bearer ${apiKey}`);
   }
   /**
    * Submit audio for transcription
@@ -3556,7 +3779,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
     let audioData;
     let fileName = "audio.mp3";
     if (audio.type === "url") {
-      const response2 = await
+      const response2 = await axios7.get(audio.url, {
         responseType: "arraybuffer"
       });
       audioData = Buffer.from(response2.data);
@@ -3581,40 +3804,37 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
     const model = this.selectModel(options);
     const isDiarization = model === "gpt-4o-transcribe-diarize";
     const needsWords = options?.wordTimestamps === true;
-    const
+    const request = {
       file: audioData,
+      // Generated type expects Blob
       model
     };
     if (options?.language) {
-
+      request.language = options.language;
     }
     if (options?.metadata?.prompt) {
-
+      request.prompt = options.metadata.prompt;
     }
     if (options?.metadata?.temperature !== void 0) {
-
+      request.temperature = options.metadata.temperature;
     }
     if (isDiarization) {
-
+      request.response_format = "diarized_json";
       if (options?.metadata?.knownSpeakerNames) {
-
+        request.known_speaker_names = options.metadata.knownSpeakerNames;
       }
       if (options?.metadata?.knownSpeakerReferences) {
-
+        request.known_speaker_references = options.metadata.knownSpeakerReferences;
       }
     } else if (needsWords || options?.diarization) {
-
+      request.response_format = "verbose_json";
       if (needsWords) {
-
+        request.timestamp_granularities = ["word", "segment"];
       }
     } else {
-
+      request.response_format = "json";
     }
-    const response = await this.
-      headers: {
-        "Content-Type": "multipart/form-data"
-      }
-    });
+    const response = await createTranscription(request, this.getAxiosConfig());
     return this.normalizeResponse(response.data, model, isDiarization);
     } catch (error) {
       return this.createErrorResponse(error);
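
The chain of conditionals above encodes a small decision table for `response_format`. Restated as a standalone function for clarity (hypothetical helper, not part of the package):

```js
function pickResponseFormat(model, options) {
  if (model === "gpt-4o-transcribe-diarize") return "diarized_json"; // + known_speaker_* fields
  if (options?.wordTimestamps === true || options?.diarization) return "verbose_json";
  return "json";
}
```
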
@@ -3735,7 +3955,7 @@ function createOpenAIWhisperAdapter(config) {
 }
 
 // src/adapters/speechmatics-adapter.ts
-import
+import axios8 from "axios";
 var SpeechmaticsAdapter = class extends BaseAdapter {
   constructor() {
     super(...arguments);
@@ -3757,7 +3977,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
   initialize(config) {
     super.initialize(config);
     this.baseUrl = config.baseUrl || this.baseUrl;
-    this.client =
+    this.client = axios8.create({
       baseURL: this.baseUrl,
       timeout: config.timeout || 12e4,
       headers: {
@@ -4892,9 +5112,16 @@ export {
   GladiaAdapter,
   schema_exports as GladiaTypes,
   GladiaWebhookHandler,
+  ListenV1EncodingParameter,
   OpenAIWhisperAdapter,
+  SpeakV1ContainerParameter,
+  SpeakV1EncodingParameter,
+  SpeakV1SampleRateParameter,
   SpeechmaticsAdapter,
   SpeechmaticsWebhookHandler,
+  StreamingSupportedBitDepthEnum,
+  StreamingSupportedEncodingEnum,
+  StreamingSupportedSampleRateEnum,
   VoiceRouter,
   WebhookRouter,
   createAssemblyAIAdapter,