voice-router-dev 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -39,6 +39,7 @@ __export(src_exports, {
  AssemblyAILanguage: () => AssemblyAILanguage,
  AssemblyAILanguageCodes: () => AssemblyAILanguageCodes,
  AssemblyAIListFilterSchema: () => AssemblyAIListFilterSchema,
+ AssemblyAIRegion: () => AssemblyAIRegion,
  AssemblyAISampleRate: () => AssemblyAISampleRate,
  AssemblyAISpeechModel: () => AssemblyAISpeechModel,
  AssemblyAIStatus: () => AssemblyAIStatus,
@@ -89,6 +90,7 @@ __export(src_exports, {
  ElevenLabsLanguageCodes: () => ElevenLabsLanguageCodes,
  ElevenLabsLanguageLabels: () => ElevenLabsLanguageLabels,
  ElevenLabsLanguages: () => ElevenLabsLanguages,
+ ElevenLabsRegion: () => ElevenLabsRegion,
  ElevenLabsTypes: () => schema_exports8,
  ElevenLabsZodSchemas: () => elevenLabsSpeechToTextAPI_zod_exports,
  GladiaAdapter: () => GladiaAdapter,
@@ -2820,6 +2822,12 @@ var AssemblyAISampleRate = {
  rate48000: 48e3
  };
  var AssemblyAIStatus = TranscriptStatus;
+ var AssemblyAIRegion = {
+ /** United States (default) */
+ us: "us",
+ /** European Union — data never leaves the EU */
+ eu: "eu"
+ };
  var GladiaStatus = TranscriptionControllerListV2StatusItem;
  var DeepgramStatus = V1ProjectsProjectIdRequestsGetParametersStatus;
  var SpeechmaticsRegion = {
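A minimal usage sketch for the new `AssemblyAIRegion` constant. The `AssemblyAIAdapter` class name and the `region` config field are assumptions (the diff only shows the constant and its export); check the adapter's config type for the exact names.

```typescript
// Sketch: pin AssemblyAI processing to the EU using the new 0.9.3 constant.
// Assumed: the package exports AssemblyAIAdapter and its initialize() config
// accepts a "region" field, mirroring SpeechmaticsRegion-style configuration.
import { AssemblyAIAdapter, AssemblyAIRegion } from "voice-router-dev";

const adapter = new AssemblyAIAdapter();
adapter.initialize({
  apiKey: process.env.ASSEMBLYAI_API_KEY!,
  region: AssemblyAIRegion.eu, // field name assumed; verify against the config type
});
```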
@@ -6795,9 +6803,13 @@ var DeepgramAdapter = class extends BaseAdapter {
  * Submit audio for transcription
  *
  * Sends audio to Deepgram API for transcription. Deepgram normally processes
- * synchronously and returns results immediately. When `webhookUrl` is set,
- * Deepgram can instead return an async callback acknowledgment containing a
- * request ID.
+ * synchronously and returns results immediately.
+ *
+ * **Callback mode:** When `webhookUrl` is set, Deepgram returns immediately
+ * with a `request_id` (status `"queued"`). The full transcript is POSTed to
+ * the webhook URL — this is the primary delivery mechanism. `getTranscript()`
+ * can attempt to retrieve the result later via request history, but that
+ * endpoint is best-effort and not a guaranteed durable store.
  *
  * @param audio - Audio input (URL or file buffer)
  * @param options - Transcription options
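A minimal sketch of the callback mode described in the new doc comment. The audio-input shape and the result field names are assumptions inferred from other hunks in this diff (the queued ack with `data.status` and `tracking.requestId`); confirm against the Deepgram adapter's actual return type.

```typescript
// Sketch: submit audio in Deepgram callback mode and capture the request ID.
const adapter = new DeepgramAdapter();
adapter.initialize({
  apiKey: process.env.DEEPGRAM_API_KEY!,
  projectId: process.env.DEEPGRAM_PROJECT_ID, // needed later for getTranscript()
});

const submitted = await adapter.transcribe(
  { url: "https://example.com/call.wav" },                      // assumed input shape
  { webhookUrl: "https://api.example.com/webhooks/deepgram" }   // enables callback mode
);

// The transcript itself arrives at the webhook; persist that payload yourself.
console.log(submitted.data?.status);        // expected: "queued" (assumed field)
console.log(submitted.tracking?.requestId); // Deepgram request_id (assumed field)
```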
@@ -6907,30 +6919,22 @@ var DeepgramAdapter = class extends BaseAdapter {
  }
  }
  /**
- * Get transcription result by ID
+ * Get transcription result by ID (best-effort)
  *
- * Retrieves a previous transcription from Deepgram's request history.
+ * Retrieves a previous transcription from Deepgram's request history API.
+ * Requires `projectId` to be set during initialization.
  *
- * Unlike the list endpoint, getting a single request DOES include the full
- * transcript response. Requires `projectId` to be set during initialization.
+ * **Important:** Deepgram's request history is best-effort. Requests may
+ * expire or be unavailable depending on your plan and retention settings.
+ * This is NOT a durable transcript store — for reliable retrieval, use
+ * callback mode (`webhookUrl`) and persist the webhook payload yourself.
  *
- * @param transcriptId - Request ID from a previous transcription
- * @returns Full transcript response including text, words, and metadata
+ * The response field on the request history entry is cast to
+ * `ListenV1Response`; this appears to work in practice but is not
+ * explicitly documented by Deepgram as a guaranteed contract.
  *
- * @example Get a transcript by request ID
- * ```typescript
- * const adapter = new DeepgramAdapter()
- * adapter.initialize({
- * apiKey: process.env.DEEPGRAM_API_KEY,
- * projectId: process.env.DEEPGRAM_PROJECT_ID
- * })
- *
- * const result = await adapter.getTranscript('abc123-request-id')
- * if (result.success) {
- * console.log(result.data?.text)
- * console.log(result.data?.words)
- * }
- * ```
+ * @param transcriptId - Request ID from a previous transcription
+ * @returns Transcript response if still available in request history
  *
  * @see https://developers.deepgram.com/reference/get-request
  */
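Since 0.9.3 drops the `@example` block from this JSDoc, here is a sketch of the same call with the new best-effort framing. It follows the removed example's API usage (`initialize`, `getTranscript`, `result.success`, `result.data?.text`); the request ID is a placeholder.

```typescript
// Sketch: best-effort retrieval from Deepgram request history.
const adapter = new DeepgramAdapter();
adapter.initialize({
  apiKey: process.env.DEEPGRAM_API_KEY!,
  projectId: process.env.DEEPGRAM_PROJECT_ID!, // required for request history
});

const result = await adapter.getTranscript("abc123-request-id");
if (result.success && result.data?.text) {
  console.log(result.data.text);
} else {
  // The entry may have expired or carry no stored response; fall back to the
  // transcript payload your webhook handler persisted in callback mode.
  console.warn("Transcript not available in request history");
}
```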
@@ -9013,8 +9017,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
  super(...arguments);
  this.name = "speechmatics";
  this.capabilities = {
- streaming: false,
- // Batch only (streaming available via separate WebSocket API)
+ streaming: true,
  diarization: true,
  wordTimestamps: true,
  languageDetection: false,
@@ -9260,6 +9263,271 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
  throw error;
  }
  }
+ /**
+ * Get the regional WebSocket host for real-time streaming
+ *
+ * Speechmatics RT uses a different host pattern: {region}.rt.speechmatics.com
+ */
+ getRegionalWsHost(region) {
+ const regionPrefix = region || "eu1";
+ return `${regionPrefix}.rt.speechmatics.com`;
+ }
+ /**
+ * Stream audio for real-time transcription
+ *
+ * Creates a WebSocket connection to the Speechmatics Real-Time API.
+ * Protocol: send StartRecognition config, then AddAudio binary frames,
+ * receive AddPartialTranscript/AddTranscript/EndOfUtterance messages.
+ *
+ * @param options - Streaming configuration
+ * @param callbacks - Event callbacks
+ * @returns StreamingSession for sending audio and closing
+ *
+ * @see https://docs.speechmatics.com/rt-api-ref
+ */
+ async transcribeStream(options, callbacks) {
+ this.validateConfig();
+ const sessionId = `speechmatics_${Date.now()}_${Math.random().toString(36).substring(7)}`;
+ const createdAt = /* @__PURE__ */ new Date();
+ const smOpts = options?.speechmaticsStreaming;
+ const region = smOpts?.region || this.config?.region;
+ const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost(region)}`);
+ const wsUrl = `${wsBase}/v2`;
+ let status = "connecting";
+ let recognitionStarted = false;
+ const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
+ const ws = new WebSocketImpl(wsUrl);
+ const language = smOpts?.language || options?.language || "en";
+ const transcriptionConfig = {
+ language,
+ enable_entities: smOpts?.enableEntities ?? options?.entityDetection ?? false,
+ enable_partials: smOpts?.enablePartials ?? options?.interimResults !== false,
+ operating_point: smOpts?.operatingPoint || OperatingPoint.enhanced,
+ ...smOpts?.maxDelay !== void 0 && { max_delay: smOpts.maxDelay },
+ ...smOpts?.maxDelayMode && {
+ max_delay_mode: smOpts.maxDelayMode
+ },
+ ...smOpts?.domain && { domain: smOpts.domain },
+ ...(options?.diarization || smOpts?.diarization === TranscriptionConfigDiarization.speaker) && {
+ diarization: TranscriptionConfigDiarization.speaker,
+ ...smOpts?.maxSpeakers !== void 0 && {
+ speaker_diarization_config: { max_speakers: smOpts.maxSpeakers }
+ }
+ },
+ ...(options?.customVocabulary?.length || smOpts?.additionalVocab?.length) && {
+ additional_vocab: (smOpts?.additionalVocab || options?.customVocabulary || []).map(
+ (term) => ({ content: term })
+ )
+ }
+ };
+ const startRecognition = {
+ message: "StartRecognition",
+ audio_format: {
+ type: "raw",
+ encoding: smOpts?.encoding || "pcm_s16le",
+ sample_rate: smOpts?.sampleRate || options?.sampleRate || 16e3
+ },
+ transcription_config: transcriptionConfig,
+ ...smOpts?.conversationConfig && {
+ conversation_config: {
+ end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
+ }
+ }
+ };
+ ws.onopen = () => {
+ status = "open";
+ const msg = JSON.stringify(startRecognition);
+ if (callbacks?.onRawMessage) {
+ callbacks.onRawMessage({
+ provider: this.name,
+ direction: "outgoing",
+ timestamp: Date.now(),
+ payload: msg,
+ messageType: "StartRecognition"
+ });
+ }
+ ws.send(msg);
+ };
+ ws.onmessage = (event) => {
+ const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
+ try {
+ const data = JSON.parse(rawPayload);
+ const messageType = data.message;
+ if (callbacks?.onRawMessage) {
+ callbacks.onRawMessage({
+ provider: this.name,
+ direction: "incoming",
+ timestamp: Date.now(),
+ payload: rawPayload,
+ messageType
+ });
+ }
+ switch (messageType) {
+ case "RecognitionStarted": {
+ recognitionStarted = true;
+ callbacks?.onOpen?.();
+ callbacks?.onMetadata?.({
+ id: data.id,
+ languagePackInfo: data.language_pack_info
+ });
+ break;
+ }
+ case "AddPartialTranscript": {
+ const partial = data;
+ const words = this.resultsToWords(partial.results);
+ callbacks?.onTranscript?.({
+ type: "transcript",
+ text: partial.metadata.transcript,
+ isFinal: false,
+ words,
+ speaker: words[0]?.speaker,
+ confidence: partial.results[0]?.alternatives?.[0]?.confidence,
+ channel: partial.channel ? parseInt(partial.channel) : void 0
+ });
+ break;
+ }
+ case "AddTranscript": {
+ const final = data;
+ const words = this.resultsToWords(final.results);
+ callbacks?.onTranscript?.({
+ type: "transcript",
+ text: final.metadata.transcript,
+ isFinal: true,
+ words,
+ speaker: words[0]?.speaker,
+ confidence: final.results[0]?.alternatives?.[0]?.confidence,
+ channel: final.channel ? parseInt(final.channel) : void 0
+ });
+ if (options?.diarization || smOpts?.diarization === "speaker") {
+ const utterances = buildUtterancesFromWords(words);
+ for (const utterance of utterances) {
+ callbacks?.onUtterance?.(utterance);
+ }
+ }
+ break;
+ }
+ case "EndOfUtterance": {
+ break;
+ }
+ case "EndOfTranscript": {
+ callbacks?.onClose?.(1e3, "Transcription complete");
+ break;
+ }
+ case "Error": {
+ const err = data;
+ callbacks?.onError?.({
+ code: err.type || "SPEECHMATICS_ERROR",
+ message: err.reason || "Unknown error"
+ });
+ break;
+ }
+ case "Warning": {
+ const warn = data;
+ callbacks?.onMetadata?.({
+ warning: warn.type,
+ reason: warn.reason
+ });
+ break;
+ }
+ case "Info": {
+ callbacks?.onMetadata?.(data);
+ break;
+ }
+ case "AudioAdded":
+ case "ChannelAudioAdded":
+ break;
+ default:
+ callbacks?.onMetadata?.(data);
+ break;
+ }
+ } catch (error) {
+ callbacks?.onError?.({
+ code: "PARSE_ERROR",
+ message: `Failed to parse message: ${error}`
+ });
+ }
+ };
+ ws.onerror = () => {
+ callbacks?.onError?.({
+ code: "WEBSOCKET_ERROR",
+ message: "WebSocket error occurred"
+ });
+ };
+ ws.onclose = (event) => {
+ status = "closed";
+ callbacks?.onClose?.(event.code, event.reason);
+ };
+ await new Promise((resolve, reject) => {
+ const timeout = setTimeout(() => {
+ reject(new Error("WebSocket connection timeout"));
+ }, 1e4);
+ const checkReady = () => {
+ if (recognitionStarted) {
+ clearTimeout(timeout);
+ resolve();
+ } else if (status === "closed") {
+ clearTimeout(timeout);
+ reject(new Error("WebSocket connection failed"));
+ } else {
+ setTimeout(checkReady, 100);
+ }
+ };
+ checkReady();
+ });
+ return {
+ id: sessionId,
+ provider: this.name,
+ createdAt,
+ getStatus: () => status,
+ sendAudio: async (chunk) => {
+ if (status !== "open") {
+ throw new Error("Session is not open");
+ }
+ if (callbacks?.onRawMessage) {
+ const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
+ chunk.data.byteOffset,
+ chunk.data.byteOffset + chunk.data.byteLength
+ );
+ callbacks.onRawMessage({
+ provider: this.name,
+ direction: "outgoing",
+ timestamp: Date.now(),
+ payload: audioPayload,
+ messageType: "audio"
+ });
+ }
+ ws.send(chunk.data);
+ },
+ close: async () => {
+ if (status === "open") {
+ status = "closing";
+ const endMsg = JSON.stringify({ message: "EndOfStream", last_seq_no: 0 });
+ if (callbacks?.onRawMessage) {
+ callbacks.onRawMessage({
+ provider: this.name,
+ direction: "outgoing",
+ timestamp: Date.now(),
+ payload: endMsg,
+ messageType: "EndOfStream"
+ });
+ }
+ ws.send(endMsg);
+ }
+ }
+ };
+ }
+ /**
+ * Convert Speechmatics RecognitionResult[] to unified Word[]
+ */
+ resultsToWords(results) {
+ return results.filter((r) => r.type === "word").map((r) => ({
+ word: r.alternatives?.[0]?.content || "",
+ start: r.start_time,
+ end: r.end_time,
+ confidence: r.alternatives?.[0]?.confidence,
+ speaker: r.alternatives?.[0]?.speaker
+ }));
+ }
  /**
  * Normalize Speechmatics status to unified status
  * Uses generated JobDetailsStatus enum values
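A minimal sketch of the new Speechmatics real-time path added above. It assumes an already-initialized `SpeechmaticsAdapter` instance named `adapter`; the option and callback names mirror what `transcribeStream()` reads in this diff, and the audio chunk shape (`{ data }`) is inferred from how `sendAudio` consumes it.

```typescript
// Sketch: open an RT session, receive partial/final transcripts, then close.
const session = await adapter.transcribeStream(
  {
    language: "en",
    diarization: true,
    sampleRate: 16000, // pcm_s16le @ 16 kHz is the default audio_format
    speechmaticsStreaming: { region: "eu1", enablePartials: true },
  },
  {
    onTranscript: (e) => console.log(e.isFinal ? "final:" : "partial:", e.text),
    onError: (err) => console.error(err.code, err.message),
    onClose: (code, reason) => console.log("closed", code, reason),
  }
);

// Send raw PCM chunks as they become available; close() sends EndOfStream.
await session.sendAudio({ data: new Int16Array(16000) }); // placeholder audio chunk
await session.close();
```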
@@ -9679,7 +9947,7 @@ var SonioxAdapter = class extends BaseAdapter {
  let messageType;
  try {
  const data = JSON.parse(rawPayload);
- const errorMessage = data.error_message || data.error;
+ const errorMessage = data.error_message;
  if (errorMessage) {
  messageType = "error";
  } else if (data.finished) {
@@ -10038,7 +10306,15 @@ var ElevenLabsAdapter = class extends BaseAdapter {
  /**
  * Submit audio for transcription
  *
- * ElevenLabs batch is synchronous - the API returns the result directly.
+ * ElevenLabs batch is normally synchronous; the API returns results directly.
+ *
+ * **Webhook mode:** When `webhookUrl` is set (or `elevenlabs.webhook` is true),
+ * the request is processed asynchronously. ElevenLabs returns a 202 with a
+ * `request_id` and delivers results to a webhook configured in the ElevenLabs
+ * dashboard. The unified `webhookUrl` acts as an intent flag to enable async
+ * mode — the actual delivery destination must be pre-configured in your
+ * ElevenLabs dashboard. Use `elevenlabs.webhook_id` to target a specific
+ * webhook endpoint.
  */
  async transcribe(audio, options) {
  this.validateConfig();
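A minimal sketch of the webhook mode documented above, assuming an initialized `ElevenLabsAdapter` instance named `adapter`. The audio-input shape and the `webhook_id` value are placeholders; the queued-ack fields (`data.status`, `tracking.requestId`) come from the return shape added later in this diff.

```typescript
// Sketch: switch an ElevenLabs transcription into async (webhook) mode.
// The delivery URL must already be configured in the ElevenLabs dashboard;
// webhookUrl here only signals intent to run asynchronously.
const submitted = await adapter.transcribe(
  { url: "https://example.com/meeting.mp3" },                     // assumed input shape
  {
    webhookUrl: "https://api.example.com/webhooks/elevenlabs",    // intent flag
    elevenlabs: { webhook_id: "wh_123" },                         // optional: target a specific endpoint
  }
);

console.log(submitted.data?.status);        // "queued"
console.log(submitted.tracking?.requestId); // ElevenLabs request_id
```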
@@ -10061,6 +10337,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
  }
  };
  }
+ const elevenlabsOpts = options?.elevenlabs;
+ const useWebhook = options?.webhookUrl || elevenlabsOpts?.webhook;
+ if (useWebhook) {
+ formData.append("webhook", "true");
+ }
  if (options?.language) {
  formData.append("language_code", options.language);
  }
@@ -10079,7 +10360,6 @@ var ElevenLabsAdapter = class extends BaseAdapter {
  if (options?.entityDetection) {
  formData.append("entity_detection", "all");
  }
- const elevenlabsOpts = options?.elevenlabs;
  if (elevenlabsOpts) {
  for (const [key, value] of Object.entries(elevenlabsOpts)) {
  if (value === void 0 || value === null) continue;
@@ -10102,6 +10382,22 @@ var ElevenLabsAdapter = class extends BaseAdapter {
  "Content-Type": "multipart/form-data"
  }
  });
+ if (useWebhook) {
+ const ack = response.data;
+ return {
+ success: true,
+ provider: this.name,
+ data: {
+ id: ack.request_id || ack.transcription_id || `elevenlabs_${Date.now()}`,
+ text: "",
+ status: "queued"
+ },
+ tracking: {
+ requestId: ack.request_id
+ },
+ raw: response.data
+ };
+ }
  return this.normalizeResponse(response.data);
  } catch (error) {
  return this.createErrorResponse(error);
@@ -10194,20 +10490,9 @@ var ElevenLabsAdapter = class extends BaseAdapter {
  ws.onmessage = (event) => {
  receivedData = true;
  const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
- let messageType;
  try {
  const data = JSON.parse(rawPayload);
- if (data.error) {
- messageType = "error";
- } else if (data.message_type === "session_started") {
- messageType = "session_started";
- } else if (data.message_type === "partial_transcript") {
- messageType = "partial_transcript";
- } else if (data.message_type === "committed_transcript") {
- messageType = "committed_transcript";
- } else if (data.message_type === "committed_transcript_with_timestamps") {
- messageType = "committed_transcript_with_timestamps";
- }
+ const messageType = "error" in data ? "error" : data.message_type;
  if (callbacks?.onRawMessage) {
  callbacks.onRawMessage({
  provider: this.name,
@@ -10217,50 +10502,62 @@ var ElevenLabsAdapter = class extends BaseAdapter {
  messageType
  });
  }
- if (data.error) {
+ if ("error" in data) {
  callbacks?.onError?.({
- code: data.error_code?.toString() || "STREAM_ERROR",
+ code: data.message_type || "STREAM_ERROR",
  message: data.error
  });
  return;
  }
- if (data.message_type === "session_started") {
- return;
- }
- if (data.message_type === "partial_transcript") {
- const streamEvent = {
- type: "transcript",
- text: data.text || "",
- isFinal: false,
- confidence: void 0,
- language: data.language_code
- };
- callbacks?.onTranscript?.(streamEvent);
- return;
- }
- if (data.message_type === "committed_transcript" || data.message_type === "committed_transcript_with_timestamps") {
- const words = data.words ? data.words.map((w) => ({
- word: w.text || "",
- start: w.start || 0,
- end: w.end || 0,
- confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
- speaker: w.speaker_id
- })) : [];
- const streamEvent = {
- type: "transcript",
- text: data.text || "",
- isFinal: true,
- words: words.length > 0 ? words : void 0,
- speaker: words[0]?.speaker,
- language: data.language_code,
- confidence: void 0
- };
- callbacks?.onTranscript?.(streamEvent);
- if (options?.diarization && words.length > 0) {
- const utterances = buildUtterancesFromWords(words);
- for (const utterance of utterances) {
- callbacks?.onUtterance?.(utterance);
+ switch (data.message_type) {
+ case "session_started":
+ break;
+ case "partial_transcript": {
+ const streamEvent = {
+ type: "transcript",
+ text: data.text || "",
+ isFinal: false,
+ confidence: void 0
+ };
+ callbacks?.onTranscript?.(streamEvent);
+ break;
+ }
+ case "committed_transcript": {
+ const streamEvent = {
+ type: "transcript",
+ text: data.text || "",
+ isFinal: true,
+ confidence: void 0
+ };
+ callbacks?.onTranscript?.(streamEvent);
+ break;
+ }
+ case "committed_transcript_with_timestamps": {
+ const tsData = data;
+ const words = tsData.words ? tsData.words.map((w) => ({
+ word: w.text || "",
+ start: w.start || 0,
+ end: w.end || 0,
+ confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
+ speaker: w.speaker_id
+ })) : [];
+ const streamEvent = {
+ type: "transcript",
+ text: tsData.text || "",
+ isFinal: true,
+ words: words.length > 0 ? words : void 0,
+ speaker: words[0]?.speaker,
+ language: tsData.language_code,
+ confidence: void 0
+ };
+ callbacks?.onTranscript?.(streamEvent);
+ if (options?.diarization && words.length > 0) {
+ const utterances = buildUtterancesFromWords(words);
+ for (const utterance of utterances) {
+ callbacks?.onUtterance?.(utterance);
+ }
  }
+ break;
  }
  }
  } catch (error) {
@@ -39463,6 +39760,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
  AssemblyAILanguage,
  AssemblyAILanguageCodes,
  AssemblyAIListFilterSchema,
+ AssemblyAIRegion,
  AssemblyAISampleRate,
  AssemblyAISpeechModel,
  AssemblyAIStatus,
@@ -39513,6 +39811,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
  ElevenLabsLanguageCodes,
  ElevenLabsLanguageLabels,
  ElevenLabsLanguages,
+ ElevenLabsRegion,
  ElevenLabsTypes,
  ElevenLabsZodSchemas,
  GladiaAdapter,