voice-router-dev 0.8.9 → 0.9.1

This diff shows the published contents of the two package versions as they appear in a supported public registry; it is provided for informational purposes only.
package/dist/index.mjs CHANGED
@@ -754,60 +754,60 @@ var SonioxLanguage = {
754
754
  // src/generated/soniox/models.ts
755
755
  var SonioxModels = [
756
756
  { id: "stt-rt-v4", name: "Speech-to-Text Real-time v4", mode: "real_time" },
757
- { id: "stt-rt-v3", name: "Speech-to-Text Real-time v3", mode: "real_time" },
758
757
  { id: "stt-async-v4", name: "Speech-to-Text Async v4", mode: "async" },
759
- { id: "stt-async-v3", name: "Speech-to-Text Async v3", mode: "async" },
760
- { id: "stt-rt-preview", name: "Speech-to-Text Real-time Preview", mode: "real_time", aliasOf: "stt-rt-v3" },
761
- { id: "stt-async-preview", name: "Speech-to-Text Async Preview", mode: "async", aliasOf: "stt-async-v3" },
762
- { id: "stt-rt-v3-preview", name: "Speech-to-Text Real-time v3 Preview", mode: "real_time", aliasOf: "stt-rt-v3" },
763
- { id: "stt-rt-preview-v2", name: "Speech-to-Text Real-time Preview v2", mode: "real_time", aliasOf: "stt-rt-v3" },
764
- { id: "stt-async-preview-v1", name: "Speech-to-Text Async Preview v1", mode: "async", aliasOf: "stt-async-v3" }
758
+ { id: "stt-rt-preview", name: "Speech-to-Text Real-time Preview", mode: "real_time", aliasOf: "stt-rt-v4" },
759
+ { id: "stt-async-preview", name: "Speech-to-Text Async Preview", mode: "async", aliasOf: "stt-async-v4" },
760
+ { id: "stt-rt-v3-preview", name: "Speech-to-Text Real-time v3 Preview", mode: "real_time", aliasOf: "stt-rt-v4" },
761
+ { id: "stt-rt-preview-v2", name: "Speech-to-Text Real-time Preview v2", mode: "real_time", aliasOf: "stt-rt-v4" },
762
+ { id: "stt-async-preview-v1", name: "Speech-to-Text Async Preview v1", mode: "async", aliasOf: "stt-async-v4" },
763
+ { id: "stt-rt-v3", name: "Speech-to-Text Real-time v3", mode: "real_time", aliasOf: "stt-rt-v4" },
764
+ { id: "stt-async-v3", name: "Speech-to-Text Async v3", mode: "async", aliasOf: "stt-async-v4" }
765
765
  ];
766
766
  var SonioxModelCodes = [
767
767
  "stt-rt-v4",
768
- "stt-rt-v3",
769
768
  "stt-async-v4",
770
- "stt-async-v3",
771
769
  "stt-rt-preview",
772
770
  "stt-async-preview",
773
771
  "stt-rt-v3-preview",
774
772
  "stt-rt-preview-v2",
775
- "stt-async-preview-v1"
773
+ "stt-async-preview-v1",
774
+ "stt-rt-v3",
775
+ "stt-async-v3"
776
776
  ];
777
777
  var SonioxModelLabels = {
778
778
  "stt-rt-v4": "Speech-to-Text Real-time v4",
779
- "stt-rt-v3": "Speech-to-Text Real-time v3",
780
779
  "stt-async-v4": "Speech-to-Text Async v4",
781
- "stt-async-v3": "Speech-to-Text Async v3",
782
780
  "stt-rt-preview": "Speech-to-Text Real-time Preview",
783
781
  "stt-async-preview": "Speech-to-Text Async Preview",
784
782
  "stt-rt-v3-preview": "Speech-to-Text Real-time v3 Preview",
785
783
  "stt-rt-preview-v2": "Speech-to-Text Real-time Preview v2",
786
- "stt-async-preview-v1": "Speech-to-Text Async Preview v1"
784
+ "stt-async-preview-v1": "Speech-to-Text Async Preview v1",
785
+ "stt-rt-v3": "Speech-to-Text Real-time v3",
786
+ "stt-async-v3": "Speech-to-Text Async v3"
787
787
  };
788
788
  var SonioxModel = {
789
789
  stt_rt_v4: "stt-rt-v4",
790
- stt_rt_v3: "stt-rt-v3",
791
790
  stt_async_v4: "stt-async-v4",
792
- stt_async_v3: "stt-async-v3",
793
791
  stt_rt_preview: "stt-rt-preview",
794
792
  stt_async_preview: "stt-async-preview",
795
793
  stt_rt_v3_preview: "stt-rt-v3-preview",
796
794
  stt_rt_preview_v2: "stt-rt-preview-v2",
797
- stt_async_preview_v1: "stt-async-preview-v1"
795
+ stt_async_preview_v1: "stt-async-preview-v1",
796
+ stt_rt_v3: "stt-rt-v3",
797
+ stt_async_v3: "stt-async-v3"
798
798
  };
799
799
  var SonioxRealtimeModel = {
800
800
  stt_rt_v4: "stt-rt-v4",
801
- stt_rt_v3: "stt-rt-v3",
802
801
  stt_rt_preview: "stt-rt-preview",
803
802
  stt_rt_v3_preview: "stt-rt-v3-preview",
804
- stt_rt_preview_v2: "stt-rt-preview-v2"
803
+ stt_rt_preview_v2: "stt-rt-preview-v2",
804
+ stt_rt_v3: "stt-rt-v3"
805
805
  };
806
806
  var SonioxAsyncModel = {
807
807
  stt_async_v4: "stt-async-v4",
808
- stt_async_v3: "stt-async-v3",
809
808
  stt_async_preview: "stt-async-preview",
810
- stt_async_preview_v1: "stt-async-preview-v1"
809
+ stt_async_preview_v1: "stt-async-preview-v1",
810
+ stt_async_v3: "stt-async-v3"
811
811
  };
812
812
 
813
813
  // src/generated/speechmatics/languages.ts
@@ -3782,17 +3782,17 @@ var SummaryTypesEnum = {
3782
3782
  concise: "concise"
3783
3783
  };
3784
3784
 
3785
- // src/generated/gladia/schema/transcriptMessageType.ts
3786
- var TranscriptMessageType = {
3787
- transcript: "transcript"
3788
- };
3789
-
3790
3785
  // src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
3791
3786
  var TranscriptionControllerListV2KindItem = {
3792
3787
  "pre-recorded": "pre-recorded",
3793
3788
  live: "live"
3794
3789
  };
3795
3790
 
3791
+ // src/generated/gladia/schema/transcriptMessageType.ts
3792
+ var TranscriptMessageType = {
3793
+ transcript: "transcript"
3794
+ };
3795
+
3796
3796
  // src/generated/gladia/schema/translationMessageType.ts
3797
3797
  var TranslationMessageType = {
3798
3798
  translation: "translation"
@@ -4064,7 +4064,7 @@ var WebhookTranscriptionSuccessPayloadEvent = {
4064
4064
 
4065
4065
  // src/generated/gladia/api/gladiaControlAPI.ts
4066
4066
  var preRecordedControllerInitPreRecordedJobV2 = (initTranscriptionRequest, options) => {
4067
- return axios.post(`/v2/pre-recorded`, initTranscriptionRequest, options);
4067
+ return axios.post("/v2/pre-recorded", initTranscriptionRequest, options);
4068
4068
  };
4069
4069
  var preRecordedControllerGetPreRecordedJobV2 = (id, options) => {
4070
4070
  return axios.get(`/v2/pre-recorded/${id}`, options);
@@ -4079,13 +4079,13 @@ var preRecordedControllerGetAudioV2 = (id, options) => {
4079
4079
  });
4080
4080
  };
4081
4081
  var transcriptionControllerListV2 = (params, options) => {
4082
- return axios.get(`/v2/transcription`, {
4082
+ return axios.get("/v2/transcription", {
4083
4083
  ...options,
4084
4084
  params: { ...params, ...options?.params }
4085
4085
  });
4086
4086
  };
4087
4087
  var streamingControllerInitStreamingSessionV2 = (streamingRequest, params, options) => {
4088
- return axios.post(`/v2/live`, streamingRequest, {
4088
+ return axios.post("/v2/live", streamingRequest, {
4089
4089
  ...options,
4090
4090
  params: { ...params, ...options?.params }
4091
4091
  });
@@ -4351,7 +4351,6 @@ var GladiaAdapter = class extends BaseAdapter {
4351
4351
  sentiment: result?.sentiment_analysis || void 0,
4352
4352
  audioToLlm: result?.audio_to_llm || void 0,
4353
4353
  chapters: result?.chapterization || void 0,
4354
- speakerReidentification: result?.speaker_reidentification || void 0,
4355
4354
  structuredData: result?.structured_data_extraction || void 0,
4356
4355
  customMetadata: response.custom_metadata || void 0
4357
4356
  },
@@ -5413,17 +5412,17 @@ var PiiPolicy = {
5413
5412
  zodiac_sign: "zodiac_sign"
5414
5413
  };
5415
5414
 
5415
+ // src/generated/assemblyai/schema/redactedAudioStatus.ts
5416
+ var RedactedAudioStatus = {
5417
+ redacted_audio_ready: "redacted_audio_ready"
5418
+ };
5419
+
5416
5420
  // src/generated/assemblyai/schema/redactPiiAudioQuality.ts
5417
5421
  var RedactPiiAudioQuality = {
5418
5422
  mp3: "mp3",
5419
5423
  wav: "wav"
5420
5424
  };
5421
5425
 
5422
- // src/generated/assemblyai/schema/redactedAudioStatus.ts
5423
- var RedactedAudioStatus = {
5424
- redacted_audio_ready: "redacted_audio_ready"
5425
- };
5426
-
5427
5426
  // src/generated/assemblyai/schema/sentiment.ts
5428
5427
  var Sentiment = {
5429
5428
  POSITIVE: "POSITIVE",
@@ -5487,10 +5486,10 @@ var TranscriptRemoveAudioTags = {
5487
5486
 
5488
5487
  // src/generated/assemblyai/api/assemblyAIAPI.ts
5489
5488
  var createTranscript = (transcriptParams, options) => {
5490
- return axios2.post(`/v2/transcript`, transcriptParams, options);
5489
+ return axios2.post("/v2/transcript", transcriptParams, options);
5491
5490
  };
5492
5491
  var listTranscripts = (params, options) => {
5493
- return axios2.get(`/v2/transcript`, {
5492
+ return axios2.get("/v2/transcript", {
5494
5493
  ...options,
5495
5494
  params: { ...params, ...options?.params }
5496
5495
  });
@@ -5836,23 +5835,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
5836
5835
  "AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
5837
5836
  );
5838
5837
  }
5839
- const aaiOpts = { ...options?.assemblyai };
5840
- if ("speech_model" in aaiOpts && aaiOpts.speech_model != null) {
5841
- if (!aaiOpts.speech_models) {
5842
- aaiOpts.speech_models = [aaiOpts.speech_model];
5843
- }
5844
- delete aaiOpts.speech_model;
5838
+ const passthrough = options?.assemblyai;
5839
+ let speechModels;
5840
+ if (passthrough?.speech_model != null && !passthrough.speech_models) {
5841
+ speechModels = [passthrough.speech_model];
5842
+ } else if (passthrough?.speech_models) {
5843
+ speechModels = passthrough.speech_models;
5845
5844
  }
5845
+ const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
5846
5846
  const request = {
5847
- ...aaiOpts,
5847
+ ...typedOpts,
5848
5848
  audio_url: audioUrl,
5849
5849
  // speech_models is required — default to universal-3-pro
5850
- speech_models: aaiOpts.speech_models ?? [
5851
- "universal-3-pro"
5852
- ],
5850
+ speech_models: speechModels ?? ["universal-3-pro"],
5853
5851
  // Enable punctuation and formatting by default
5854
- punctuate: aaiOpts.punctuate ?? true,
5855
- format_text: aaiOpts.format_text ?? true
5852
+ punctuate: typedOpts.punctuate ?? true,
5853
+ format_text: typedOpts.format_text ?? true
5856
5854
  };
5857
5855
  if (options) {
5858
5856
  if (options.model) {
@@ -6567,8 +6565,10 @@ var DeepgramAdapter = class extends BaseAdapter {
6567
6565
  /**
6568
6566
  * Submit audio for transcription
6569
6567
  *
6570
- * Sends audio to Deepgram API for transcription. Deepgram processes
6571
- * synchronously and returns results immediately (no polling required).
6568
+ * Sends audio to Deepgram API for transcription. Deepgram normally processes
6569
+ * synchronously and returns results immediately. When `webhookUrl` is set,
6570
+ * Deepgram can instead return an async callback acknowledgment containing a
6571
+ * request ID.
6572
6572
  *
6573
6573
  * @param audio - Audio input (URL or file buffer)
6574
6574
  * @param options - Transcription options
@@ -6619,17 +6619,59 @@ var DeepgramAdapter = class extends BaseAdapter {
6619
6619
  { params }
6620
6620
  ).then((res) => res.data);
6621
6621
  } else if (audio.type === "file") {
6622
- response = await this.client.post("/listen", audio.file, {
6623
- params,
6624
- headers: {
6625
- "Content-Type": "audio/*"
6622
+ response = await this.client.post(
6623
+ "/listen",
6624
+ audio.file,
6625
+ {
6626
+ params,
6627
+ headers: {
6628
+ "Content-Type": "audio/*"
6629
+ }
6626
6630
  }
6627
- }).then((res) => res.data);
6631
+ ).then((res) => res.data);
6628
6632
  } else {
6629
6633
  throw new Error(
6630
6634
  "Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
6631
6635
  );
6632
6636
  }
6637
+ if (options?.webhookUrl) {
6638
+ const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
6639
+ if (!requestId) {
6640
+ return {
6641
+ success: false,
6642
+ provider: this.name,
6643
+ error: {
6644
+ code: "MISSING_REQUEST_ID",
6645
+ message: "Deepgram callback mode did not return a request ID"
6646
+ },
6647
+ raw: response
6648
+ };
6649
+ }
6650
+ return {
6651
+ success: true,
6652
+ provider: this.name,
6653
+ data: {
6654
+ id: requestId,
6655
+ text: "",
6656
+ status: "queued"
6657
+ },
6658
+ tracking: {
6659
+ requestId
6660
+ },
6661
+ raw: response
6662
+ };
6663
+ }
6664
+ if (!("results" in response) || !("metadata" in response)) {
6665
+ return {
6666
+ success: false,
6667
+ provider: this.name,
6668
+ error: {
6669
+ code: "INVALID_RESPONSE",
6670
+ message: "Deepgram did not return a synchronous transcription payload"
6671
+ },
6672
+ raw: response
6673
+ };
6674
+ }
6633
6675
  return this.normalizeResponse(response);
6634
6676
  } catch (error) {
6635
6677
  return this.createErrorResponse(error);
@@ -7290,7 +7332,8 @@ var DeepgramAdapter = class extends BaseAdapter {
7290
7332
  break;
7291
7333
  }
7292
7334
  case "Metadata": {
7293
- callbacks?.onMetadata?.(message);
7335
+ const { type: _, ...metadata } = message;
7336
+ callbacks?.onMetadata?.(metadata);
7294
7337
  break;
7295
7338
  }
7296
7339
  case "Error": {
@@ -7627,13 +7670,13 @@ var TextNormalizationKind = {
7627
7670
 
7628
7671
  // src/generated/azure/api/speechServicesAPIVersion32.ts
7629
7672
  var transcriptionsList = (params, options) => {
7630
- return axios4.get(`/transcriptions`, {
7673
+ return axios4.get("/transcriptions", {
7631
7674
  ...options,
7632
7675
  params: { ...params, ...options?.params }
7633
7676
  });
7634
7677
  };
7635
7678
  var transcriptionsCreate = (transcription, options) => {
7636
- return axios4.post(`/transcriptions`, transcription, options);
7679
+ return axios4.post("/transcriptions", transcription, options);
7637
7680
  };
7638
7681
  var transcriptionsGet = (id, options) => {
7639
7682
  return axios4.get(`/transcriptions/${id}`, options);
@@ -7648,13 +7691,13 @@ var transcriptionsListFiles = (id, params, options) => {
7648
7691
  });
7649
7692
  };
7650
7693
  var webHooksList = (params, options) => {
7651
- return axios4.get(`/webhooks`, {
7694
+ return axios4.get("/webhooks", {
7652
7695
  ...options,
7653
7696
  params: { ...params, ...options?.params }
7654
7697
  });
7655
7698
  };
7656
7699
  var webHooksCreate = (webHook, options) => {
7657
- return axios4.post(`/webhooks`, webHook, options);
7700
+ return axios4.post("/webhooks", webHook, options);
7658
7701
  };
7659
7702
  var webHooksDelete = (id, options) => {
7660
7703
  return axios4.delete(`/webhooks/${id}`, options);
@@ -7726,10 +7769,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
7726
7769
  contentUrls: [audio.url],
7727
7770
  properties: this.buildTranscriptionProperties(options)
7728
7771
  };
7729
- const response = await transcriptionsCreate(
7730
- transcriptionRequest,
7731
- this.getAxiosConfig()
7732
- );
7772
+ const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
7733
7773
  const transcription = response.data;
7734
7774
  const transcriptId = transcription.self?.split("/").pop() || "";
7735
7775
  return await this.pollForCompletion(transcriptId);
@@ -7782,7 +7822,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
7782
7822
  this.getAxiosConfig()
7783
7823
  );
7784
7824
  const files = filesResponse.data?.values || [];
7785
- const resultFile = files.find((file) => file.kind === "Transcription");
7825
+ const resultFile = files.find((file) => file.kind === FileKind.Transcription);
7786
7826
  if (!resultFile?.links?.contentUrl) {
7787
7827
  return {
7788
7828
  success: false,
@@ -8006,15 +8046,20 @@ var AzureSTTAdapter = class extends BaseAdapter {
8006
8046
  return properties;
8007
8047
  }
8008
8048
  /**
8009
- * Normalize Azure status to unified status
8049
+ * Normalize Azure status to unified status using generated AzureStatus constants
8010
8050
  */
8011
8051
  normalizeStatus(status) {
8012
- const statusStr = status?.toString().toLowerCase() || "";
8013
- if (statusStr.includes("succeeded")) return "completed";
8014
- if (statusStr.includes("running")) return "processing";
8015
- if (statusStr.includes("notstarted")) return "queued";
8016
- if (statusStr.includes("failed")) return "error";
8017
- return "queued";
8052
+ switch (status) {
8053
+ case Status.Succeeded:
8054
+ return "completed";
8055
+ case Status.Running:
8056
+ return "processing";
8057
+ case Status.Failed:
8058
+ return "error";
8059
+ case Status.NotStarted:
8060
+ default:
8061
+ return "queued";
8062
+ }
8018
8063
  }
8019
8064
  /**
8020
8065
  * Normalize Azure transcription response to unified format
@@ -8134,30 +8179,30 @@ function getAzureOpenAIRealtimeUrl(endpoint, deployment, apiVersion = "2024-10-0
8134
8179
  import axios6 from "axios";
8135
8180
  var createTranscription = (createTranscriptionRequest, options) => {
8136
8181
  const formData = new FormData();
8137
- formData.append(`file`, createTranscriptionRequest.file);
8138
- formData.append(`model`, createTranscriptionRequest.model);
8182
+ formData.append("file", createTranscriptionRequest.file);
8183
+ formData.append("model", createTranscriptionRequest.model);
8139
8184
  if (createTranscriptionRequest.language !== void 0) {
8140
- formData.append(`language`, createTranscriptionRequest.language);
8185
+ formData.append("language", createTranscriptionRequest.language);
8141
8186
  }
8142
8187
  if (createTranscriptionRequest.prompt !== void 0) {
8143
- formData.append(`prompt`, createTranscriptionRequest.prompt);
8188
+ formData.append("prompt", createTranscriptionRequest.prompt);
8144
8189
  }
8145
8190
  if (createTranscriptionRequest.response_format !== void 0) {
8146
- formData.append(`response_format`, createTranscriptionRequest.response_format);
8191
+ formData.append("response_format", createTranscriptionRequest.response_format);
8147
8192
  }
8148
8193
  if (createTranscriptionRequest.temperature !== void 0) {
8149
- formData.append(`temperature`, createTranscriptionRequest.temperature.toString());
8194
+ formData.append("temperature", createTranscriptionRequest.temperature.toString());
8150
8195
  }
8151
8196
  if (createTranscriptionRequest.include !== void 0) {
8152
- createTranscriptionRequest.include.forEach((value) => formData.append(`include`, value));
8197
+ createTranscriptionRequest.include.forEach((value) => formData.append("include", value));
8153
8198
  }
8154
8199
  if (createTranscriptionRequest.timestamp_granularities !== void 0) {
8155
8200
  createTranscriptionRequest.timestamp_granularities.forEach(
8156
- (value) => formData.append(`timestamp_granularities`, value)
8201
+ (value) => formData.append("timestamp_granularities", value)
8157
8202
  );
8158
8203
  }
8159
8204
  if (createTranscriptionRequest.stream !== void 0 && createTranscriptionRequest.stream !== null) {
8160
- formData.append(`stream`, createTranscriptionRequest.stream.toString());
8205
+ formData.append("stream", createTranscriptionRequest.stream.toString());
8161
8206
  }
8162
8207
  if (createTranscriptionRequest.chunking_strategy !== void 0 && createTranscriptionRequest.chunking_strategy !== null) {
8163
8208
  formData.append(
@@ -8167,15 +8212,15 @@ var createTranscription = (createTranscriptionRequest, options) => {
8167
8212
  }
8168
8213
  if (createTranscriptionRequest.known_speaker_names !== void 0) {
8169
8214
  createTranscriptionRequest.known_speaker_names.forEach(
8170
- (value) => formData.append(`known_speaker_names`, value)
8215
+ (value) => formData.append("known_speaker_names", value)
8171
8216
  );
8172
8217
  }
8173
8218
  if (createTranscriptionRequest.known_speaker_references !== void 0) {
8174
8219
  createTranscriptionRequest.known_speaker_references.forEach(
8175
- (value) => formData.append(`known_speaker_references`, value)
8220
+ (value) => formData.append("known_speaker_references", value)
8176
8221
  );
8177
8222
  }
8178
- return axios6.post(`/audio/transcriptions`, formData, options);
8223
+ return axios6.post("/audio/transcriptions", formData, options);
8179
8224
  };
8180
8225
 
8181
8226
  // src/generated/openai/schema/createTranscriptionRequestTimestampGranularitiesItem.ts
@@ -8264,7 +8309,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
8264
8309
  const request = {
8265
8310
  ...options?.openai,
8266
8311
  file: audioData,
8267
- // Generated type expects Blob
8268
8312
  model
8269
8313
  };
8270
8314
  if (options?.language) {
@@ -8691,7 +8735,6 @@ function createOpenAIWhisperAdapter(config) {
8691
8735
 
8692
8736
  // src/adapters/speechmatics-adapter.ts
8693
8737
  import axios8 from "axios";
8694
- import WebSocket6 from "ws";
8695
8738
 
8696
8739
  // src/generated/speechmatics/schema/notificationConfigContentsItem.ts
8697
8740
  var NotificationConfigContentsItem = {
@@ -8741,7 +8784,8 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
8741
8784
  super(...arguments);
8742
8785
  this.name = "speechmatics";
8743
8786
  this.capabilities = {
8744
- streaming: true,
8787
+ streaming: false,
8788
+ // Batch only (streaming available via separate WebSocket API)
8745
8789
  diarization: true,
8746
8790
  wordTimestamps: true,
8747
8791
  languageDetection: false,
@@ -8876,16 +8920,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
8876
8920
  jobConfig.fetch_data = {
8877
8921
  url: audio.url
8878
8922
  };
8879
- const formData = new FormData();
8880
- formData.append("config", JSON.stringify(jobConfig));
8881
- requestBody = formData;
8882
- headers = { "Content-Type": "multipart/form-data" };
8923
+ requestBody = { config: JSON.stringify(jobConfig) };
8924
+ headers = { "Content-Type": "application/json" };
8883
8925
  } else if (audio.type === "file") {
8884
- const formData = new FormData();
8885
- formData.append("config", JSON.stringify(jobConfig));
8886
- const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
8887
- formData.append("data_file", audioBlob, audio.filename || "audio.wav");
8888
- requestBody = formData;
8926
+ requestBody = {
8927
+ config: JSON.stringify(jobConfig),
8928
+ data_file: audio.file
8929
+ };
8889
8930
  headers = { "Content-Type": "multipart/form-data" };
8890
8931
  } else {
8891
8932
  return {
@@ -8990,389 +9031,6 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
8990
9031
  throw error;
8991
9032
  }
8992
9033
  }
8993
- /**
8994
- * Build WebSocket URL for real-time streaming
8995
- *
8996
- * Note: Real-time API uses a different host from the batch API:
8997
- * - Batch: {region}.asr.api.speechmatics.com
8998
- * - Real-time: {region}.rt.speechmatics.com
8999
- *
9000
- * @param region - Regional endpoint identifier
9001
- * @returns WebSocket URL for real-time API
9002
- */
9003
- getRegionalWsUrl(region) {
9004
- if (this.config?.wsBaseUrl) {
9005
- return this.config.wsBaseUrl;
9006
- }
9007
- const rtRegionMap = {
9008
- eu1: "eu",
9009
- eu2: "eu",
9010
- us1: "us",
9011
- us2: "us",
9012
- au1: "eu"
9013
- // No AU RT endpoint — fall back to EU
9014
- };
9015
- const rtPrefix = rtRegionMap[region || ""] || "eu";
9016
- return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
9017
- }
9018
- /**
9019
- * Stream audio for real-time transcription via WebSocket
9020
- *
9021
- * Connects to Speechmatics' real-time API and sends audio chunks
9022
- * for transcription with results returned via callbacks.
9023
- *
9024
- * @param options - Streaming configuration options
9025
- * @param callbacks - Event callbacks for transcription results
9026
- * @returns Promise that resolves with a StreamingSession
9027
- *
9028
- * @example Basic streaming
9029
- * ```typescript
9030
- * const session = await adapter.transcribeStream({
9031
- * language: 'en',
9032
- * speechmaticsStreaming: {
9033
- * enablePartials: true,
9034
- * operatingPoint: 'enhanced'
9035
- * }
9036
- * }, {
9037
- * onTranscript: (event) => console.log(event.text),
9038
- * onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
9039
- * onError: (error) => console.error(error)
9040
- * });
9041
- *
9042
- * await session.sendAudio({ data: audioBuffer });
9043
- * await session.close();
9044
- * ```
9045
- */
9046
- async transcribeStream(options, callbacks) {
9047
- this.validateConfig();
9048
- const smOpts = options?.speechmaticsStreaming || {};
9049
- const region = smOpts.region || this.config?.region;
9050
- const wsUrl = this.getRegionalWsUrl(region);
9051
- const ws = new WebSocket6(wsUrl, {
9052
- headers: {
9053
- Authorization: `Bearer ${this.config.apiKey}`
9054
- }
9055
- });
9056
- let sessionStatus = "connecting";
9057
- const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
9058
- let seqNo = 0;
9059
- let utteranceResults = [];
9060
- const sessionReady = new Promise((resolve, reject) => {
9061
- const timeout = setTimeout(() => {
9062
- reject(new Error("WebSocket connection timeout"));
9063
- }, 1e4);
9064
- let wsOpen = false;
9065
- ws.once("error", (error) => {
9066
- clearTimeout(timeout);
9067
- reject(error);
9068
- });
9069
- ws.once("open", () => {
9070
- wsOpen = true;
9071
- const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
9072
- const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
9073
- const startMsg = {
9074
- message: "StartRecognition",
9075
- audio_format: {
9076
- type: "raw",
9077
- encoding,
9078
- sample_rate: sampleRate
9079
- },
9080
- transcription_config: {
9081
- language: smOpts.language || options?.language || "en",
9082
- enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
9083
- }
9084
- };
9085
- const txConfig = startMsg.transcription_config;
9086
- if (smOpts.domain) txConfig.domain = smOpts.domain;
9087
- if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
9088
- if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
9089
- if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
9090
- if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
9091
- if (smOpts.diarization === "speaker" || options?.diarization) {
9092
- txConfig.diarization = "speaker";
9093
- if (smOpts.maxSpeakers) {
9094
- txConfig.speaker_diarization_config = {
9095
- max_speakers: smOpts.maxSpeakers
9096
- };
9097
- } else if (options?.speakersExpected) {
9098
- txConfig.speaker_diarization_config = {
9099
- max_speakers: options.speakersExpected
9100
- };
9101
- }
9102
- }
9103
- if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
9104
- txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
9105
- content: word
9106
- }));
9107
- } else if (options?.customVocabulary && options.customVocabulary.length > 0) {
9108
- txConfig.additional_vocab = options.customVocabulary.map((word) => ({
9109
- content: word
9110
- }));
9111
- }
9112
- if (smOpts.conversationConfig) {
9113
- txConfig.conversation_config = {
9114
- end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
9115
- };
9116
- }
9117
- const startPayload = JSON.stringify(startMsg);
9118
- if (callbacks?.onRawMessage) {
9119
- callbacks.onRawMessage({
9120
- provider: "speechmatics",
9121
- direction: "outgoing",
9122
- timestamp: Date.now(),
9123
- payload: startPayload,
9124
- messageType: "StartRecognition"
9125
- });
9126
- }
9127
- ws.send(startPayload);
9128
- });
9129
- const onMessage = (data) => {
9130
- const rawPayload = data.toString();
9131
- try {
9132
- const msg = JSON.parse(rawPayload);
9133
- if (msg.message === "RecognitionStarted") {
9134
- clearTimeout(timeout);
9135
- ws.removeListener("message", onMessage);
9136
- ws.emit("message", data);
9137
- resolve();
9138
- } else if (msg.message === "Error") {
9139
- clearTimeout(timeout);
9140
- ws.removeListener("message", onMessage);
9141
- reject(new Error(msg.reason || "Recognition failed to start"));
9142
- }
9143
- } catch {
9144
- }
9145
- };
9146
- ws.on("message", onMessage);
9147
- });
9148
- ws.on("message", (data) => {
9149
- const rawPayload = data.toString();
9150
- try {
9151
- const message = JSON.parse(rawPayload);
9152
- if (callbacks?.onRawMessage) {
9153
- callbacks.onRawMessage({
9154
- provider: "speechmatics",
9155
- direction: "incoming",
9156
- timestamp: Date.now(),
9157
- payload: rawPayload,
9158
- messageType: message.message
9159
- });
9160
- }
9161
- this.handleStreamingMessage(message, callbacks, utteranceResults);
9162
- } catch (error) {
9163
- if (callbacks?.onRawMessage) {
9164
- callbacks.onRawMessage({
9165
- provider: "speechmatics",
9166
- direction: "incoming",
9167
- timestamp: Date.now(),
9168
- payload: rawPayload,
9169
- messageType: "parse_error"
9170
- });
9171
- }
9172
- callbacks?.onError?.({
9173
- code: "PARSE_ERROR",
9174
- message: "Failed to parse WebSocket message",
9175
- details: error
9176
- });
9177
- }
9178
- });
9179
- ws.on("error", (error) => {
9180
- callbacks?.onError?.({
9181
- code: "WEBSOCKET_ERROR",
9182
- message: error.message,
9183
- details: error
9184
- });
9185
- });
9186
- ws.on("close", (code, reason) => {
9187
- sessionStatus = "closed";
9188
- callbacks?.onClose?.(code, reason.toString());
9189
- });
9190
- await sessionReady;
9191
- sessionStatus = "open";
9192
- callbacks?.onOpen?.();
9193
- return {
9194
- id: sessionId,
9195
- provider: this.name,
9196
- createdAt: /* @__PURE__ */ new Date(),
9197
- getStatus: () => sessionStatus,
9198
- sendAudio: async (chunk) => {
9199
- if (sessionStatus !== "open") {
9200
- throw new Error(`Cannot send audio: session is ${sessionStatus}`);
9201
- }
9202
- if (ws.readyState !== WebSocket6.OPEN) {
9203
- throw new Error("WebSocket is not open");
9204
- }
9205
- if (callbacks?.onRawMessage) {
9206
- const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
9207
- chunk.data.byteOffset,
9208
- chunk.data.byteOffset + chunk.data.byteLength
9209
- );
9210
- callbacks.onRawMessage({
9211
- provider: this.name,
9212
- direction: "outgoing",
9213
- timestamp: Date.now(),
9214
- payload: audioPayload,
9215
- messageType: "audio"
9216
- });
9217
- }
9218
- ws.send(chunk.data);
9219
- seqNo++;
9220
- if (chunk.isLast) {
9221
- const endMsg = JSON.stringify({
9222
- message: "EndOfStream",
9223
- last_seq_no: seqNo
9224
- });
9225
- if (callbacks?.onRawMessage) {
9226
- callbacks.onRawMessage({
9227
- provider: this.name,
9228
- direction: "outgoing",
9229
- timestamp: Date.now(),
9230
- payload: endMsg,
9231
- messageType: "EndOfStream"
9232
- });
9233
- }
9234
- ws.send(endMsg);
9235
- }
9236
- },
9237
- close: async () => {
9238
- if (sessionStatus === "closed" || sessionStatus === "closing") {
9239
- return;
9240
- }
9241
- sessionStatus = "closing";
9242
- if (ws.readyState === WebSocket6.OPEN) {
9243
- seqNo++;
9244
- ws.send(
9245
- JSON.stringify({
9246
- message: "EndOfStream",
9247
- last_seq_no: seqNo
9248
- })
9249
- );
9250
- }
9251
- return new Promise((resolve) => {
9252
- const timeout = setTimeout(() => {
9253
- ws.terminate();
9254
- sessionStatus = "closed";
9255
- resolve();
9256
- }, 5e3);
9257
- const onMsg = (data) => {
9258
- try {
9259
- const msg = JSON.parse(data.toString());
9260
- if (msg.message === "EndOfTranscript") {
9261
- ws.removeListener("message", onMsg);
9262
- clearTimeout(timeout);
9263
- ws.close();
9264
- }
9265
- } catch {
9266
- }
9267
- };
9268
- ws.on("message", onMsg);
9269
- ws.once("close", () => {
9270
- clearTimeout(timeout);
9271
- sessionStatus = "closed";
9272
- resolve();
9273
- });
9274
- });
9275
- }
9276
- };
9277
- }
9278
- /**
9279
- * Handle incoming Speechmatics real-time WebSocket messages
9280
- */
9281
- handleStreamingMessage(message, callbacks, utteranceResults) {
9282
- switch (message.message) {
9283
- case "RecognitionStarted": {
9284
- break;
9285
- }
9286
- case "AddPartialTranscript": {
9287
- const results = message.results || [];
9288
- const text = buildTextFromSpeechmaticsResults(results);
9289
- if (text) {
9290
- callbacks?.onTranscript?.({
9291
- type: "transcript",
9292
- text,
9293
- isFinal: false,
9294
- words: this.extractWordsFromResults(results),
9295
- data: message
9296
- });
9297
- }
9298
- break;
9299
- }
9300
- case "AddTranscript": {
9301
- const results = message.results || [];
9302
- const text = buildTextFromSpeechmaticsResults(results);
9303
- if (utteranceResults) {
9304
- utteranceResults.push(...results);
9305
- }
9306
- if (text) {
9307
- callbacks?.onTranscript?.({
9308
- type: "transcript",
9309
- text,
9310
- isFinal: true,
9311
- words: this.extractWordsFromResults(results),
9312
- data: message
9313
- });
9314
- }
9315
- break;
9316
- }
9317
- case "EndOfUtterance": {
9318
- if (utteranceResults && utteranceResults.length > 0) {
9319
- const text = buildTextFromSpeechmaticsResults(utteranceResults);
9320
- const words = this.extractWordsFromResults(utteranceResults);
9321
- const utterances = buildUtterancesFromWords(words);
9322
- if (utterances.length > 0) {
9323
- for (const utt of utterances) {
9324
- callbacks?.onUtterance?.(utt);
9325
- }
9326
- } else if (text) {
9327
- callbacks?.onUtterance?.({
9328
- text,
9329
- start: words.length > 0 ? words[0].start : 0,
9330
- end: words.length > 0 ? words[words.length - 1].end : 0,
9331
- words
9332
- });
9333
- }
9334
- utteranceResults.length = 0;
9335
- }
9336
- break;
9337
- }
9338
- case "AudioAdded": {
9339
- break;
9340
- }
9341
- case "EndOfTranscript": {
9342
- break;
9343
- }
9344
- case "Info":
9345
- case "Warning": {
9346
- callbacks?.onMetadata?.(message);
9347
- break;
9348
- }
9349
- case "Error": {
9350
- const errMsg = message;
9351
- callbacks?.onError?.({
9352
- code: errMsg.type || "SPEECHMATICS_ERROR",
9353
- message: errMsg.reason || "Unknown error",
9354
- details: message
9355
- });
9356
- break;
9357
- }
9358
- default: {
9359
- callbacks?.onMetadata?.(message);
9360
- break;
9361
- }
9362
- }
9363
- }
9364
- /**
9365
- * Extract unified Word[] from Speechmatics recognition results
9366
- */
9367
- extractWordsFromResults(results) {
9368
- return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
9369
- word: result.alternatives?.[0]?.content || "",
9370
- start: result.start_time,
9371
- end: result.end_time,
9372
- confidence: result.alternatives?.[0]?.confidence,
9373
- speaker: result.alternatives?.[0]?.speaker
9374
- }));
9375
- }
9376
9034
  /**
9377
9035
  * Normalize Speechmatics status to unified status
9378
9036
  * Uses generated JobDetailsStatus enum values
@@ -9441,9 +9099,6 @@ function createSpeechmaticsAdapter(config) {
9441
9099
  return adapter;
9442
9100
  }
9443
9101
 
9444
- // src/adapters/soniox-adapter.ts
9445
- import axios9 from "axios";
9446
-
9447
9102
  // src/generated/soniox/schema/transcriptionStatus.ts
9448
9103
  var TranscriptionStatus = {
9449
9104
  queued: "queued",
@@ -9452,6 +9107,57 @@ var TranscriptionStatus = {
9452
9107
  error: "error"
9453
9108
  };
9454
9109
 
9110
+ // src/generated/soniox/api/sonioxPublicAPI.ts
9111
+ import axios9 from "axios";
9112
+
9113
+ // src/generated/soniox/schema/index.ts
9114
+ var schema_exports4 = {};
9115
+ __export(schema_exports4, {
9116
+ TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
9117
+ TranscriptionMode: () => TranscriptionMode,
9118
+ TranscriptionStatus: () => TranscriptionStatus,
9119
+ TranslationConfigType: () => TranslationConfigType
9120
+ });
9121
+
9122
+ // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
9123
+ var TemporaryApiKeyUsageType = {
9124
+ transcribe_websocket: "transcribe_websocket"
9125
+ };
9126
+
9127
+ // src/generated/soniox/schema/transcriptionMode.ts
9128
+ var TranscriptionMode = {
9129
+ real_time: "real_time",
9130
+ async: "async"
9131
+ };
9132
+
9133
+ // src/generated/soniox/schema/translationConfigType.ts
9134
+ var TranslationConfigType = {
9135
+ one_way: "one_way",
9136
+ two_way: "two_way"
9137
+ };
9138
+
9139
+ // src/generated/soniox/api/sonioxPublicAPI.ts
9140
+ var uploadFile = (uploadFileBody2, options) => {
9141
+ const formData = new FormData();
9142
+ if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
9143
+ formData.append("client_reference_id", uploadFileBody2.client_reference_id);
9144
+ }
9145
+ formData.append("file", uploadFileBody2.file);
9146
+ return axios9.post("/v1/files", formData, options);
9147
+ };
9148
+ var createTranscription2 = (createTranscriptionPayload, options) => {
9149
+ return axios9.post("/v1/transcriptions", createTranscriptionPayload, options);
9150
+ };
9151
+ var getTranscription = (transcriptionId, options) => {
9152
+ return axios9.get(`/v1/transcriptions/${transcriptionId}`, options);
9153
+ };
9154
+ var getTranscriptionTranscript = (transcriptionId, options) => {
9155
+ return axios9.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
9156
+ };
9157
+ var getModels = (options) => {
9158
+ return axios9.get("/v1/models", options);
9159
+ };
9160
+
9455
9161
  // src/adapters/soniox-adapter.ts
9456
9162
  var SonioxAdapter = class extends BaseAdapter {
9457
9163
  constructor() {
@@ -9506,11 +9212,17 @@ var SonioxAdapter = class extends BaseAdapter {
9506
9212
  }
9507
9213
  }
9508
9214
  /**
9509
- * Get the base URL for API requests
9215
+ * Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
9510
9216
  */
9511
9217
  get baseUrl() {
9512
9218
  if (this.config?.baseUrl) return this.config.baseUrl;
9513
- return `https://${this.getRegionalHost()}/v1`;
9219
+ return `https://${this.getRegionalHost()}`;
9220
+ }
9221
+ /**
9222
+ * Build axios config with Soniox Bearer auth
9223
+ */
9224
+ getAxiosConfig() {
9225
+ return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
9514
9226
  }
9515
9227
  initialize(config) {
9516
9228
  super.initialize(config);
@@ -9520,15 +9232,6 @@ var SonioxAdapter = class extends BaseAdapter {
9520
9232
  if (config.model) {
9521
9233
  this.defaultModel = config.model;
9522
9234
  }
9523
- this.client = axios9.create({
9524
- baseURL: this.baseUrl,
9525
- timeout: config.timeout || 12e4,
9526
- headers: {
9527
- Authorization: `Bearer ${config.apiKey}`,
9528
- "Content-Type": "application/json",
9529
- ...config.headers
9530
- }
9531
- });
9532
9235
  }
9533
9236
  /**
9534
9237
  * Get current region
@@ -9558,23 +9261,12 @@ var SonioxAdapter = class extends BaseAdapter {
9558
9261
  */
9559
9262
  setRegion(region) {
9560
9263
  this.region = region;
9561
- if (this.config?.apiKey) {
9562
- this.client = axios9.create({
9563
- baseURL: this.baseUrl,
9564
- timeout: this.config.timeout || 12e4,
9565
- headers: {
9566
- Authorization: `Bearer ${this.config.apiKey}`,
9567
- "Content-Type": "application/json",
9568
- ...this.config.headers
9569
- }
9570
- });
9571
- }
9572
9264
  }
9573
9265
  /**
9574
9266
  * Submit audio for transcription
9575
9267
  *
9576
- * Soniox uses async batch processing. The transcribe method submits audio
9577
- * and waits for completion (or use getTranscript for polling).
9268
+ * Uses the async v1 API: createTranscription returns status `queued`,
9269
+ * then polls until completed (or returns immediately if webhook is set).
9578
9270
  *
9579
9271
  * @param audio - Audio input (URL or file)
9580
9272
  * @param options - Transcription options
@@ -9583,21 +9275,44 @@ var SonioxAdapter = class extends BaseAdapter {
9583
9275
  async transcribe(audio, options) {
9584
9276
  this.validateConfig();
9585
9277
  try {
9586
- const requestBody = {
9587
- model: options?.model || this.defaultModel
9588
- };
9589
- if (audio.type === "url") {
9590
- requestBody.audio_url = audio.url;
9591
- } else if (audio.type === "file") {
9592
- const formData = new FormData();
9278
+ const sonioxOpts = options?.soniox;
9279
+ if (audio.type === "file") {
9593
9280
  const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
9594
- formData.append("file", audioBlob, audio.filename || "audio.wav");
9595
- const uploadResponse = await this.client.post("/files", formData, {
9596
- headers: {
9597
- "Content-Type": "multipart/form-data"
9598
- }
9599
- });
9600
- requestBody.file_id = uploadResponse.data.id;
9281
+ const uploadBody = { file: audioBlob };
9282
+ const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
9283
+ const payload = {
9284
+ ...sonioxOpts,
9285
+ model: options?.model || this.defaultModel,
9286
+ file_id: fileResp.data.id,
9287
+ language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
9288
+ enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
9289
+ enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
9290
+ context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
9291
+ webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
9292
+ };
9293
+ const createResp = await createTranscription2(payload, this.getAxiosConfig());
9294
+ const meta = createResp.data;
9295
+ if (options?.webhookUrl || sonioxOpts?.webhook_url) {
9296
+ return this.normalizeTranscription(meta);
9297
+ }
9298
+ return this.pollForCompletion(meta.id);
9299
+ } else if (audio.type === "url") {
9300
+ const payload = {
9301
+ ...sonioxOpts,
9302
+ model: options?.model || this.defaultModel,
9303
+ audio_url: audio.url,
9304
+ language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
9305
+ enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
9306
+ enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
9307
+ context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
9308
+ webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
9309
+ };
9310
+ const createResp = await createTranscription2(payload, this.getAxiosConfig());
9311
+ const meta = createResp.data;
9312
+ if (options?.webhookUrl || sonioxOpts?.webhook_url) {
9313
+ return this.normalizeTranscription(meta);
9314
+ }
9315
+ return this.pollForCompletion(meta.id);
9601
9316
  } else {
9602
9317
  return {
9603
9318
  success: false,
@@ -9608,23 +9323,6 @@ var SonioxAdapter = class extends BaseAdapter {
9608
9323
  }
9609
9324
  };
9610
9325
  }
9611
- if (options?.language) {
9612
- requestBody.language_hints = [options.language];
9613
- }
9614
- if (options?.diarization) {
9615
- requestBody.enable_speaker_diarization = true;
9616
- }
9617
- if (options?.languageDetection) {
9618
- requestBody.enable_language_identification = true;
9619
- }
9620
- if (options?.customVocabulary && options.customVocabulary.length > 0) {
9621
- requestBody.context = {
9622
- terms: options.customVocabulary
9623
- };
9624
- }
9625
- const response = await this.client.post("/transcriptions", requestBody);
9626
- const transcriptionId = response.data.id;
9627
- return await this.pollForCompletion(transcriptionId);
9628
9326
  } catch (error) {
9629
9327
  return this.createErrorResponse(error);
9630
9328
  }
@@ -9632,9 +9330,8 @@ var SonioxAdapter = class extends BaseAdapter {
9632
9330
  /**
9633
9331
  * Get transcription result by ID
9634
9332
  *
9635
- * Checks job status via GET /v1/transcriptions/{id}, then fetches
9636
- * the full transcript via GET /v1/transcriptions/{id}/transcript
9637
- * when completed.
9333
+ * Fetches transcription metadata and, if completed, the transcript text/tokens.
9334
+ * Used by pollForCompletion() for async polling.
9638
9335
  *
9639
9336
  * @param transcriptId - Transcript ID
9640
9337
  * @returns Transcription response
@@ -9642,39 +9339,20 @@ var SonioxAdapter = class extends BaseAdapter {
9642
9339
  async getTranscript(transcriptId) {
9643
9340
  this.validateConfig();
9644
9341
  try {
9645
- const statusResponse = await this.client.get(`/transcriptions/${transcriptId}`);
9646
- const job = statusResponse.data;
9647
- if (job.status === "error") {
9648
- return {
9649
- success: false,
9650
- provider: this.name,
9651
- error: {
9652
- code: "TRANSCRIPTION_ERROR",
9653
- message: job.error_message || "Transcription failed"
9654
- }
9655
- };
9656
- }
9657
- if (job.status !== "completed") {
9658
- return {
9659
- success: true,
9660
- provider: this.name,
9661
- data: {
9662
- id: job.id,
9663
- text: "",
9664
- status: job.status
9665
- },
9666
- raw: job
9667
- };
9342
+ const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
9343
+ const meta = metaResp.data;
9344
+ if (meta.status === TranscriptionStatus.completed) {
9345
+ try {
9346
+ const transcriptResp = await getTranscriptionTranscript(
9347
+ transcriptId,
9348
+ this.getAxiosConfig()
9349
+ );
9350
+ return this.normalizeTranscription(meta, transcriptResp.data);
9351
+ } catch (transcriptError) {
9352
+ return this.createErrorResponse(transcriptError);
9353
+ }
9668
9354
  }
9669
- const transcriptResponse = await this.client.get(
9670
- `/transcriptions/${transcriptId}/transcript`
9671
- );
9672
- return this.normalizeResponse({
9673
- ...transcriptResponse.data,
9674
- // Carry over job metadata
9675
- id: job.id,
9676
- audio_duration_ms: job.audio_duration_ms
9677
- });
9355
+ return this.normalizeTranscription(meta);
9678
9356
  } catch (error) {
9679
9357
  return this.createErrorResponse(error);
9680
9358
  }
@@ -9694,51 +9372,50 @@ var SonioxAdapter = class extends BaseAdapter {
9694
9372
  const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
9695
9373
  const createdAt = /* @__PURE__ */ new Date();
9696
9374
  const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
9697
- const wsUrl = `${wsBase}/transcribe-websocket`;
9698
- const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-v4";
9699
- const sonioxOpts = options?.sonioxStreaming;
9700
- const initMessage = {
9701
- api_key: this.config.apiKey,
9702
- model: modelId
9703
- };
9704
- if (sonioxOpts?.audioFormat) {
9705
- initMessage.audio_format = sonioxOpts.audioFormat;
9706
- } else if (options?.encoding) {
9375
+ const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
9376
+ wsUrl.searchParams.set("api_key", this.config.apiKey);
9377
+ const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
9378
+ wsUrl.searchParams.set("model", modelId);
9379
+ if (options?.encoding) {
9707
9380
  const encodingMap = {
9708
9381
  linear16: "pcm_s16le",
9709
9382
  pcm: "pcm_s16le",
9710
9383
  mulaw: "mulaw",
9711
9384
  alaw: "alaw"
9712
9385
  };
9713
- initMessage.audio_format = encodingMap[options.encoding] || options.encoding;
9386
+ wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
9714
9387
  }
9715
- if (sonioxOpts?.sampleRate || options?.sampleRate) {
9716
- initMessage.sample_rate = sonioxOpts?.sampleRate || options?.sampleRate;
9388
+ if (options?.sampleRate) {
9389
+ wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
9717
9390
  }
9718
- if (sonioxOpts?.numChannels || options?.channels) {
9719
- initMessage.num_channels = sonioxOpts?.numChannels || options?.channels;
9391
+ if (options?.channels) {
9392
+ wsUrl.searchParams.set("num_channels", options.channels.toString());
9720
9393
  }
9394
+ const sonioxOpts = options?.sonioxStreaming;
9721
9395
  if (sonioxOpts) {
9722
9396
  if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
9723
- initMessage.language_hints = sonioxOpts.languageHints;
9397
+ wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
9724
9398
  }
9725
9399
  if (sonioxOpts.enableLanguageIdentification) {
9726
- initMessage.enable_language_identification = true;
9400
+ wsUrl.searchParams.set("enable_language_identification", "true");
9727
9401
  }
9728
9402
  if (sonioxOpts.enableEndpointDetection) {
9729
- initMessage.enable_endpoint_detection = true;
9403
+ wsUrl.searchParams.set("enable_endpoint_detection", "true");
9730
9404
  }
9731
9405
  if (sonioxOpts.enableSpeakerDiarization) {
9732
- initMessage.enable_speaker_diarization = true;
9406
+ wsUrl.searchParams.set("enable_speaker_diarization", "true");
9733
9407
  }
9734
9408
  if (sonioxOpts.context) {
9735
- initMessage.context = typeof sonioxOpts.context === "string" ? sonioxOpts.context : sonioxOpts.context;
9409
+ wsUrl.searchParams.set(
9410
+ "context",
9411
+ typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
9412
+ );
9736
9413
  }
9737
9414
  if (sonioxOpts.translation) {
9738
- initMessage.translation = sonioxOpts.translation;
9415
+ wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
9739
9416
  }
9740
9417
  if (sonioxOpts.clientReferenceId) {
9741
- initMessage.client_reference_id = sonioxOpts.clientReferenceId;
9418
+ wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
9742
9419
  }
9743
9420
  }
9744
9421
  if (!sonioxOpts?.languageHints && options?.language) {
@@ -9747,33 +9424,24 @@ var SonioxAdapter = class extends BaseAdapter {
9747
9424
  `[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
9748
9425
  );
9749
9426
  }
9750
- initMessage.language_hints = [options.language];
9427
+ wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
9751
9428
  }
9752
9429
  if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
9753
- initMessage.enable_speaker_diarization = true;
9430
+ wsUrl.searchParams.set("enable_speaker_diarization", "true");
9754
9431
  }
9755
9432
  if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
9756
- initMessage.enable_language_identification = true;
9433
+ wsUrl.searchParams.set("enable_language_identification", "true");
9434
+ }
9435
+ if (options?.interimResults !== false) {
9757
9436
  }
9758
9437
  let status = "connecting";
9759
9438
  let openedAt = null;
9760
9439
  let receivedData = false;
9761
9440
  const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
9762
- const ws = new WebSocketImpl(wsUrl);
9441
+ const ws = new WebSocketImpl(wsUrl.toString());
9763
9442
  ws.onopen = () => {
9764
- openedAt = Date.now();
9765
- const initPayload = JSON.stringify(initMessage);
9766
- if (callbacks?.onRawMessage) {
9767
- callbacks.onRawMessage({
9768
- provider: this.name,
9769
- direction: "outgoing",
9770
- timestamp: Date.now(),
9771
- payload: initPayload,
9772
- messageType: "init"
9773
- });
9774
- }
9775
- ws.send(initPayload);
9776
9443
  status = "open";
9444
+ openedAt = Date.now();
9777
9445
  callbacks?.onOpen?.();
9778
9446
  };
9779
9447
  ws.onmessage = (event) => {
@@ -9782,7 +9450,8 @@ var SonioxAdapter = class extends BaseAdapter {
9782
9450
  let messageType;
9783
9451
  try {
9784
9452
  const data = JSON.parse(rawPayload);
9785
- if (data.error) {
9453
+ const errorMessage = data.error_message || data.error;
9454
+ if (errorMessage) {
9786
9455
  messageType = "error";
9787
9456
  } else if (data.finished) {
9788
9457
  messageType = "finished";
@@ -9798,10 +9467,10 @@ var SonioxAdapter = class extends BaseAdapter {
9798
9467
  messageType
9799
9468
  });
9800
9469
  }
9801
- if (data.error) {
9470
+ if (errorMessage) {
9802
9471
  callbacks?.onError?.({
9803
9472
  code: data.error_code?.toString() || "STREAM_ERROR",
9804
- message: data.error
9473
+ message: errorMessage
9805
9474
  });
9806
9475
  return;
9807
9476
  }
@@ -9815,7 +9484,7 @@ var SonioxAdapter = class extends BaseAdapter {
9815
9484
  start: token.start_ms ? token.start_ms / 1e3 : 0,
9816
9485
  end: token.end_ms ? token.end_ms / 1e3 : 0,
9817
9486
  confidence: token.confidence,
9818
- speaker: token.speaker
9487
+ speaker: token.speaker ?? void 0
9819
9488
  }));
9820
9489
  const text = data.text || data.tokens.map((t) => t.text).join("");
9821
9490
  const isFinal = data.tokens.every((t) => t.is_final);
@@ -9824,8 +9493,8 @@ var SonioxAdapter = class extends BaseAdapter {
9824
9493
  text,
9825
9494
  isFinal,
9826
9495
  words,
9827
- speaker: data.tokens[0]?.speaker,
9828
- language: data.tokens[0]?.language,
9496
+ speaker: data.tokens[0]?.speaker ?? void 0,
9497
+ language: data.tokens[0]?.language ?? void 0,
9829
9498
  confidence: data.tokens[0]?.confidence
9830
9499
  };
9831
9500
  callbacks?.onTranscript?.(event2);
@@ -9852,10 +9521,10 @@ var SonioxAdapter = class extends BaseAdapter {
9852
9521
  ws.onclose = (event) => {
9853
9522
  status = "closed";
9854
9523
  const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
9855
- const isEarlyClose = timeSinceOpen !== null && timeSinceOpen < 5e3 && !receivedData;
9856
- if (isEarlyClose && event.code === 1e3) {
9524
+ const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
9525
+ if (isImmediateClose && event.code === 1e3) {
9857
9526
  const errorMessage = [
9858
- "Soniox closed connection shortly after opening.",
9527
+ "Soniox closed connection immediately after opening.",
9859
9528
  `Current config: region=${this.region}, model=${modelId}`,
9860
9529
  "Likely causes:",
9861
9530
  " - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
@@ -9941,7 +9610,7 @@ var SonioxAdapter = class extends BaseAdapter {
9941
9610
  async getModels() {
9942
9611
  this.validateConfig();
9943
9612
  try {
9944
- const response = await this.client.get("/models");
9613
+ const response = await getModels(this.getAxiosConfig());
9945
9614
  return response.data.models || [];
9946
9615
  } catch (error) {
9947
9616
  console.error("Failed to fetch Soniox models:", error);
@@ -9968,55 +9637,82 @@ var SonioxAdapter = class extends BaseAdapter {
9968
9637
  start: token.start_ms ? token.start_ms / 1e3 : 0,
9969
9638
  end: token.end_ms ? token.end_ms / 1e3 : 0,
9970
9639
  confidence: token.confidence,
9971
- speaker: token.speaker
9640
+ speaker: token.speaker ?? void 0
9972
9641
  }));
9973
9642
  return buildUtterancesFromWords(words);
9974
9643
  }
9975
9644
  /**
9976
- * Normalize Soniox response to unified format
9645
+ * Normalize v1 API response to unified format
9646
+ *
9647
+ * @param meta - Transcription metadata from getTranscription/createTranscription
9648
+ * @param transcript - Transcript data (text/tokens), only present when status is completed
9977
9649
  */
9978
- normalizeResponse(response) {
9979
- const text = response.text || (response.tokens ? response.tokens.filter((t) => t.is_final !== false).map((t) => t.text).join("") : "");
9980
- const words = response.tokens ? response.tokens.filter(
9981
- (t) => t.is_final !== false && t.start_ms !== void 0 && t.end_ms !== void 0
9982
- ).map((token) => ({
9650
+ normalizeTranscription(meta, transcript) {
9651
+ if (meta.status === TranscriptionStatus.error) {
9652
+ return {
9653
+ success: false,
9654
+ provider: this.name,
9655
+ data: {
9656
+ id: meta.id,
9657
+ text: "",
9658
+ status: "error"
9659
+ },
9660
+ error: {
9661
+ code: meta.error_type || "TRANSCRIPTION_ERROR",
9662
+ message: meta.error_message || "Transcription failed"
9663
+ },
9664
+ raw: { meta, transcript }
9665
+ };
9666
+ }
9667
+ if (!transcript) {
9668
+ return {
9669
+ success: true,
9670
+ provider: this.name,
9671
+ data: {
9672
+ id: meta.id,
9673
+ text: "",
9674
+ status: meta.status,
9675
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
9676
+ },
9677
+ raw: { meta }
9678
+ };
9679
+ }
9680
+ const tokens = transcript.tokens || [];
9681
+ const text = transcript.text || tokens.map((t) => t.text).join("");
9682
+ const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
9983
9683
  word: token.text,
9984
9684
  start: token.start_ms / 1e3,
9985
9685
  end: token.end_ms / 1e3,
9986
9686
  confidence: token.confidence,
9987
- speaker: token.speaker
9988
- })) : [];
9687
+ speaker: token.speaker ?? void 0
9688
+ }));
9989
9689
  const speakerSet = /* @__PURE__ */ new Set();
9990
- if (response.tokens) {
9991
- response.tokens.forEach((t) => {
9992
- if (t.speaker) speakerSet.add(t.speaker);
9993
- });
9994
- }
9690
+ tokens.forEach((t) => {
9691
+ if (t.speaker) speakerSet.add(String(t.speaker));
9692
+ });
9995
9693
  const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
9996
9694
  id,
9997
9695
  label: `Speaker ${id}`
9998
9696
  })) : void 0;
9999
- const tokens = response.tokens ? response.tokens.filter((t) => t.is_final !== false) : [];
10000
- const utterances = tokens.length > 0 ? this.buildUtterancesFromTokens(tokens) : [];
10001
- const language = response.tokens?.find((t) => t.language)?.language;
9697
+ const utterances = this.buildUtterancesFromTokens(tokens);
9698
+ const language = tokens.find((t) => t.language)?.language ?? void 0;
10002
9699
  return {
10003
9700
  success: true,
10004
9701
  provider: this.name,
10005
9702
  data: {
10006
- id: response.id || `soniox_${Date.now()}`,
9703
+ id: meta.id,
10007
9704
  text,
10008
9705
  status: TranscriptionStatus.completed,
10009
9706
  language,
10010
- duration: response.audio_duration_ms ? response.audio_duration_ms / 1e3 : response.total_audio_proc_ms ? response.total_audio_proc_ms / 1e3 : void 0,
9707
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
10011
9708
  speakers,
10012
9709
  words: words.length > 0 ? words : void 0,
10013
9710
  utterances: utterances.length > 0 ? utterances : void 0
10014
9711
  },
10015
9712
  tracking: {
10016
- requestId: response.id,
10017
- processingTimeMs: response.total_audio_proc_ms
9713
+ requestId: meta.id
10018
9714
  },
10019
- raw: response
9715
+ raw: { meta, transcript }
10020
9716
  };
10021
9717
  }
10022
9718
  };
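To show how the new normalizeTranscription shape is meant to be consumed, here is a short sketch that branches on the unified result. It assumes the fetch/poll step that yields meta and transcript, and that TranscriptionStatus.completed maps to the string "completed"; both are assumptions not shown in this diff.

// Sketch only: `meta` and `transcript` are assumed to come from the v1 API client.
const result = adapter.normalizeTranscription(meta, transcript);
if (!result.success) {
  // meta.status was the error status: error_type / error_message surfaced here
  console.error(result.error.code, result.error.message);
} else if (result.data.status !== "completed") {
  // transcript not yet available: empty text, status passed through from meta
  console.log("still processing:", result.data.status);
} else {
  console.log(result.data.text, result.data.words?.length ?? 0, "words");
}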
@@ -10440,7 +10136,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10440
10136
  * - Multi-channel: `MultichannelSpeechToTextResponseModel` with `transcripts[]`
10441
10137
  */
10442
10138
  normalizeResponse(response) {
10443
- const chunks = response.transcripts ? response.transcripts : [response];
10139
+ const chunks = "transcripts" in response ? response.transcripts : [response];
10444
10140
  const text = chunks.map((c) => c.text).join(" ");
10445
10141
  const words = [];
10446
10142
  const speakerSet = /* @__PURE__ */ new Set();
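The switch to an `in` check makes the single-channel vs. multi-channel branch explicit. A small stand-alone sketch of the two shapes named in the doc comment above (field values are made up):

// Sketch only: minimal stand-ins for the two ElevenLabs response shapes.
const singleChannel = { text: "hello world" };
const multiChannel = { transcripts: [{ text: "hello" }, { text: "world" }] };
for (const response of [singleChannel, multiChannel]) {
  const chunks = "transcripts" in response ? response.transcripts : [response];
  console.log(chunks.map((c) => c.text).join(" ")); // "hello world" in both cases
}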
@@ -10834,9 +10530,7 @@ var listenTranscribeQueryParams = zod.object({
10834
10530
  ),
10835
10531
  dictation: zod.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
10836
10532
  encoding: zod.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
10837
- filler_words: zod.boolean().optional().describe(
10838
- 'Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'
10839
- ),
10533
+ filler_words: zod.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
10840
10534
  keyterm: zod.array(zod.string()).optional().describe(
10841
10535
  "Key term prompting can boost or suppress specialized terminology and brands. Only compatible with Nova-3"
10842
10536
  ),
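This hunk only reflows the filler_words description, so behaviour is unchanged; for reference, a sketch of validating a sparse set of query params against the schema. It assumes the remaining (unshown) fields of listenTranscribeQueryParams are optional, so a partial object passes.

// Sketch only: safeParse never throws, so this stays safe even if the assumption
// about the remaining fields being optional turns out to be wrong.
const parsed = listenTranscribeQueryParams.safeParse({
  encoding: "linear16",
  dictation: false,
  filler_words: true,
});
console.log(parsed.success ? parsed.data : parsed.error.issues);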
@@ -11540,6 +11234,7 @@ __export(assemblyAIAPI_zod_exports, {
11540
11234
  createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault: () => createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault,
11541
11235
  createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault: () => createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault,
11542
11236
  createTranscriptBodySummarizationDefault: () => createTranscriptBodySummarizationDefault,
11237
+ createTranscriptBodyTemperatureDefault: () => createTranscriptBodyTemperatureDefault,
11543
11238
  createTranscriptResponse: () => createTranscriptResponse,
11544
11239
  createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault: () => createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault,
11545
11240
  createTranscriptResponseLanguageDetectionOptionsCodeSwitchingDefault: () => createTranscriptResponseLanguageDetectionOptionsCodeSwitchingDefault,
@@ -11609,6 +11304,7 @@ var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
11609
11304
  var createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault = true;
11610
11305
  var createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
11611
11306
  var createTranscriptBodySummarizationDefault = false;
11307
+ var createTranscriptBodyTemperatureDefault = 0;
11612
11308
  var createTranscriptBodyCustomTopicsDefault = false;
11613
11309
  var createTranscriptBody = zod3.object({
11614
11310
  audio_end_at: zod3.number().optional().describe(
@@ -11618,10 +11314,10 @@ var createTranscriptBody = zod3.object({
11618
11314
  "The point in time, in milliseconds, to begin transcribing in your media file. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
11619
11315
  ),
11620
11316
  auto_chapters: zod3.boolean().optional().describe(
11621
- "Enable [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
11317
+ "Enable [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
11622
11318
  ),
11623
11319
  auto_highlights: zod3.boolean().optional().describe(
11624
- "Enable [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases), either true or false"
11320
+ "Enable [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights), either true or false"
11625
11321
  ),
11626
11322
  content_safety: zod3.boolean().optional().describe(
11627
11323
  "Enable [Content Moderation](https://www.assemblyai.com/docs/content-moderation), can be true or false"
@@ -11637,16 +11333,16 @@ var createTranscriptBody = zod3.object({
11637
11333
  "Object containing words or phrases to replace, and the word or phrase to replace with"
11638
11334
  )
11639
11335
  ).optional().describe(
11640
- "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
11336
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
11641
11337
  ),
11642
11338
  disfluencies: zod3.boolean().optional().describe(
11643
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
11339
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
11644
11340
  ),
11645
11341
  domain: zod3.string().nullish().describe(
11646
11342
  'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
11647
11343
  ),
11648
11344
  entity_detection: zod3.boolean().optional().describe(
11649
- "Enable [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection), can be true or false"
11345
+ "Enable [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript), can be true or false"
11650
11346
  ),
11651
11347
  filter_profanity: zod3.boolean().optional().describe(
11652
11348
  "Filter profanity from the transcribed text, can be true or false. See [Profanity Filtering](https://www.assemblyai.com/docs/profanity-filtering) for more details."
@@ -11655,7 +11351,7 @@ var createTranscriptBody = zod3.object({
11655
11351
  "Enable [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio), can be true or false"
11656
11352
  ),
11657
11353
  iab_categories: zod3.boolean().optional().describe(
11658
- "Enable [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection), can be true or false"
11354
+ "Enable [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics), can be true or false"
11659
11355
  ),
11660
11356
  keyterms_prompt: zod3.array(zod3.string()).optional().describe(
11661
11357
  "Improve accuracy with up to 200 (for Universal-2) or 1000 (for Universal-3 Pro) domain-specific words or phrases (maximum 6 words per phrase). See [Keyterms Prompting](https://www.assemblyai.com/docs/pre-recorded-audio/keyterms-prompting) for more details.\n"
@@ -11903,7 +11599,7 @@ var createTranscriptBody = zod3.object({
11903
11599
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
11904
11600
  ),
11905
11601
  multichannel: zod3.boolean().optional().describe(
11906
- "Enable [Multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) transcription, can be true or false."
11602
+ "Enable [Multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) transcription, can be true or false."
11907
11603
  ),
11908
11604
  prompt: zod3.string().optional().describe(
11909
11605
  "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -11986,23 +11682,23 @@ var createTranscriptBody = zod3.object({
11986
11682
  "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
11987
11683
  ),
11988
11684
  sentiment_analysis: zod3.boolean().optional().describe(
11989
- "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis), can be true or false"
11685
+ "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
11990
11686
  ),
11991
11687
  speaker_labels: zod3.boolean().optional().describe(
11992
- "Enable [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization), can be true or false"
11688
+ "Enable [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers), can be true or false"
11993
11689
  ),
11994
11690
  speaker_options: zod3.object({
11995
11691
  min_speakers_expected: zod3.number().default(createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault).describe(
11996
- "The minimum number of speakers expected in the audio file. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-a-range-of-possible-speakers) for more details."
11692
+ "The minimum number of speakers expected in the audio file. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers) for more details."
11997
11693
  ),
11998
11694
  max_speakers_expected: zod3.number().optional().describe(
11999
- "<Warning>Setting this parameter too high may hurt model accuracy</Warning>\nThe maximum number of speakers expected in the audio file. The default depends on audio duration: no limit for 0-2 minutes, 10 for 2-10 minutes, and 30 for 10+ minutes. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-a-range-of-possible-speakers) for more details.\n"
11695
+ "<Warning>Setting this parameter too high may hurt model accuracy</Warning>\nThe maximum number of speakers expected in the audio file. The default depends on audio duration: no limit for 0-2 minutes, 10 for 2-10 minutes, and 30 for 10+ minutes. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers) for more details.\n"
12000
11696
  )
12001
11697
  }).optional().describe(
12002
- "Specify options for [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-a-range-of-possible-speakers). Use this to set a range of possible speakers."
11698
+ "Specify options for [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers). Use this to set a range of possible speakers."
12003
11699
  ),
12004
11700
  speakers_expected: zod3.number().nullish().describe(
12005
- "Tells the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
11701
+ "Tells the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
12006
11702
  ),
12007
11703
  speech_models: zod3.array(
12008
11704
  zod3.string().describe(
@@ -12078,7 +11774,7 @@ var createTranscriptBody = zod3.object({
12078
11774
  "Enable speech understanding tasks like [Translation](https://www.assemblyai.com/docs/speech-understanding/translation), [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification), and [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting). See the task-specific docs for available options and configuration.\n"
12079
11775
  ),
12080
11776
  summarization: zod3.boolean().optional().describe(
12081
- "Enable [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
11777
+ "Enable [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12082
11778
  ),
12083
11779
  summary_model: zod3.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
12084
11780
  summary_type: zod3.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
@@ -12087,6 +11783,9 @@ var createTranscriptBody = zod3.object({
12087
11783
  ).or(zod3.null()).optional().describe(
12088
11784
  'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12089
11785
  ),
11786
+ temperature: zod3.number().optional().describe(
11787
+ "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
11788
+ ),
12090
11789
  webhook_auth_header_name: zod3.string().nullish().describe(
12091
11790
  "The header name to be sent with the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests"
12092
11791
  ),
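A sketch of a request body exercising the newly added temperature field alongside the speaker-diarization options documented above. audio_url is assumed to be the required source field of this body schema (it appears that way in the response schemas later in this file); the URL is a placeholder.

// Sketch only: temperature is only honoured by Universal-3 Pro, per the description above.
const candidate = {
  audio_url: "https://example.com/audio.mp3",
  speaker_labels: true,
  speaker_options: { min_speakers_expected: 2, max_speakers_expected: 4 },
  temperature: 0, // matches createTranscriptBodyTemperatureDefault
};
const check = createTranscriptBody.safeParse(candidate);
console.log(check.success ? "valid request body" : check.error.issues);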
@@ -12108,7 +11807,7 @@ var createTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault =
12108
11807
  var createTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
12109
11808
  var createTranscriptResponse = zod3.object({
12110
11809
  audio_channels: zod3.number().optional().describe(
12111
- "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) is enabled."
11810
+ "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
12112
11811
  ),
12113
11812
  audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
12114
11813
  audio_end_at: zod3.number().nullish().describe(
@@ -12119,10 +11818,10 @@ var createTranscriptResponse = zod3.object({
12119
11818
  ),
12120
11819
  audio_url: zod3.string().describe("The URL of the media that was transcribed"),
12121
11820
  auto_chapters: zod3.boolean().nullish().describe(
12122
- "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
11821
+ "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12123
11822
  ),
12124
11823
  auto_highlights: zod3.boolean().describe(
12125
- "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) is enabled, either true or false"
11824
+ "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
12126
11825
  ),
12127
11826
  auto_highlights_result: zod3.object({
12128
11827
  status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -12142,9 +11841,9 @@ var createTranscriptResponse = zod3.object({
12142
11841
  })
12143
11842
  ).describe("A temporally-sequential array of Key Phrases")
12144
11843
  }).describe(
12145
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
11844
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
12146
11845
  ).or(zod3.null()).optional().describe(
12147
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
11846
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
12148
11847
  ),
12149
11848
  chapters: zod3.array(
12150
11849
  zod3.object({
@@ -12157,7 +11856,7 @@ var createTranscriptResponse = zod3.object({
12157
11856
  end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
12158
11857
  }).describe("Chapter of the audio file")
12159
11858
  ).nullish().describe(
12160
- "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for more information."
11859
+ "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
12161
11860
  ),
12162
11861
  confidence: zod3.number().nullish().describe(
12163
11862
  "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
@@ -12213,10 +11912,10 @@ var createTranscriptResponse = zod3.object({
12213
11912
  "Object containing words or phrases to replace, and the word or phrase to replace with"
12214
11913
  )
12215
11914
  ).nullish().describe(
12216
- "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
11915
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
12217
11916
  ),
12218
11917
  disfluencies: zod3.boolean().nullish().describe(
12219
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
11918
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
12220
11919
  ),
12221
11920
  domain: zod3.string().nullish().describe(
12222
11921
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -12278,10 +11977,10 @@ var createTranscriptResponse = zod3.object({
12278
11977
  )
12279
11978
  }).describe("A detected entity")
12280
11979
  ).nullish().describe(
12281
- "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) for more information.\n"
11980
+ "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
12282
11981
  ),
12283
11982
  entity_detection: zod3.boolean().nullish().describe(
12284
- "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) is enabled, can be true or false"
11983
+ "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
12285
11984
  ),
12286
11985
  error: zod3.string().optional().describe("Error message of why the transcript failed"),
12287
11986
  filter_profanity: zod3.boolean().nullish().describe(
@@ -12291,7 +11990,7 @@ var createTranscriptResponse = zod3.object({
12291
11990
  "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
12292
11991
  ),
12293
11992
  iab_categories: zod3.boolean().nullish().describe(
12294
- "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) is enabled, can be true or false"
11993
+ "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
12295
11994
  ),
12296
11995
  iab_categories_result: zod3.object({
12297
11996
  status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -12314,9 +12013,9 @@ var createTranscriptResponse = zod3.object({
12314
12013
  ).describe("An array of results for the Topic Detection model"),
12315
12014
  summary: zod3.record(zod3.string(), zod3.number()).describe("The overall relevance of topic to the entire audio file")
12316
12015
  }).describe(
12317
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
12016
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
12318
12017
  ).or(zod3.null()).optional().describe(
12319
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
12018
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
12320
12019
  ),
12321
12020
  id: zod3.string().uuid().describe("The unique identifier of your transcript"),
12322
12021
  keyterms_prompt: zod3.array(zod3.string()).optional().describe(
@@ -12566,7 +12265,7 @@ var createTranscriptResponse = zod3.object({
12566
12265
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
12567
12266
  ),
12568
12267
  multichannel: zod3.boolean().nullish().describe(
12569
- "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) was enabled in the transcription request, either true or false"
12268
+ "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
12570
12269
  ),
12571
12270
  prompt: zod3.string().optional().describe(
12572
12271
  "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -12649,7 +12348,7 @@ var createTranscriptResponse = zod3.object({
12649
12348
  "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12650
12349
  ),
12651
12350
  sentiment_analysis: zod3.boolean().nullish().describe(
12652
- "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) is enabled, can be true or false"
12351
+ "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
12653
12352
  ),
12654
12353
  sentiment_analysis_results: zod3.array(
12655
12354
  zod3.object({
@@ -12664,17 +12363,17 @@ var createTranscriptResponse = zod3.object({
12664
12363
  "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
12665
12364
  ),
12666
12365
  speaker: zod3.string().nullable().describe(
12667
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
12366
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
12668
12367
  )
12669
12368
  }).describe("The result of the Sentiment Analysis model")
12670
12369
  ).nullish().describe(
12671
- "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) for more information.\n"
12370
+ "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
12672
12371
  ),
12673
12372
  speaker_labels: zod3.boolean().nullish().describe(
12674
- "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, can be true or false"
12373
+ "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
12675
12374
  ),
12676
12375
  speakers_expected: zod3.number().nullish().describe(
12677
- "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
12376
+ "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
12678
12377
  ),
12679
12378
  speech_model_used: zod3.string().optional().describe(
12680
12379
  "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
@@ -12777,22 +12476,25 @@ var createTranscriptResponse = zod3.object({
12777
12476
  "The status of your transcript. Possible values are queued, processing, completed, or error."
12778
12477
  ),
12779
12478
  summarization: zod3.boolean().describe(
12780
- "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12479
+ "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12781
12480
  ),
12782
12481
  summary: zod3.string().nullish().describe(
12783
- "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
12482
+ "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
12784
12483
  ),
12785
12484
  summary_model: zod3.string().nullish().describe(
12786
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n"
12485
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
12787
12486
  ),
12788
12487
  summary_type: zod3.string().nullish().describe(
12789
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
12488
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
12790
12489
  ),
12791
12490
  remove_audio_tags: zod3.enum(["all"]).describe(
12792
12491
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
12793
12492
  ).or(zod3.null()).optional().describe(
12794
12493
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
12795
12494
  ),
12495
+ temperature: zod3.number().nullish().describe(
12496
+ "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
12497
+ ),
12796
12498
  text: zod3.string().nullish().describe("The textual transcript of your media file"),
12797
12499
  throttled: zod3.boolean().nullish().describe(
12798
12500
  "True while a request is throttled and false when a request is no longer throttled"
@@ -12813,7 +12515,7 @@ var createTranscriptResponse = zod3.object({
12813
12515
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
12814
12516
  ),
12815
12517
  speaker: zod3.string().nullable().describe(
12816
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
12518
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
12817
12519
  )
12818
12520
  })
12819
12521
  ).describe("The words in the utterance."),
@@ -12828,7 +12530,7 @@ var createTranscriptResponse = zod3.object({
12828
12530
  )
12829
12531
  })
12830
12532
  ).nullish().describe(
12831
- "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) for more information.\n"
12533
+ "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
12832
12534
  ),
12833
12535
  webhook_auth: zod3.boolean().describe(
12834
12536
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
@@ -12852,7 +12554,7 @@ var createTranscriptResponse = zod3.object({
12852
12554
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
12853
12555
  ),
12854
12556
  speaker: zod3.string().nullable().describe(
12855
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
12557
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
12856
12558
  )
12857
12559
  })
12858
12560
  ).nullish().describe(
@@ -12925,7 +12627,7 @@ var getTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault = tr
12925
12627
  var getTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
12926
12628
  var getTranscriptResponse = zod3.object({
12927
12629
  audio_channels: zod3.number().optional().describe(
12928
- "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) is enabled."
12630
+ "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
12929
12631
  ),
12930
12632
  audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
12931
12633
  audio_end_at: zod3.number().nullish().describe(
@@ -12936,10 +12638,10 @@ var getTranscriptResponse = zod3.object({
12936
12638
  ),
12937
12639
  audio_url: zod3.string().describe("The URL of the media that was transcribed"),
12938
12640
  auto_chapters: zod3.boolean().nullish().describe(
12939
- "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12641
+ "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12940
12642
  ),
12941
12643
  auto_highlights: zod3.boolean().describe(
12942
- "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) is enabled, either true or false"
12644
+ "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
12943
12645
  ),
12944
12646
  auto_highlights_result: zod3.object({
12945
12647
  status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -12959,9 +12661,9 @@ var getTranscriptResponse = zod3.object({
12959
12661
  })
12960
12662
  ).describe("A temporally-sequential array of Key Phrases")
12961
12663
  }).describe(
12962
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
12664
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
12963
12665
  ).or(zod3.null()).optional().describe(
12964
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
12666
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
12965
12667
  ),
12966
12668
  chapters: zod3.array(
12967
12669
  zod3.object({
@@ -12974,7 +12676,7 @@ var getTranscriptResponse = zod3.object({
12974
12676
  end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
12975
12677
  }).describe("Chapter of the audio file")
12976
12678
  ).nullish().describe(
12977
- "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for more information."
12679
+ "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
12978
12680
  ),
12979
12681
  confidence: zod3.number().nullish().describe(
12980
12682
  "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
@@ -13030,10 +12732,10 @@ var getTranscriptResponse = zod3.object({
13030
12732
  "Object containing words or phrases to replace, and the word or phrase to replace with"
13031
12733
  )
13032
12734
  ).nullish().describe(
13033
- "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
12735
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
13034
12736
  ),
13035
12737
  disfluencies: zod3.boolean().nullish().describe(
13036
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
12738
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
13037
12739
  ),
13038
12740
  domain: zod3.string().nullish().describe(
13039
12741
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -13095,10 +12797,10 @@ var getTranscriptResponse = zod3.object({
13095
12797
  )
13096
12798
  }).describe("A detected entity")
13097
12799
  ).nullish().describe(
13098
- "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) for more information.\n"
12800
+ "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
13099
12801
  ),
13100
12802
  entity_detection: zod3.boolean().nullish().describe(
13101
- "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) is enabled, can be true or false"
12803
+ "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
13102
12804
  ),
13103
12805
  error: zod3.string().optional().describe("Error message of why the transcript failed"),
13104
12806
  filter_profanity: zod3.boolean().nullish().describe(
@@ -13108,7 +12810,7 @@ var getTranscriptResponse = zod3.object({
13108
12810
  "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
13109
12811
  ),
13110
12812
  iab_categories: zod3.boolean().nullish().describe(
13111
- "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) is enabled, can be true or false"
12813
+ "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
13112
12814
  ),
13113
12815
  iab_categories_result: zod3.object({
13114
12816
  status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -13131,9 +12833,9 @@ var getTranscriptResponse = zod3.object({
13131
12833
  ).describe("An array of results for the Topic Detection model"),
13132
12834
  summary: zod3.record(zod3.string(), zod3.number()).describe("The overall relevance of topic to the entire audio file")
13133
12835
  }).describe(
13134
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
12836
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
13135
12837
  ).or(zod3.null()).optional().describe(
13136
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
12838
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
13137
12839
  ),
13138
12840
  id: zod3.string().uuid().describe("The unique identifier of your transcript"),
13139
12841
  keyterms_prompt: zod3.array(zod3.string()).optional().describe(
@@ -13383,7 +13085,7 @@ var getTranscriptResponse = zod3.object({
13383
13085
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
13384
13086
  ),
13385
13087
  multichannel: zod3.boolean().nullish().describe(
13386
- "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) was enabled in the transcription request, either true or false"
13088
+ "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
13387
13089
  ),
13388
13090
  prompt: zod3.string().optional().describe(
13389
13091
  "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -13466,7 +13168,7 @@ var getTranscriptResponse = zod3.object({
13466
13168
  "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
13467
13169
  ),
13468
13170
  sentiment_analysis: zod3.boolean().nullish().describe(
13469
- "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) is enabled, can be true or false"
13171
+ "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
13470
13172
  ),
13471
13173
  sentiment_analysis_results: zod3.array(
13472
13174
  zod3.object({
@@ -13481,17 +13183,17 @@ var getTranscriptResponse = zod3.object({
13481
13183
  "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13482
13184
  ),
13483
13185
  speaker: zod3.string().nullable().describe(
13484
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
13186
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13485
13187
  )
13486
13188
  }).describe("The result of the Sentiment Analysis model")
13487
13189
  ).nullish().describe(
13488
- "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) for more information.\n"
13190
+ "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
13489
13191
  ),
13490
13192
  speaker_labels: zod3.boolean().nullish().describe(
13491
- "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, can be true or false"
13193
+ "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
13492
13194
  ),
13493
13195
  speakers_expected: zod3.number().nullish().describe(
13494
- "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
13196
+ "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
13495
13197
  ),
13496
13198
  speech_model_used: zod3.string().optional().describe(
13497
13199
  "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
@@ -13594,22 +13296,25 @@ var getTranscriptResponse = zod3.object({
13594
13296
  "The status of your transcript. Possible values are queued, processing, completed, or error."
13595
13297
  ),
13596
13298
  summarization: zod3.boolean().describe(
13597
- "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13299
+ "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13598
13300
  ),
13599
13301
  summary: zod3.string().nullish().describe(
13600
- "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
13302
+ "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13601
13303
  ),
13602
13304
  summary_model: zod3.string().nullish().describe(
13603
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n"
13305
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13604
13306
  ),
13605
13307
  summary_type: zod3.string().nullish().describe(
13606
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
13308
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13607
13309
  ),
13608
13310
  remove_audio_tags: zod3.enum(["all"]).describe(
13609
13311
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13610
13312
  ).or(zod3.null()).optional().describe(
13611
13313
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13612
13314
  ),
13315
+ temperature: zod3.number().nullish().describe(
13316
+ "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
13317
+ ),
13613
13318
  text: zod3.string().nullish().describe("The textual transcript of your media file"),
13614
13319
  throttled: zod3.boolean().nullish().describe(
13615
13320
  "True while a request is throttled and false when a request is no longer throttled"
@@ -13630,7 +13335,7 @@ var getTranscriptResponse = zod3.object({
13630
13335
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13631
13336
  ),
13632
13337
  speaker: zod3.string().nullable().describe(
13633
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
13338
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13634
13339
  )
13635
13340
  })
13636
13341
  ).describe("The words in the utterance."),
@@ -13645,7 +13350,7 @@ var getTranscriptResponse = zod3.object({
13645
13350
  )
13646
13351
  })
13647
13352
  ).nullish().describe(
13648
- "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) for more information.\n"
13353
+ "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
13649
13354
  ),
13650
13355
  webhook_auth: zod3.boolean().describe(
13651
13356
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
@@ -13669,7 +13374,7 @@ var getTranscriptResponse = zod3.object({
13669
13374
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13670
13375
  ),
13671
13376
  speaker: zod3.string().nullable().describe(
13672
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
13377
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13673
13378
  )
13674
13379
  })
13675
13380
  ).nullish().describe(
@@ -13702,7 +13407,7 @@ var deleteTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault =
13702
13407
  var deleteTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
13703
13408
  var deleteTranscriptResponse = zod3.object({
13704
13409
  audio_channels: zod3.number().optional().describe(
13705
- "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) is enabled."
13410
+ "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
13706
13411
  ),
13707
13412
  audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
13708
13413
  audio_end_at: zod3.number().nullish().describe(
@@ -13713,10 +13418,10 @@ var deleteTranscriptResponse = zod3.object({
13713
13418
  ),
13714
13419
  audio_url: zod3.string().describe("The URL of the media that was transcribed"),
13715
13420
  auto_chapters: zod3.boolean().nullish().describe(
13716
- "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13421
+ "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13717
13422
  ),
13718
13423
  auto_highlights: zod3.boolean().describe(
13719
- "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) is enabled, either true or false"
13424
+ "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
13720
13425
  ),
13721
13426
  auto_highlights_result: zod3.object({
13722
13427
  status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -13736,9 +13441,9 @@ var deleteTranscriptResponse = zod3.object({
13736
13441
  })
13737
13442
  ).describe("A temporally-sequential array of Key Phrases")
13738
13443
  }).describe(
13739
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
13444
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
13740
13445
  ).or(zod3.null()).optional().describe(
13741
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
13446
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
13742
13447
  ),
13743
13448
  chapters: zod3.array(
13744
13449
  zod3.object({
@@ -13751,7 +13456,7 @@ var deleteTranscriptResponse = zod3.object({
13751
13456
  end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
13752
13457
  }).describe("Chapter of the audio file")
13753
13458
  ).nullish().describe(
13754
- "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for more information."
13459
+ "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
13755
13460
  ),
13756
13461
  confidence: zod3.number().nullish().describe(
13757
13462
  "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
@@ -13807,10 +13512,10 @@ var deleteTranscriptResponse = zod3.object({
13807
13512
  "Object containing words or phrases to replace, and the word or phrase to replace with"
13808
13513
  )
13809
13514
  ).nullish().describe(
13810
- "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
13515
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
13811
13516
  ),
13812
13517
  disfluencies: zod3.boolean().nullish().describe(
13813
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
13518
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
13814
13519
  ),
13815
13520
  domain: zod3.string().nullish().describe(
13816
13521
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -13872,10 +13577,10 @@ var deleteTranscriptResponse = zod3.object({
13872
13577
  )
13873
13578
  }).describe("A detected entity")
13874
13579
  ).nullish().describe(
13875
- "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) for more information.\n"
13580
+ "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
13876
13581
  ),
13877
13582
  entity_detection: zod3.boolean().nullish().describe(
13878
- "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) is enabled, can be true or false"
13583
+ "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
13879
13584
  ),
13880
13585
  error: zod3.string().optional().describe("Error message of why the transcript failed"),
13881
13586
  filter_profanity: zod3.boolean().nullish().describe(
@@ -13885,7 +13590,7 @@ var deleteTranscriptResponse = zod3.object({
13885
13590
  "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
13886
13591
  ),
13887
13592
  iab_categories: zod3.boolean().nullish().describe(
13888
- "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) is enabled, can be true or false"
13593
+ "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
13889
13594
  ),
13890
13595
  iab_categories_result: zod3.object({
13891
13596
  status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -13908,9 +13613,9 @@ var deleteTranscriptResponse = zod3.object({
13908
13613
  ).describe("An array of results for the Topic Detection model"),
13909
13614
  summary: zod3.record(zod3.string(), zod3.number()).describe("The overall relevance of topic to the entire audio file")
13910
13615
  }).describe(
13911
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
13616
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
13912
13617
  ).or(zod3.null()).optional().describe(
13913
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
13618
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
13914
13619
  ),
13915
13620
  id: zod3.string().uuid().describe("The unique identifier of your transcript"),
13916
13621
  keyterms_prompt: zod3.array(zod3.string()).optional().describe(
@@ -14160,7 +13865,7 @@ var deleteTranscriptResponse = zod3.object({
14160
13865
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
14161
13866
  ),
14162
13867
  multichannel: zod3.boolean().nullish().describe(
14163
- "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) was enabled in the transcription request, either true or false"
13868
+ "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
14164
13869
  ),
14165
13870
  prompt: zod3.string().optional().describe(
14166
13871
  "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -14243,7 +13948,7 @@ var deleteTranscriptResponse = zod3.object({
14243
13948
  "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
14244
13949
  ),
14245
13950
  sentiment_analysis: zod3.boolean().nullish().describe(
14246
- "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) is enabled, can be true or false"
13951
+ "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
14247
13952
  ),
14248
13953
  sentiment_analysis_results: zod3.array(
14249
13954
  zod3.object({
@@ -14258,17 +13963,17 @@ var deleteTranscriptResponse = zod3.object({
14258
13963
  "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14259
13964
  ),
14260
13965
  speaker: zod3.string().nullable().describe(
14261
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
13966
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14262
13967
  )
14263
13968
  }).describe("The result of the Sentiment Analysis model")
14264
13969
  ).nullish().describe(
14265
- "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) for more information.\n"
13970
+ "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
14266
13971
  ),
14267
13972
  speaker_labels: zod3.boolean().nullish().describe(
14268
- "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, can be true or false"
13973
+ "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
14269
13974
  ),
14270
13975
  speakers_expected: zod3.number().nullish().describe(
14271
- "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
13976
+ "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
14272
13977
  ),
14273
13978
  speech_model_used: zod3.string().optional().describe(
14274
13979
  "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
@@ -14371,22 +14076,25 @@ var deleteTranscriptResponse = zod3.object({
14371
14076
  "The status of your transcript. Possible values are queued, processing, completed, or error."
14372
14077
  ),
14373
14078
  summarization: zod3.boolean().describe(
14374
- "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
14079
+ "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
14375
14080
  ),
14376
14081
  summary: zod3.string().nullish().describe(
14377
- "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
14082
+ "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14378
14083
  ),
14379
14084
  summary_model: zod3.string().nullish().describe(
14380
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n"
14085
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
14381
14086
  ),
14382
14087
  summary_type: zod3.string().nullish().describe(
14383
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
14088
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14384
14089
  ),
14385
14090
  remove_audio_tags: zod3.enum(["all"]).describe(
14386
14091
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14387
14092
  ).or(zod3.null()).optional().describe(
14388
14093
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14389
14094
  ),
14095
+ temperature: zod3.number().nullish().describe(
14096
+ "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
14097
+ ),
14390
14098
  text: zod3.string().nullish().describe("The textual transcript of your media file"),
14391
14099
  throttled: zod3.boolean().nullish().describe(
14392
14100
  "True while a request is throttled and false when a request is no longer throttled"
@@ -14407,7 +14115,7 @@ var deleteTranscriptResponse = zod3.object({
14407
14115
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14408
14116
  ),
14409
14117
  speaker: zod3.string().nullable().describe(
14410
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14118
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14411
14119
  )
14412
14120
  })
14413
14121
  ).describe("The words in the utterance."),
@@ -14422,7 +14130,7 @@ var deleteTranscriptResponse = zod3.object({
14422
14130
  )
14423
14131
  })
14424
14132
  ).nullish().describe(
14425
- "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) for more information.\n"
14133
+ "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
14426
14134
  ),
14427
14135
  webhook_auth: zod3.boolean().describe(
14428
14136
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
@@ -14446,7 +14154,7 @@ var deleteTranscriptResponse = zod3.object({
14446
14154
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14447
14155
  ),
14448
14156
  speaker: zod3.string().nullable().describe(
14449
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14157
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14450
14158
  )
14451
14159
  })
14452
14160
  ).nullish().describe(
@@ -14491,7 +14199,7 @@ var getTranscriptSentencesResponse = zod3.object({
14491
14199
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14492
14200
  ),
14493
14201
  speaker: zod3.string().nullable().describe(
14494
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14202
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14495
14203
  )
14496
14204
  })
14497
14205
  ).describe("An array of words in the sentence"),
@@ -14499,7 +14207,7 @@ var getTranscriptSentencesResponse = zod3.object({
14499
14207
  "The channel of the sentence. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14500
14208
  ),
14501
14209
  speaker: zod3.string().nullable().describe(
14502
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14210
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14503
14211
  )
14504
14212
  })
14505
14213
  ).describe("An array of sentences in the transcript")
@@ -14527,7 +14235,7 @@ var getTranscriptParagraphsResponse = zod3.object({
14527
14235
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14528
14236
  ),
14529
14237
  speaker: zod3.string().nullable().describe(
14530
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14238
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14531
14239
  )
14532
14240
  })
14533
14241
  ).describe("An array of words in the paragraph")
@@ -17139,23 +16847,6 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
17139
16847
  }).optional().describe(
17140
16848
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
17141
16849
  ),
17142
- speaker_reidentification: zod5.object({
17143
- success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
17144
- is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
17145
- exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
17146
- error: zod5.object({
17147
- status_code: zod5.number().describe("Status code of the addon error"),
17148
- exception: zod5.string().describe("Reason of the addon error"),
17149
- message: zod5.string().describe("Detailed message of the addon error")
17150
- }).nullable().describe(
17151
- "`null` if `success` is `true`. Contains the error details of the failed model"
17152
- ),
17153
- results: zod5.string().describe(
17154
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
17155
- )
17156
- }).optional().describe(
17157
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
17158
- ),
17159
16850
  structured_data_extraction: zod5.object({
17160
16851
  success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
17161
16852
  is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -18636,23 +18327,6 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
18636
18327
  }).optional().describe(
18637
18328
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
18638
18329
  ),
18639
- speaker_reidentification: zod5.object({
18640
- success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
18641
- is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
18642
- exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
18643
- error: zod5.object({
18644
- status_code: zod5.number().describe("Status code of the addon error"),
18645
- exception: zod5.string().describe("Reason of the addon error"),
18646
- message: zod5.string().describe("Detailed message of the addon error")
18647
- }).nullable().describe(
18648
- "`null` if `success` is `true`. Contains the error details of the failed model"
18649
- ),
18650
- results: zod5.string().describe(
18651
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
18652
- )
18653
- }).optional().describe(
18654
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
18655
- ),
18656
18330
  structured_data_extraction: zod5.object({
18657
18331
  success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
18658
18332
  is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -20790,23 +20464,6 @@ var transcriptionControllerListV2Response = zod5.object({
20790
20464
  }).optional().describe(
20791
20465
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
20792
20466
  ),
20793
- speaker_reidentification: zod5.object({
20794
- success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
20795
- is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
20796
- exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
20797
- error: zod5.object({
20798
- status_code: zod5.number().describe("Status code of the addon error"),
20799
- exception: zod5.string().describe("Reason of the addon error"),
20800
- message: zod5.string().describe("Detailed message of the addon error")
20801
- }).nullable().describe(
20802
- "`null` if `success` is `true`. Contains the error details of the failed model"
20803
- ),
20804
- results: zod5.string().describe(
20805
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
20806
- )
20807
- }).optional().describe(
20808
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
20809
- ),
20810
20467
  structured_data_extraction: zod5.object({
20811
20468
  success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
20812
20469
  is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -21106,11 +20763,7 @@ var transcriptionControllerListV2Response = zod5.object({
21106
20763
  channels: zod5.number().min(1).max(transcriptionControllerListV2ResponseItemsItemRequestParamsChannelsMax).default(
21107
20764
  transcriptionControllerListV2ResponseItemsItemRequestParamsChannelsDefault
21108
20765
  ).describe("The number of channels of the audio stream"),
21109
- model: zod5.enum(["solaria-1"]).describe(
21110
- 'The model used to process the audio. "solaria-1" is used by default.'
21111
- ).default(transcriptionControllerListV2ResponseItemsItemRequestParamsModelDefault).describe(
21112
- 'The model used to process the audio. "solaria-1" is used by default.'
21113
- ),
20766
+ model: zod5.enum(["solaria-1"]).describe('The model used to process the audio. "solaria-1" is used by default.').default(transcriptionControllerListV2ResponseItemsItemRequestParamsModelDefault).describe('The model used to process the audio. "solaria-1" is used by default.'),
21114
20767
  endpointing: zod5.number().min(transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingMin).max(transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingMax).default(
21115
20768
  transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingDefault
21116
20769
  ).describe(
@@ -23534,23 +23187,6 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
23534
23187
  }).optional().describe(
23535
23188
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
23536
23189
  ),
23537
- speaker_reidentification: zod5.object({
23538
- success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
23539
- is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
23540
- exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
23541
- error: zod5.object({
23542
- status_code: zod5.number().describe("Status code of the addon error"),
23543
- exception: zod5.string().describe("Reason of the addon error"),
23544
- message: zod5.string().describe("Detailed message of the addon error")
23545
- }).nullable().describe(
23546
- "`null` if `success` is `true`. Contains the error details of the failed model"
23547
- ),
23548
- results: zod5.string().describe(
23549
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
23550
- )
23551
- }).optional().describe(
23552
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
23553
- ),
23554
23190
  structured_data_extraction: zod5.object({
23555
23191
  success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
23556
23192
  is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -26716,23 +26352,6 @@ var historyControllerGetListV1Response = zod5.object({
26716
26352
  }).optional().describe(
26717
26353
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
26718
26354
  ),
26719
- speaker_reidentification: zod5.object({
26720
- success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
26721
- is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
26722
- exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
26723
- error: zod5.object({
26724
- status_code: zod5.number().describe("Status code of the addon error"),
26725
- exception: zod5.string().describe("Reason of the addon error"),
26726
- message: zod5.string().describe("Detailed message of the addon error")
26727
- }).nullable().describe(
26728
- "`null` if `success` is `true`. Contains the error details of the failed model"
26729
- ),
26730
- results: zod5.string().describe(
26731
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
26732
- )
26733
- }).optional().describe(
26734
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
26735
- ),
26736
26355
  structured_data_extraction: zod5.object({
26737
26356
  success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
26738
26357
  is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -27026,11 +26645,7 @@ var historyControllerGetListV1Response = zod5.object({
27026
26645
  historyControllerGetListV1ResponseItemsItemRequestParamsSampleRateDefault
27027
26646
  ).describe("The sample rate of the audio stream"),
27028
26647
  channels: zod5.number().min(1).max(historyControllerGetListV1ResponseItemsItemRequestParamsChannelsMax).default(historyControllerGetListV1ResponseItemsItemRequestParamsChannelsDefault).describe("The number of channels of the audio stream"),
27029
- model: zod5.enum(["solaria-1"]).describe(
27030
- 'The model used to process the audio. "solaria-1" is used by default.'
27031
- ).default(historyControllerGetListV1ResponseItemsItemRequestParamsModelDefault).describe(
27032
- 'The model used to process the audio. "solaria-1" is used by default.'
27033
- ),
26648
+ model: zod5.enum(["solaria-1"]).describe('The model used to process the audio. "solaria-1" is used by default.').default(historyControllerGetListV1ResponseItemsItemRequestParamsModelDefault).describe('The model used to process the audio. "solaria-1" is used by default.'),
27034
26649
  endpointing: zod5.number().min(historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingMin).max(historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingMax).default(
27035
26650
  historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingDefault
27036
26651
  ).describe(
@@ -36191,6 +35806,7 @@ __export(sonioxPublicAPI_zod_exports, {
36191
35806
  createTemporaryApiKeyBody: () => createTemporaryApiKeyBody,
36192
35807
  createTemporaryApiKeyBodyClientReferenceIdMaxOne: () => createTemporaryApiKeyBodyClientReferenceIdMaxOne,
36193
35808
  createTemporaryApiKeyBodyExpiresInSecondsMax: () => createTemporaryApiKeyBodyExpiresInSecondsMax,
35809
+ createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne: () => createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne,
36194
35810
  createTranscriptionBody: () => createTranscriptionBody2,
36195
35811
  createTranscriptionBodyAudioUrlMaxOne: () => createTranscriptionBodyAudioUrlMaxOne,
36196
35812
  createTranscriptionBodyAudioUrlRegExpOne: () => createTranscriptionBodyAudioUrlRegExpOne,
@@ -36321,11 +35937,11 @@ var getTranscriptionsResponse = zod10.object({
36321
35937
  });
36322
35938
  var createTranscriptionBodyModelMaxThree = 32;
36323
35939
  var createTranscriptionBodyAudioUrlMaxOne = 4096;
36324
- var createTranscriptionBodyAudioUrlRegExpOne = new RegExp("^https?://[^\\s]+$");
35940
+ var createTranscriptionBodyAudioUrlRegExpOne = /^https?:\/\/[^\s]+$/;
36325
35941
  var createTranscriptionBodyLanguageHintsItemMax = 10;
36326
35942
  var createTranscriptionBodyLanguageHintsMaxOne = 100;
36327
35943
  var createTranscriptionBodyWebhookUrlMaxOne = 256;
36328
- var createTranscriptionBodyWebhookUrlRegExpOne = new RegExp("^https?://[^\\s]+$");
35944
+ var createTranscriptionBodyWebhookUrlRegExpOne = /^https?:\/\/[^\s]+$/;
36329
35945
  var createTranscriptionBodyWebhookAuthHeaderNameMaxOne = 256;
36330
35946
  var createTranscriptionBodyWebhookAuthHeaderValueMaxOne = 256;
36331
35947
  var createTranscriptionBodyClientReferenceIdMaxOne = 256;
@@ -36473,22 +36089,25 @@ var getModelsResponse = zod10.object({
36473
36089
  });
36474
36090
  var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
36475
36091
  var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
36092
+ var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
36476
36093
  var createTemporaryApiKeyBody = zod10.object({
36477
36094
  usage_type: zod10.enum(["transcribe_websocket"]),
36478
36095
  expires_in_seconds: zod10.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
36479
- client_reference_id: zod10.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(zod10.null()).optional().describe("Optional tracking identifier string. Does not need to be unique.")
36096
+ client_reference_id: zod10.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(zod10.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
36097
+ single_use: zod10.boolean().or(zod10.null()).optional().describe("If true, the temporary API key can be used only once."),
36098
+ max_session_duration_seconds: zod10.number().min(1).max(createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne).or(zod10.null()).optional().describe(
36099
+ "Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
36100
+ )
36480
36101
  });
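The hunk above extends the temporary-API-key request body with `single_use` and `max_session_duration_seconds`, the latter capped by the new `createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne` constant (18e3, i.e. 18000 seconds). A small sketch of a request body exercising the new fields, against a stand-in schema mirroring the shape above (not the exported `createTemporaryApiKeyBody` itself):

```ts
import { z } from "zod";

// Stand-in mirroring the updated request-body shape from the diff above.
const temporaryApiKeyBody = z.object({
  usage_type: z.enum(["transcribe_websocket"]),
  expires_in_seconds: z.number().min(1).max(3600),
  client_reference_id: z.string().max(256).nullish(),
  single_use: z.boolean().nullish(),
  max_session_duration_seconds: z.number().min(1).max(18000).nullish(),
});

// Example values only: a single-use key whose WebSocket session is capped at one hour.
const body = temporaryApiKeyBody.parse({
  usage_type: "transcribe_websocket",
  expires_in_seconds: 600,
  single_use: true,
  max_session_duration_seconds: 3600,
});
console.log(body.max_session_duration_seconds); // 3600
```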
36481
36102
 
36482
36103
  // src/generated/soniox/streaming-types.zod.ts
36483
36104
  var streaming_types_zod_exports = {};
36484
36105
  __export(streaming_types_zod_exports, {
36485
36106
  sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
36486
- sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
36487
36107
  sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
36488
36108
  sonioxContextSchema: () => sonioxContextSchema,
36489
36109
  sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
36490
36110
  sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
36491
- sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
36492
36111
  sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
36493
36112
  sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
36494
36113
  sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
@@ -36502,7 +36121,7 @@ __export(streaming_types_zod_exports, {
36502
36121
  streamingUpdateConfigParams: () => streamingUpdateConfigParams3
36503
36122
  });
36504
36123
  import { z as zod11 } from "zod";
36505
- var sonioxAutoDetectedAudioFormatSchema = zod11.enum([
36124
+ var sonioxAudioFormatSchema = zod11.enum([
36506
36125
  "auto",
36507
36126
  "aac",
36508
36127
  "aiff",
@@ -36512,10 +36131,7 @@ var sonioxAutoDetectedAudioFormatSchema = zod11.enum([
36512
36131
  "mp3",
36513
36132
  "ogg",
36514
36133
  "wav",
36515
- "webm"
36516
- ]);
36517
- var sonioxPcmAudioEncodingSchema = zod11.enum([
36518
- // Signed PCM
36134
+ "webm",
36519
36135
  "pcm_s8",
36520
36136
  "pcm_s16le",
36521
36137
  "pcm_s16be",
@@ -36523,7 +36139,6 @@ var sonioxPcmAudioEncodingSchema = zod11.enum([
36523
36139
  "pcm_s24be",
36524
36140
  "pcm_s32le",
36525
36141
  "pcm_s32be",
36526
- // Unsigned PCM
36527
36142
  "pcm_u8",
36528
36143
  "pcm_u16le",
36529
36144
  "pcm_u16be",
@@ -36531,86 +36146,81 @@ var sonioxPcmAudioEncodingSchema = zod11.enum([
36531
36146
  "pcm_u24be",
36532
36147
  "pcm_u32le",
36533
36148
  "pcm_u32be",
36534
- // Float PCM
36535
36149
  "pcm_f32le",
36536
36150
  "pcm_f32be",
36537
36151
  "pcm_f64le",
36538
36152
  "pcm_f64be",
36539
- // Companded
36540
36153
  "mulaw",
36541
36154
  "alaw"
36542
36155
  ]);
36543
- var sonioxAudioFormatSchema = zod11.union([
36544
- sonioxAutoDetectedAudioFormatSchema,
36545
- sonioxPcmAudioEncodingSchema
36546
- ]);
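With the change above, the container formats (including "auto") and the raw PCM encodings live in one flat enum instead of a union of two schemas, so a single parse accepts either kind of value. An illustrative subset (not the full generated list):

```ts
import { z } from "zod";

// Illustrative subset of the merged audio-format enum from the diff above.
const audioFormat = z.enum(["auto", "wav", "mp3", "pcm_s16le", "mulaw"]);

audioFormat.parse("auto");      // container format with auto-detection
audioFormat.parse("pcm_s16le"); // raw PCM encoding, accepted by the same schema
```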
36547
36156
  var sonioxOneWayTranslationSchema = zod11.object({
36548
36157
  type: zod11.literal("one_way"),
36549
- target_language: zod11.string().describe("Target language code for translation")
36158
+ target_language: zod11.string()
36550
36159
  });
36551
36160
  var sonioxTwoWayTranslationSchema = zod11.object({
36552
36161
  type: zod11.literal("two_way"),
36553
- language_a: zod11.string().describe("First language for bidirectional translation"),
36554
- language_b: zod11.string().describe("Second language for bidirectional translation")
36162
+ language_a: zod11.string(),
36163
+ language_b: zod11.string()
36555
36164
  });
36556
36165
  var sonioxTranslationConfigSchema = zod11.union([
36557
36166
  sonioxOneWayTranslationSchema,
36558
36167
  sonioxTwoWayTranslationSchema
36559
36168
  ]);
36560
36169
  var sonioxContextGeneralItemSchema = zod11.object({
36561
- key: zod11.string().describe("Context item key (e.g. 'Domain')"),
36562
- value: zod11.string().describe("Context item value (e.g. 'medicine')")
36170
+ key: zod11.string(),
36171
+ value: zod11.string()
36563
36172
  });
36564
36173
  var sonioxTranslationTermSchema = zod11.object({
36565
- source: zod11.string().describe("Source term"),
36566
- target: zod11.string().describe("Target term to translate to")
36174
+ source: zod11.string(),
36175
+ target: zod11.string()
36567
36176
  });
36568
36177
  var sonioxStructuredContextSchema = zod11.object({
36569
- general: zod11.array(sonioxContextGeneralItemSchema).optional().describe("General context items (key-value pairs)"),
36570
- text: zod11.string().optional().describe("Text context"),
36571
- terms: zod11.array(zod11.string()).optional().describe("Terms that might occur in speech"),
36572
- translation_terms: zod11.array(sonioxTranslationTermSchema).optional().describe("Hints how to translate specific terms (ignored if translation is not enabled)")
36178
+ general: zod11.array(sonioxContextGeneralItemSchema).optional(),
36179
+ text: zod11.string().optional(),
36180
+ terms: zod11.array(zod11.string()).optional(),
36181
+ translation_terms: zod11.array(sonioxTranslationTermSchema).optional()
36573
36182
  });
36574
36183
  var sonioxContextSchema = zod11.union([sonioxStructuredContextSchema, zod11.string()]);
36575
36184
  var sonioxRealtimeModelSchema = zod11.enum([
36185
+ "stt-rt-v4",
36576
36186
  "stt-rt-v3",
36577
36187
  "stt-rt-preview",
36578
36188
  "stt-rt-v3-preview",
36579
36189
  "stt-rt-preview-v2"
36580
36190
  ]);
36581
36191
  var streamingTranscriberParams3 = zod11.object({
36582
- model: sonioxRealtimeModelSchema.describe("Real-time model to use"),
36583
- audioFormat: sonioxAudioFormatSchema.optional().describe("Audio format specification. Use 'auto' for automatic detection"),
36584
- sampleRate: zod11.number().optional().describe("Sample rate in Hz (required for raw PCM formats)"),
36585
- numChannels: zod11.number().min(1).max(2).optional().describe("Number of audio channels (1 for mono, 2 for stereo) - required for raw PCM formats"),
36586
- languageHints: zod11.array(zod11.string()).optional().describe("Expected languages in the audio (ISO language codes)"),
36587
- context: sonioxContextSchema.optional().describe("Additional context to improve transcription accuracy"),
36588
- enableSpeakerDiarization: zod11.boolean().optional().describe("Enable speaker diarization - each token will include a speaker field"),
36589
- enableLanguageIdentification: zod11.boolean().optional().describe("Enable language identification - each token will include a language field"),
36590
- enableEndpointDetection: zod11.boolean().optional().describe("Enable endpoint detection to detect when a speaker has finished talking"),
36591
- translation: sonioxTranslationConfigSchema.optional().describe("Translation configuration"),
36592
- clientReferenceId: zod11.string().optional().describe("Optional tracking identifier (client-defined)")
36593
- });
36594
- var sonioxTranslationStatusSchema = zod11.enum(["none", "original", "translation"]);
36192
+ model: sonioxRealtimeModelSchema,
36193
+ audioFormat: sonioxAudioFormatSchema.optional(),
36194
+ sampleRate: zod11.number().optional(),
36195
+ numChannels: zod11.number().optional(),
36196
+ languageHints: zod11.array(zod11.string()).optional(),
36197
+ context: sonioxContextSchema.optional(),
36198
+ enableSpeakerDiarization: zod11.boolean().optional(),
36199
+ enableLanguageIdentification: zod11.boolean().optional(),
36200
+ enableEndpointDetection: zod11.boolean().optional(),
36201
+ translation: sonioxTranslationConfigSchema.optional(),
36202
+ clientReferenceId: zod11.string().optional()
36203
+ });
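The reshaped `streamingTranscriberParams3` above drops the per-field descriptions and the 1-2 bound on `numChannels`. A hedged example of a params object the new shape would accept; the field names come from the diff, the concrete values are invented:

```ts
// Example real-time session parameters (illustrative values only).
const params = {
  model: "stt-rt-v4",
  audioFormat: "pcm_s16le",
  sampleRate: 16000,
  numChannels: 1,
  languageHints: ["en", "es"],
  enableSpeakerDiarization: true,
  enableEndpointDetection: true,
};
// streamingTranscriberParams3.parse(params) would accept this object.
```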
36204
+ var sonioxTranslationStatusSchema = zod11.enum(["original", "translation", "none"]);
36595
36205
  var sonioxTokenSchema = zod11.object({
36596
- text: zod11.string().describe("Token text content (subword, word, or space)"),
36597
- start_ms: zod11.number().optional().describe("Start time of the token in milliseconds"),
36598
- end_ms: zod11.number().optional().describe("End time of the token in milliseconds"),
36599
- confidence: zod11.number().min(0).max(1).optional().describe("Confidence score between 0.0 and 1.0"),
36600
- is_final: zod11.boolean().describe("Whether this token is final (confirmed) or provisional"),
36601
- speaker: zod11.string().optional().describe("Speaker identifier (only present when speaker diarization is enabled)"),
36602
- language: zod11.string().optional().describe("Detected language code (only present when language identification is enabled)"),
36603
- source_language: zod11.string().optional().describe("Original language code for translated tokens"),
36604
- translation_status: sonioxTranslationStatusSchema.optional().describe("Translation status: 'none', 'original', or 'translation'")
36206
+ text: zod11.string(),
36207
+ start_ms: zod11.number().optional(),
36208
+ end_ms: zod11.number().optional(),
36209
+ confidence: zod11.number(),
36210
+ is_final: zod11.boolean(),
36211
+ speaker: zod11.string().optional(),
36212
+ translation_status: sonioxTranslationStatusSchema.optional(),
36213
+ language: zod11.string().optional(),
36214
+ source_language: zod11.string().optional()
36605
36215
  });
36606
36216
  var sonioxStreamingResponseSchema = zod11.object({
36607
- text: zod11.string().optional().describe("Complete transcribed text"),
36608
- tokens: zod11.array(sonioxTokenSchema).describe("List of recognized tokens"),
36609
- final_audio_proc_ms: zod11.number().optional().describe("Milliseconds of audio processed into final tokens"),
36610
- total_audio_proc_ms: zod11.number().optional().describe("Milliseconds of audio processed (final + non-final)"),
36611
- finished: zod11.boolean().optional().describe("Whether the transcription is complete"),
36612
- error: zod11.string().optional().describe("Error message if an error occurred"),
36613
- error_code: zod11.number().optional().describe("Error code if an error occurred")
36217
+ text: zod11.string(),
36218
+ tokens: zod11.array(sonioxTokenSchema),
36219
+ final_audio_proc_ms: zod11.number(),
36220
+ total_audio_proc_ms: zod11.number(),
36221
+ finished: zod11.boolean().optional(),
36222
+ error_code: zod11.number().optional(),
36223
+ error_message: zod11.string().optional()
36614
36224
  });
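The streaming response schema above also changes shape: `text`, `tokens`, and the audio-progress counters are now required, and errors surface as `error_code` plus `error_message` instead of a single `error` string. A self-contained sketch of handling such a message with a stand-in schema (not the exported one):

```ts
import { z } from "zod";

// Stand-ins mirroring the reshaped token and response schemas from the diff above.
const token = z.object({
  text: z.string(),
  confidence: z.number(),
  is_final: z.boolean(),
  speaker: z.string().optional(),
});
const streamingResponse = z.object({
  text: z.string(),
  tokens: z.array(token),
  final_audio_proc_ms: z.number(),
  total_audio_proc_ms: z.number(),
  finished: z.boolean().optional(),
  error_code: z.number().optional(),
  error_message: z.string().optional(),
});

// Hypothetical WebSocket message (values made up).
const msg = streamingResponse.parse({
  text: "hello",
  tokens: [{ text: "hello", confidence: 0.97, is_final: true }],
  final_audio_proc_ms: 1200,
  total_audio_proc_ms: 1500,
});
if (msg.error_code !== undefined) {
  console.error(msg.error_code, msg.error_message);
}
```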
36615
36225
  var sonioxRecorderStateSchema = zod11.enum([
36616
36226
  "Init",
@@ -37176,8 +36786,8 @@ var BatchOnlyProviders = AllProviders.filter(
37176
36786
  );
37177
36787
 
37178
36788
  // src/generated/deepgram/schema/index.ts
37179
- var schema_exports4 = {};
37180
- __export(schema_exports4, {
36789
+ var schema_exports5 = {};
36790
+ __export(schema_exports5, {
37181
36791
  V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
37182
36792
  V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
37183
36793
  V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
@@ -37432,8 +37042,8 @@ var V1SpeakPostParametersSampleRate = {
37432
37042
  };
37433
37043
 
37434
37044
  // src/generated/openai/schema/index.ts
37435
- var schema_exports5 = {};
37436
- __export(schema_exports5, {
37045
+ var schema_exports6 = {};
37046
+ __export(schema_exports6, {
37437
37047
  AudioResponseFormat: () => AudioResponseFormat,
37438
37048
  CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
37439
37049
  CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
@@ -37727,6 +37337,16 @@ var ToolChoiceOptions = {
37727
37337
  required: "required"
37728
37338
  };
37729
37339
 
37340
+ // src/generated/openai/schema/transcriptionDiarizedSegmentType.ts
37341
+ var TranscriptionDiarizedSegmentType = {
37342
+ transcripttextsegment: "transcript.text.segment"
37343
+ };
37344
+
37345
+ // src/generated/openai/schema/transcriptionInclude.ts
37346
+ var TranscriptionInclude = {
37347
+ logprobs: "logprobs"
37348
+ };
37349
+
37730
37350
  // src/generated/openai/schema/transcriptTextDeltaEventType.ts
37731
37351
  var TranscriptTextDeltaEventType = {
37732
37352
  transcripttextdelta: "transcript.text.delta"
@@ -37752,16 +37372,6 @@ var TranscriptTextUsageTokensType = {
37752
37372
  tokens: "tokens"
37753
37373
  };
37754
37374
 
37755
- // src/generated/openai/schema/transcriptionDiarizedSegmentType.ts
37756
- var TranscriptionDiarizedSegmentType = {
37757
- transcripttextsegment: "transcript.text.segment"
37758
- };
37759
-
37760
- // src/generated/openai/schema/transcriptionInclude.ts
37761
- var TranscriptionInclude = {
37762
- logprobs: "logprobs"
37763
- };
37764
-
37765
37375
  // src/generated/openai/schema/vadConfigType.ts
37766
37376
  var VadConfigType = {
37767
37377
  server_vad: "server_vad"
@@ -37773,8 +37383,8 @@ var VoiceResourceObject = {
37773
37383
  };
37774
37384
 
37775
37385
  // src/generated/speechmatics/schema/index.ts
37776
- var schema_exports6 = {};
37777
- __export(schema_exports6, {
37386
+ var schema_exports7 = {};
37387
+ __export(schema_exports7, {
37778
37388
  AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
37779
37389
  ErrorResponseError: () => ErrorResponseError,
37780
37390
  GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
@@ -37963,32 +37573,6 @@ var WrittenFormRecognitionResultType = {
37963
37573
  word: "word"
37964
37574
  };
37965
37575
 
37966
- // src/generated/soniox/schema/index.ts
37967
- var schema_exports7 = {};
37968
- __export(schema_exports7, {
37969
- TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
37970
- TranscriptionMode: () => TranscriptionMode,
37971
- TranscriptionStatus: () => TranscriptionStatus,
37972
- TranslationConfigType: () => TranslationConfigType
37973
- });
37974
-
37975
- // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
37976
- var TemporaryApiKeyUsageType = {
37977
- transcribe_websocket: "transcribe_websocket"
37978
- };
37979
-
37980
- // src/generated/soniox/schema/transcriptionMode.ts
37981
- var TranscriptionMode = {
37982
- real_time: "real_time",
37983
- async: "async"
37984
- };
37985
-
37986
- // src/generated/soniox/schema/translationConfigType.ts
37987
- var TranslationConfigType = {
37988
- one_way: "one_way",
37989
- two_way: "two_way"
37990
- };
37991
-
37992
37576
  // src/generated/elevenlabs/schema/index.ts
37993
37577
  var schema_exports8 = {};
37994
37578
  __export(schema_exports8, {
@@ -38143,8 +37727,8 @@ var getJobsQueryParams = zod12.object({
38143
37727
  var getJobsResponseJobsItemDurationMin = 0;
38144
37728
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
38145
37729
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38146
- var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = new RegExp("^(.|all)$");
38147
- var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = new RegExp("^[A-Za-z0-9._]+$");
37730
+ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
37731
+ var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38148
37732
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38149
37733
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38150
37734
  var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38342,8 +37926,8 @@ var getJobsJobidParams = zod12.object({
38342
37926
  var getJobsJobidResponseJobDurationMin = 0;
38343
37927
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
38344
37928
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38345
- var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = new RegExp("^(.|all)$");
38346
- var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = new RegExp("^[A-Za-z0-9._]+$");
37929
+ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
37930
+ var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38347
37931
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38348
37932
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38349
37933
  var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38540,8 +38124,8 @@ var deleteJobsJobidQueryParams = zod12.object({
38540
38124
  var deleteJobsJobidResponseJobDurationMin = 0;
38541
38125
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
38542
38126
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38543
- var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = new RegExp("^(.|all)$");
38544
- var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = new RegExp("^[A-Za-z0-9._]+$");
38127
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38128
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38545
38129
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38546
38130
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38547
38131
  var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38744,8 +38328,8 @@ var getJobsJobidTranscriptQueryParams = zod12.object({
38744
38328
  var getJobsJobidTranscriptResponseJobDurationMin = 0;
38745
38329
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
38746
38330
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38747
- var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = new RegExp("^(.|all)$");
38748
- var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = new RegExp("^[A-Za-z0-9._]+$");
38331
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38332
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38749
38333
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38750
38334
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38751
38335
  var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
@@ -39692,7 +39276,7 @@ export {
39692
39276
  DeepgramTTSSampleRate,
39693
39277
  DeepgramTopicMode,
39694
39278
  DeepgramTranscriptionSchema,
39695
- schema_exports4 as DeepgramTypes,
39279
+ schema_exports5 as DeepgramTypes,
39696
39280
  deepgramAPI_zod_exports as DeepgramZodSchemas,
39697
39281
  ElevenLabsAdapter,
39698
39282
  ElevenLabsCapabilities,
@@ -39729,7 +39313,7 @@ export {
39729
39313
  OpenAIResponseFormat,
39730
39314
  streaming_types_exports as OpenAIStreamingTypes,
39731
39315
  OpenAITranscriptionSchema,
39732
- schema_exports5 as OpenAITypes,
39316
+ schema_exports6 as OpenAITypes,
39733
39317
  OpenAIWhisperAdapter,
39734
39318
  openAIAudioRealtimeAPI_zod_exports as OpenAIZodSchemas,
39735
39319
  ProfanityFilterMode,
@@ -39758,7 +39342,7 @@ export {
39758
39342
  SonioxStreamingUpdateSchema,
39759
39343
  streaming_types_zod_exports as SonioxStreamingZodSchemas,
39760
39344
  SonioxTranscriptionSchema,
39761
- schema_exports7 as SonioxTypes,
39345
+ schema_exports4 as SonioxTypes,
39762
39346
  SpeakV1ContainerParameter,
39763
39347
  SpeakV1EncodingParameter,
39764
39348
  SpeakV1SampleRateParameter,
@@ -39773,7 +39357,7 @@ export {
39773
39357
  SpeechmaticsStreamingSchema,
39774
39358
  SpeechmaticsStreamingUpdateSchema,
39775
39359
  SpeechmaticsTranscriptionSchema,
39776
- schema_exports6 as SpeechmaticsTypes,
39360
+ schema_exports7 as SpeechmaticsTypes,
39777
39361
  speechmaticsASRRESTAPI_zod_exports as SpeechmaticsZodSchemas,
39778
39362
  StreamingProviders,
39779
39363
  StreamingSupportedBitDepthEnum,