voice-router-dev 0.8.9 → 0.9.1

This diff shows the published contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.
package/dist/index.js CHANGED
@@ -82,7 +82,7 @@ __export(src_exports, {
82
82
  DeepgramTTSSampleRate: () => DeepgramTTSSampleRate,
83
83
  DeepgramTopicMode: () => DeepgramTopicMode,
84
84
  DeepgramTranscriptionSchema: () => DeepgramTranscriptionSchema,
85
- DeepgramTypes: () => schema_exports4,
85
+ DeepgramTypes: () => schema_exports5,
86
86
  DeepgramZodSchemas: () => deepgramAPI_zod_exports,
87
87
  ElevenLabsAdapter: () => ElevenLabsAdapter,
88
88
  ElevenLabsCapabilities: () => ElevenLabsCapabilities,
@@ -119,7 +119,7 @@ __export(src_exports, {
119
119
  OpenAIResponseFormat: () => OpenAIResponseFormat,
120
120
  OpenAIStreamingTypes: () => streaming_types_exports,
121
121
  OpenAITranscriptionSchema: () => OpenAITranscriptionSchema,
122
- OpenAITypes: () => schema_exports5,
122
+ OpenAITypes: () => schema_exports6,
123
123
  OpenAIWhisperAdapter: () => OpenAIWhisperAdapter,
124
124
  OpenAIZodSchemas: () => openAIAudioRealtimeAPI_zod_exports,
125
125
  ProfanityFilterMode: () => ProfanityFilterMode,
@@ -148,7 +148,7 @@ __export(src_exports, {
148
148
  SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
149
149
  SonioxStreamingZodSchemas: () => streaming_types_zod_exports,
150
150
  SonioxTranscriptionSchema: () => SonioxTranscriptionSchema,
151
- SonioxTypes: () => schema_exports7,
151
+ SonioxTypes: () => schema_exports4,
152
152
  SpeakV1ContainerParameter: () => SpeakV1ContainerParameter,
153
153
  SpeakV1EncodingParameter: () => SpeakV1EncodingParameter,
154
154
  SpeakV1SampleRateParameter: () => SpeakV1SampleRateParameter,
@@ -163,7 +163,7 @@ __export(src_exports, {
163
163
  SpeechmaticsStreamingSchema: () => SpeechmaticsStreamingSchema,
164
164
  SpeechmaticsStreamingUpdateSchema: () => SpeechmaticsStreamingUpdateSchema,
165
165
  SpeechmaticsTranscriptionSchema: () => SpeechmaticsTranscriptionSchema,
166
- SpeechmaticsTypes: () => schema_exports6,
166
+ SpeechmaticsTypes: () => schema_exports7,
167
167
  SpeechmaticsZodSchemas: () => speechmaticsASRRESTAPI_zod_exports,
168
168
  StreamingProviders: () => StreamingProviders,
169
169
  StreamingSupportedBitDepthEnum: () => StreamingSupportedBitDepthEnum,
@@ -983,60 +983,60 @@ var SonioxLanguage = {
983
983
  // src/generated/soniox/models.ts
984
984
  var SonioxModels = [
985
985
  { id: "stt-rt-v4", name: "Speech-to-Text Real-time v4", mode: "real_time" },
986
- { id: "stt-rt-v3", name: "Speech-to-Text Real-time v3", mode: "real_time" },
987
986
  { id: "stt-async-v4", name: "Speech-to-Text Async v4", mode: "async" },
988
- { id: "stt-async-v3", name: "Speech-to-Text Async v3", mode: "async" },
989
- { id: "stt-rt-preview", name: "Speech-to-Text Real-time Preview", mode: "real_time", aliasOf: "stt-rt-v3" },
990
- { id: "stt-async-preview", name: "Speech-to-Text Async Preview", mode: "async", aliasOf: "stt-async-v3" },
991
- { id: "stt-rt-v3-preview", name: "Speech-to-Text Real-time v3 Preview", mode: "real_time", aliasOf: "stt-rt-v3" },
992
- { id: "stt-rt-preview-v2", name: "Speech-to-Text Real-time Preview v2", mode: "real_time", aliasOf: "stt-rt-v3" },
993
- { id: "stt-async-preview-v1", name: "Speech-to-Text Async Preview v1", mode: "async", aliasOf: "stt-async-v3" }
987
+ { id: "stt-rt-preview", name: "Speech-to-Text Real-time Preview", mode: "real_time", aliasOf: "stt-rt-v4" },
988
+ { id: "stt-async-preview", name: "Speech-to-Text Async Preview", mode: "async", aliasOf: "stt-async-v4" },
989
+ { id: "stt-rt-v3-preview", name: "Speech-to-Text Real-time v3 Preview", mode: "real_time", aliasOf: "stt-rt-v4" },
990
+ { id: "stt-rt-preview-v2", name: "Speech-to-Text Real-time Preview v2", mode: "real_time", aliasOf: "stt-rt-v4" },
991
+ { id: "stt-async-preview-v1", name: "Speech-to-Text Async Preview v1", mode: "async", aliasOf: "stt-async-v4" },
992
+ { id: "stt-rt-v3", name: "Speech-to-Text Real-time v3", mode: "real_time", aliasOf: "stt-rt-v4" },
993
+ { id: "stt-async-v3", name: "Speech-to-Text Async v3", mode: "async", aliasOf: "stt-async-v4" }
994
994
  ];
995
995
  var SonioxModelCodes = [
996
996
  "stt-rt-v4",
997
- "stt-rt-v3",
998
997
  "stt-async-v4",
999
- "stt-async-v3",
1000
998
  "stt-rt-preview",
1001
999
  "stt-async-preview",
1002
1000
  "stt-rt-v3-preview",
1003
1001
  "stt-rt-preview-v2",
1004
- "stt-async-preview-v1"
1002
+ "stt-async-preview-v1",
1003
+ "stt-rt-v3",
1004
+ "stt-async-v3"
1005
1005
  ];
1006
1006
  var SonioxModelLabels = {
1007
1007
  "stt-rt-v4": "Speech-to-Text Real-time v4",
1008
- "stt-rt-v3": "Speech-to-Text Real-time v3",
1009
1008
  "stt-async-v4": "Speech-to-Text Async v4",
1010
- "stt-async-v3": "Speech-to-Text Async v3",
1011
1009
  "stt-rt-preview": "Speech-to-Text Real-time Preview",
1012
1010
  "stt-async-preview": "Speech-to-Text Async Preview",
1013
1011
  "stt-rt-v3-preview": "Speech-to-Text Real-time v3 Preview",
1014
1012
  "stt-rt-preview-v2": "Speech-to-Text Real-time Preview v2",
1015
- "stt-async-preview-v1": "Speech-to-Text Async Preview v1"
1013
+ "stt-async-preview-v1": "Speech-to-Text Async Preview v1",
1014
+ "stt-rt-v3": "Speech-to-Text Real-time v3",
1015
+ "stt-async-v3": "Speech-to-Text Async v3"
1016
1016
  };
1017
1017
  var SonioxModel = {
1018
1018
  stt_rt_v4: "stt-rt-v4",
1019
- stt_rt_v3: "stt-rt-v3",
1020
1019
  stt_async_v4: "stt-async-v4",
1021
- stt_async_v3: "stt-async-v3",
1022
1020
  stt_rt_preview: "stt-rt-preview",
1023
1021
  stt_async_preview: "stt-async-preview",
1024
1022
  stt_rt_v3_preview: "stt-rt-v3-preview",
1025
1023
  stt_rt_preview_v2: "stt-rt-preview-v2",
1026
- stt_async_preview_v1: "stt-async-preview-v1"
1024
+ stt_async_preview_v1: "stt-async-preview-v1",
1025
+ stt_rt_v3: "stt-rt-v3",
1026
+ stt_async_v3: "stt-async-v3"
1027
1027
  };
1028
1028
  var SonioxRealtimeModel = {
1029
1029
  stt_rt_v4: "stt-rt-v4",
1030
- stt_rt_v3: "stt-rt-v3",
1031
1030
  stt_rt_preview: "stt-rt-preview",
1032
1031
  stt_rt_v3_preview: "stt-rt-v3-preview",
1033
- stt_rt_preview_v2: "stt-rt-preview-v2"
1032
+ stt_rt_preview_v2: "stt-rt-preview-v2",
1033
+ stt_rt_v3: "stt-rt-v3"
1034
1034
  };
1035
1035
  var SonioxAsyncModel = {
1036
1036
  stt_async_v4: "stt-async-v4",
1037
- stt_async_v3: "stt-async-v3",
1038
1037
  stt_async_preview: "stt-async-preview",
1039
- stt_async_preview_v1: "stt-async-preview-v1"
1038
+ stt_async_preview_v1: "stt-async-preview-v1",
1039
+ stt_async_v3: "stt-async-v3"
1040
1040
  };
1041
1041
 
1042
1042
  // src/generated/speechmatics/languages.ts
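Note the Soniox model table change in this hunk: the v3 and preview entries are kept, but they now carry `aliasOf` pointers to the v4 models instead of standing alone (or aliasing v3). A minimal sketch of following those pointers — the `resolveSonioxModel` helper is hypothetical, and whether `SonioxModels` is re-exported from the package root is not confirmed by this diff:

```typescript
// Hypothetical helper over the SonioxModels table shown above.
type SonioxModelEntry = { id: string; name: string; mode: string; aliasOf?: string };

function resolveSonioxModel(models: SonioxModelEntry[], id: string): string {
  return models.find((m) => m.id === id)?.aliasOf ?? id;
}

// With the 0.9.1 table:
//   resolveSonioxModel(SonioxModels, "stt-rt-v3")      -> "stt-rt-v4" (standalone model in 0.8.9)
//   resolveSonioxModel(SonioxModels, "stt-rt-preview") -> "stt-rt-v4" (aliased "stt-rt-v3" in 0.8.9)
```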
@@ -4011,17 +4011,17 @@ var SummaryTypesEnum = {
4011
4011
  concise: "concise"
4012
4012
  };
4013
4013
 
4014
- // src/generated/gladia/schema/transcriptMessageType.ts
4015
- var TranscriptMessageType = {
4016
- transcript: "transcript"
4017
- };
4018
-
4019
4014
  // src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
4020
4015
  var TranscriptionControllerListV2KindItem = {
4021
4016
  "pre-recorded": "pre-recorded",
4022
4017
  live: "live"
4023
4018
  };
4024
4019
 
4020
+ // src/generated/gladia/schema/transcriptMessageType.ts
4021
+ var TranscriptMessageType = {
4022
+ transcript: "transcript"
4023
+ };
4024
+
4025
4025
  // src/generated/gladia/schema/translationMessageType.ts
4026
4026
  var TranslationMessageType = {
4027
4027
  translation: "translation"
@@ -4293,7 +4293,7 @@ var WebhookTranscriptionSuccessPayloadEvent = {
4293
4293
 
4294
4294
  // src/generated/gladia/api/gladiaControlAPI.ts
4295
4295
  var preRecordedControllerInitPreRecordedJobV2 = (initTranscriptionRequest, options) => {
4296
- return import_axios.default.post(`/v2/pre-recorded`, initTranscriptionRequest, options);
4296
+ return import_axios.default.post("/v2/pre-recorded", initTranscriptionRequest, options);
4297
4297
  };
4298
4298
  var preRecordedControllerGetPreRecordedJobV2 = (id, options) => {
4299
4299
  return import_axios.default.get(`/v2/pre-recorded/${id}`, options);
@@ -4308,13 +4308,13 @@ var preRecordedControllerGetAudioV2 = (id, options) => {
4308
4308
  });
4309
4309
  };
4310
4310
  var transcriptionControllerListV2 = (params, options) => {
4311
- return import_axios.default.get(`/v2/transcription`, {
4311
+ return import_axios.default.get("/v2/transcription", {
4312
4312
  ...options,
4313
4313
  params: { ...params, ...options?.params }
4314
4314
  });
4315
4315
  };
4316
4316
  var streamingControllerInitStreamingSessionV2 = (streamingRequest, params, options) => {
4317
- return import_axios.default.post(`/v2/live`, streamingRequest, {
4317
+ return import_axios.default.post("/v2/live", streamingRequest, {
4318
4318
  ...options,
4319
4319
  params: { ...params, ...options?.params }
4320
4320
  });
@@ -4580,7 +4580,6 @@ var GladiaAdapter = class extends BaseAdapter {
4580
4580
  sentiment: result?.sentiment_analysis || void 0,
4581
4581
  audioToLlm: result?.audio_to_llm || void 0,
4582
4582
  chapters: result?.chapterization || void 0,
4583
- speakerReidentification: result?.speaker_reidentification || void 0,
4584
4583
  structuredData: result?.structured_data_extraction || void 0,
4585
4584
  customMetadata: response.custom_metadata || void 0
4586
4585
  },
@@ -5642,17 +5641,17 @@ var PiiPolicy = {
5642
5641
  zodiac_sign: "zodiac_sign"
5643
5642
  };
5644
5643
 
5644
+ // src/generated/assemblyai/schema/redactedAudioStatus.ts
5645
+ var RedactedAudioStatus = {
5646
+ redacted_audio_ready: "redacted_audio_ready"
5647
+ };
5648
+
5645
5649
  // src/generated/assemblyai/schema/redactPiiAudioQuality.ts
5646
5650
  var RedactPiiAudioQuality = {
5647
5651
  mp3: "mp3",
5648
5652
  wav: "wav"
5649
5653
  };
5650
5654
 
5651
- // src/generated/assemblyai/schema/redactedAudioStatus.ts
5652
- var RedactedAudioStatus = {
5653
- redacted_audio_ready: "redacted_audio_ready"
5654
- };
5655
-
5656
5655
  // src/generated/assemblyai/schema/sentiment.ts
5657
5656
  var Sentiment = {
5658
5657
  POSITIVE: "POSITIVE",
@@ -5716,10 +5715,10 @@ var TranscriptRemoveAudioTags = {
5716
5715
 
5717
5716
  // src/generated/assemblyai/api/assemblyAIAPI.ts
5718
5717
  var createTranscript = (transcriptParams, options) => {
5719
- return import_axios2.default.post(`/v2/transcript`, transcriptParams, options);
5718
+ return import_axios2.default.post("/v2/transcript", transcriptParams, options);
5720
5719
  };
5721
5720
  var listTranscripts = (params, options) => {
5722
- return import_axios2.default.get(`/v2/transcript`, {
5721
+ return import_axios2.default.get("/v2/transcript", {
5723
5722
  ...options,
5724
5723
  params: { ...params, ...options?.params }
5725
5724
  });
@@ -6065,23 +6064,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
6065
6064
  "AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
6066
6065
  );
6067
6066
  }
6068
- const aaiOpts = { ...options?.assemblyai };
6069
- if ("speech_model" in aaiOpts && aaiOpts.speech_model != null) {
6070
- if (!aaiOpts.speech_models) {
6071
- aaiOpts.speech_models = [aaiOpts.speech_model];
6072
- }
6073
- delete aaiOpts.speech_model;
6067
+ const passthrough = options?.assemblyai;
6068
+ let speechModels;
6069
+ if (passthrough?.speech_model != null && !passthrough.speech_models) {
6070
+ speechModels = [passthrough.speech_model];
6071
+ } else if (passthrough?.speech_models) {
6072
+ speechModels = passthrough.speech_models;
6074
6073
  }
6074
+ const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
6075
6075
  const request = {
6076
- ...aaiOpts,
6076
+ ...typedOpts,
6077
6077
  audio_url: audioUrl,
6078
6078
  // speech_models is required — default to universal-3-pro
6079
- speech_models: aaiOpts.speech_models ?? [
6080
- "universal-3-pro"
6081
- ],
6079
+ speech_models: speechModels ?? ["universal-3-pro"],
6082
6080
  // Enable punctuation and formatting by default
6083
- punctuate: aaiOpts.punctuate ?? true,
6084
- format_text: aaiOpts.format_text ?? true
6081
+ punctuate: typedOpts.punctuate ?? true,
6082
+ format_text: typedOpts.format_text ?? true
6085
6083
  };
6086
6084
  if (options) {
6087
6085
  if (options.model) {
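This hunk replaces the in-place mutation of the AssemblyAI passthrough options with a typed destructure: the deprecated singular `speech_model` is lifted into the required `speech_models` array (default `["universal-3-pro"]`) and dropped from the outgoing payload. A usage sketch — the `transcribe(audio, options)` method name and an already-initialized `assemblyai` adapter instance are assumed from the surrounding code, not confirmed by this hunk alone:

```typescript
// `assemblyai` is an initialized AssemblyAI adapter instance (construction elided).
const result = await assemblyai.transcribe(
  { type: "url", url: "https://example.com/call.mp3" },   // only URL input is supported
  { assemblyai: { speech_model: "universal-3-pro" } }     // deprecated singular field
);

// 0.9.1 sends roughly:
//   { audio_url: "...", speech_models: ["universal-3-pro"], punctuate: true, format_text: true }
// i.e. speech_model is folded into speech_models and removed from the request body.
```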
@@ -6796,8 +6794,10 @@ var DeepgramAdapter = class extends BaseAdapter {
6796
6794
  /**
6797
6795
  * Submit audio for transcription
6798
6796
  *
6799
- * Sends audio to Deepgram API for transcription. Deepgram processes
6800
- * synchronously and returns results immediately (no polling required).
6797
+ * Sends audio to Deepgram API for transcription. Deepgram normally processes
6798
+ * synchronously and returns results immediately. When `webhookUrl` is set,
6799
+ * Deepgram can instead return an async callback acknowledgment containing a
6800
+ * request ID.
6801
6801
  *
6802
6802
  * @param audio - Audio input (URL or file buffer)
6803
6803
  * @param options - Transcription options
@@ -6848,17 +6848,59 @@ var DeepgramAdapter = class extends BaseAdapter {
6848
6848
  { params }
6849
6849
  ).then((res) => res.data);
6850
6850
  } else if (audio.type === "file") {
6851
- response = await this.client.post("/listen", audio.file, {
6852
- params,
6853
- headers: {
6854
- "Content-Type": "audio/*"
6851
+ response = await this.client.post(
6852
+ "/listen",
6853
+ audio.file,
6854
+ {
6855
+ params,
6856
+ headers: {
6857
+ "Content-Type": "audio/*"
6858
+ }
6855
6859
  }
6856
- }).then((res) => res.data);
6860
+ ).then((res) => res.data);
6857
6861
  } else {
6858
6862
  throw new Error(
6859
6863
  "Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
6860
6864
  );
6861
6865
  }
6866
+ if (options?.webhookUrl) {
6867
+ const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
6868
+ if (!requestId) {
6869
+ return {
6870
+ success: false,
6871
+ provider: this.name,
6872
+ error: {
6873
+ code: "MISSING_REQUEST_ID",
6874
+ message: "Deepgram callback mode did not return a request ID"
6875
+ },
6876
+ raw: response
6877
+ };
6878
+ }
6879
+ return {
6880
+ success: true,
6881
+ provider: this.name,
6882
+ data: {
6883
+ id: requestId,
6884
+ text: "",
6885
+ status: "queued"
6886
+ },
6887
+ tracking: {
6888
+ requestId
6889
+ },
6890
+ raw: response
6891
+ };
6892
+ }
6893
+ if (!("results" in response) || !("metadata" in response)) {
6894
+ return {
6895
+ success: false,
6896
+ provider: this.name,
6897
+ error: {
6898
+ code: "INVALID_RESPONSE",
6899
+ message: "Deepgram did not return a synchronous transcription payload"
6900
+ },
6901
+ raw: response
6902
+ };
6903
+ }
6862
6904
  return this.normalizeResponse(response);
6863
6905
  } catch (error) {
6864
6906
  return this.createErrorResponse(error);
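Together with the doc-comment change in the previous hunk, this adds explicit handling for Deepgram's callback mode: with `webhookUrl` set, the adapter returns a queued acknowledgment carrying `tracking.requestId` (or a `MISSING_REQUEST_ID` error), and synchronous responses missing `results`/`metadata` now fail with `INVALID_RESPONSE` instead of being normalized blindly. A consumption sketch, assuming the public method is `transcribe(audio, options)` and `deepgram` is an initialized adapter instance:

```typescript
const res = await deepgram.transcribe(
  { type: "url", url: "https://example.com/meeting.wav" },
  { webhookUrl: "https://my-app.example.com/hooks/deepgram" }
);

if (res.success && res.data.status === "queued") {
  // Callback mode: no transcript yet, just the id to correlate with the webhook delivery.
  console.log("Deepgram request id:", res.tracking?.requestId);
} else if (!res.success) {
  // New failure paths: MISSING_REQUEST_ID or INVALID_RESPONSE.
  console.error(res.error?.code, res.error?.message);
}
```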
@@ -7519,7 +7561,8 @@ var DeepgramAdapter = class extends BaseAdapter {
7519
7561
  break;
7520
7562
  }
7521
7563
  case "Metadata": {
7522
- callbacks?.onMetadata?.(message);
7564
+ const { type: _, ...metadata } = message;
7565
+ callbacks?.onMetadata?.(metadata);
7523
7566
  break;
7524
7567
  }
7525
7568
  case "Error": {
@@ -7856,13 +7899,13 @@ var TextNormalizationKind = {
7856
7899
 
7857
7900
  // src/generated/azure/api/speechServicesAPIVersion32.ts
7858
7901
  var transcriptionsList = (params, options) => {
7859
- return import_axios4.default.get(`/transcriptions`, {
7902
+ return import_axios4.default.get("/transcriptions", {
7860
7903
  ...options,
7861
7904
  params: { ...params, ...options?.params }
7862
7905
  });
7863
7906
  };
7864
7907
  var transcriptionsCreate = (transcription, options) => {
7865
- return import_axios4.default.post(`/transcriptions`, transcription, options);
7908
+ return import_axios4.default.post("/transcriptions", transcription, options);
7866
7909
  };
7867
7910
  var transcriptionsGet = (id, options) => {
7868
7911
  return import_axios4.default.get(`/transcriptions/${id}`, options);
@@ -7877,13 +7920,13 @@ var transcriptionsListFiles = (id, params, options) => {
7877
7920
  });
7878
7921
  };
7879
7922
  var webHooksList = (params, options) => {
7880
- return import_axios4.default.get(`/webhooks`, {
7923
+ return import_axios4.default.get("/webhooks", {
7881
7924
  ...options,
7882
7925
  params: { ...params, ...options?.params }
7883
7926
  });
7884
7927
  };
7885
7928
  var webHooksCreate = (webHook, options) => {
7886
- return import_axios4.default.post(`/webhooks`, webHook, options);
7929
+ return import_axios4.default.post("/webhooks", webHook, options);
7887
7930
  };
7888
7931
  var webHooksDelete = (id, options) => {
7889
7932
  return import_axios4.default.delete(`/webhooks/${id}`, options);
@@ -7955,10 +7998,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
7955
7998
  contentUrls: [audio.url],
7956
7999
  properties: this.buildTranscriptionProperties(options)
7957
8000
  };
7958
- const response = await transcriptionsCreate(
7959
- transcriptionRequest,
7960
- this.getAxiosConfig()
7961
- );
8001
+ const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
7962
8002
  const transcription = response.data;
7963
8003
  const transcriptId = transcription.self?.split("/").pop() || "";
7964
8004
  return await this.pollForCompletion(transcriptId);
@@ -8011,7 +8051,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
8011
8051
  this.getAxiosConfig()
8012
8052
  );
8013
8053
  const files = filesResponse.data?.values || [];
8014
- const resultFile = files.find((file) => file.kind === "Transcription");
8054
+ const resultFile = files.find((file) => file.kind === FileKind.Transcription);
8015
8055
  if (!resultFile?.links?.contentUrl) {
8016
8056
  return {
8017
8057
  success: false,
@@ -8235,15 +8275,20 @@ var AzureSTTAdapter = class extends BaseAdapter {
8235
8275
  return properties;
8236
8276
  }
8237
8277
  /**
8238
- * Normalize Azure status to unified status
8278
+ * Normalize Azure status to unified status using generated AzureStatus constants
8239
8279
  */
8240
8280
  normalizeStatus(status) {
8241
- const statusStr = status?.toString().toLowerCase() || "";
8242
- if (statusStr.includes("succeeded")) return "completed";
8243
- if (statusStr.includes("running")) return "processing";
8244
- if (statusStr.includes("notstarted")) return "queued";
8245
- if (statusStr.includes("failed")) return "error";
8246
- return "queued";
8281
+ switch (status) {
8282
+ case Status.Succeeded:
8283
+ return "completed";
8284
+ case Status.Running:
8285
+ return "processing";
8286
+ case Status.Failed:
8287
+ return "error";
8288
+ case Status.NotStarted:
8289
+ default:
8290
+ return "queued";
8291
+ }
8247
8292
  }
8248
8293
  /**
8249
8294
  * Normalize Azure transcription response to unified format
@@ -8363,30 +8408,30 @@ function getAzureOpenAIRealtimeUrl(endpoint, deployment, apiVersion = "2024-10-0
8363
8408
  var import_axios6 = __toESM(require("axios"));
8364
8409
  var createTranscription = (createTranscriptionRequest, options) => {
8365
8410
  const formData = new FormData();
8366
- formData.append(`file`, createTranscriptionRequest.file);
8367
- formData.append(`model`, createTranscriptionRequest.model);
8411
+ formData.append("file", createTranscriptionRequest.file);
8412
+ formData.append("model", createTranscriptionRequest.model);
8368
8413
  if (createTranscriptionRequest.language !== void 0) {
8369
- formData.append(`language`, createTranscriptionRequest.language);
8414
+ formData.append("language", createTranscriptionRequest.language);
8370
8415
  }
8371
8416
  if (createTranscriptionRequest.prompt !== void 0) {
8372
- formData.append(`prompt`, createTranscriptionRequest.prompt);
8417
+ formData.append("prompt", createTranscriptionRequest.prompt);
8373
8418
  }
8374
8419
  if (createTranscriptionRequest.response_format !== void 0) {
8375
- formData.append(`response_format`, createTranscriptionRequest.response_format);
8420
+ formData.append("response_format", createTranscriptionRequest.response_format);
8376
8421
  }
8377
8422
  if (createTranscriptionRequest.temperature !== void 0) {
8378
- formData.append(`temperature`, createTranscriptionRequest.temperature.toString());
8423
+ formData.append("temperature", createTranscriptionRequest.temperature.toString());
8379
8424
  }
8380
8425
  if (createTranscriptionRequest.include !== void 0) {
8381
- createTranscriptionRequest.include.forEach((value) => formData.append(`include`, value));
8426
+ createTranscriptionRequest.include.forEach((value) => formData.append("include", value));
8382
8427
  }
8383
8428
  if (createTranscriptionRequest.timestamp_granularities !== void 0) {
8384
8429
  createTranscriptionRequest.timestamp_granularities.forEach(
8385
- (value) => formData.append(`timestamp_granularities`, value)
8430
+ (value) => formData.append("timestamp_granularities", value)
8386
8431
  );
8387
8432
  }
8388
8433
  if (createTranscriptionRequest.stream !== void 0 && createTranscriptionRequest.stream !== null) {
8389
- formData.append(`stream`, createTranscriptionRequest.stream.toString());
8434
+ formData.append("stream", createTranscriptionRequest.stream.toString());
8390
8435
  }
8391
8436
  if (createTranscriptionRequest.chunking_strategy !== void 0 && createTranscriptionRequest.chunking_strategy !== null) {
8392
8437
  formData.append(
@@ -8396,15 +8441,15 @@ var createTranscription = (createTranscriptionRequest, options) => {
8396
8441
  }
8397
8442
  if (createTranscriptionRequest.known_speaker_names !== void 0) {
8398
8443
  createTranscriptionRequest.known_speaker_names.forEach(
8399
- (value) => formData.append(`known_speaker_names`, value)
8444
+ (value) => formData.append("known_speaker_names", value)
8400
8445
  );
8401
8446
  }
8402
8447
  if (createTranscriptionRequest.known_speaker_references !== void 0) {
8403
8448
  createTranscriptionRequest.known_speaker_references.forEach(
8404
- (value) => formData.append(`known_speaker_references`, value)
8449
+ (value) => formData.append("known_speaker_references", value)
8405
8450
  );
8406
8451
  }
8407
- return import_axios6.default.post(`/audio/transcriptions`, formData, options);
8452
+ return import_axios6.default.post("/audio/transcriptions", formData, options);
8408
8453
  };
8409
8454
 
8410
8455
  // src/generated/openai/schema/createTranscriptionRequestTimestampGranularitiesItem.ts
@@ -8493,7 +8538,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
8493
8538
  const request = {
8494
8539
  ...options?.openai,
8495
8540
  file: audioData,
8496
- // Generated type expects Blob
8497
8541
  model
8498
8542
  };
8499
8543
  if (options?.language) {
@@ -8920,7 +8964,6 @@ function createOpenAIWhisperAdapter(config) {
8920
8964
 
8921
8965
  // src/adapters/speechmatics-adapter.ts
8922
8966
  var import_axios8 = __toESM(require("axios"));
8923
- var import_ws5 = __toESM(require("ws"));
8924
8967
 
8925
8968
  // src/generated/speechmatics/schema/notificationConfigContentsItem.ts
8926
8969
  var NotificationConfigContentsItem = {
@@ -8970,7 +9013,8 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
8970
9013
  super(...arguments);
8971
9014
  this.name = "speechmatics";
8972
9015
  this.capabilities = {
8973
- streaming: true,
9016
+ streaming: false,
9017
+ // Batch only (streaming available via separate WebSocket API)
8974
9018
  diarization: true,
8975
9019
  wordTimestamps: true,
8976
9020
  languageDetection: false,
@@ -9105,16 +9149,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
9105
9149
  jobConfig.fetch_data = {
9106
9150
  url: audio.url
9107
9151
  };
9108
- const formData = new FormData();
9109
- formData.append("config", JSON.stringify(jobConfig));
9110
- requestBody = formData;
9111
- headers = { "Content-Type": "multipart/form-data" };
9152
+ requestBody = { config: JSON.stringify(jobConfig) };
9153
+ headers = { "Content-Type": "application/json" };
9112
9154
  } else if (audio.type === "file") {
9113
- const formData = new FormData();
9114
- formData.append("config", JSON.stringify(jobConfig));
9115
- const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
9116
- formData.append("data_file", audioBlob, audio.filename || "audio.wav");
9117
- requestBody = formData;
9155
+ requestBody = {
9156
+ config: JSON.stringify(jobConfig),
9157
+ data_file: audio.file
9158
+ };
9118
9159
  headers = { "Content-Type": "multipart/form-data" };
9119
9160
  } else {
9120
9161
  return {
@@ -9219,389 +9260,6 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
9219
9260
  throw error;
9220
9261
  }
9221
9262
  }
9222
- /**
9223
- * Build WebSocket URL for real-time streaming
9224
- *
9225
- * Note: Real-time API uses a different host from the batch API:
9226
- * - Batch: {region}.asr.api.speechmatics.com
9227
- * - Real-time: {region}.rt.speechmatics.com
9228
- *
9229
- * @param region - Regional endpoint identifier
9230
- * @returns WebSocket URL for real-time API
9231
- */
9232
- getRegionalWsUrl(region) {
9233
- if (this.config?.wsBaseUrl) {
9234
- return this.config.wsBaseUrl;
9235
- }
9236
- const rtRegionMap = {
9237
- eu1: "eu",
9238
- eu2: "eu",
9239
- us1: "us",
9240
- us2: "us",
9241
- au1: "eu"
9242
- // No AU RT endpoint — fall back to EU
9243
- };
9244
- const rtPrefix = rtRegionMap[region || ""] || "eu";
9245
- return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
9246
- }
9247
- /**
9248
- * Stream audio for real-time transcription via WebSocket
9249
- *
9250
- * Connects to Speechmatics' real-time API and sends audio chunks
9251
- * for transcription with results returned via callbacks.
9252
- *
9253
- * @param options - Streaming configuration options
9254
- * @param callbacks - Event callbacks for transcription results
9255
- * @returns Promise that resolves with a StreamingSession
9256
- *
9257
- * @example Basic streaming
9258
- * ```typescript
9259
- * const session = await adapter.transcribeStream({
9260
- * language: 'en',
9261
- * speechmaticsStreaming: {
9262
- * enablePartials: true,
9263
- * operatingPoint: 'enhanced'
9264
- * }
9265
- * }, {
9266
- * onTranscript: (event) => console.log(event.text),
9267
- * onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
9268
- * onError: (error) => console.error(error)
9269
- * });
9270
- *
9271
- * await session.sendAudio({ data: audioBuffer });
9272
- * await session.close();
9273
- * ```
9274
- */
9275
- async transcribeStream(options, callbacks) {
9276
- this.validateConfig();
9277
- const smOpts = options?.speechmaticsStreaming || {};
9278
- const region = smOpts.region || this.config?.region;
9279
- const wsUrl = this.getRegionalWsUrl(region);
9280
- const ws = new import_ws5.default(wsUrl, {
9281
- headers: {
9282
- Authorization: `Bearer ${this.config.apiKey}`
9283
- }
9284
- });
9285
- let sessionStatus = "connecting";
9286
- const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
9287
- let seqNo = 0;
9288
- let utteranceResults = [];
9289
- const sessionReady = new Promise((resolve, reject) => {
9290
- const timeout = setTimeout(() => {
9291
- reject(new Error("WebSocket connection timeout"));
9292
- }, 1e4);
9293
- let wsOpen = false;
9294
- ws.once("error", (error) => {
9295
- clearTimeout(timeout);
9296
- reject(error);
9297
- });
9298
- ws.once("open", () => {
9299
- wsOpen = true;
9300
- const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
9301
- const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
9302
- const startMsg = {
9303
- message: "StartRecognition",
9304
- audio_format: {
9305
- type: "raw",
9306
- encoding,
9307
- sample_rate: sampleRate
9308
- },
9309
- transcription_config: {
9310
- language: smOpts.language || options?.language || "en",
9311
- enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
9312
- }
9313
- };
9314
- const txConfig = startMsg.transcription_config;
9315
- if (smOpts.domain) txConfig.domain = smOpts.domain;
9316
- if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
9317
- if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
9318
- if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
9319
- if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
9320
- if (smOpts.diarization === "speaker" || options?.diarization) {
9321
- txConfig.diarization = "speaker";
9322
- if (smOpts.maxSpeakers) {
9323
- txConfig.speaker_diarization_config = {
9324
- max_speakers: smOpts.maxSpeakers
9325
- };
9326
- } else if (options?.speakersExpected) {
9327
- txConfig.speaker_diarization_config = {
9328
- max_speakers: options.speakersExpected
9329
- };
9330
- }
9331
- }
9332
- if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
9333
- txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
9334
- content: word
9335
- }));
9336
- } else if (options?.customVocabulary && options.customVocabulary.length > 0) {
9337
- txConfig.additional_vocab = options.customVocabulary.map((word) => ({
9338
- content: word
9339
- }));
9340
- }
9341
- if (smOpts.conversationConfig) {
9342
- txConfig.conversation_config = {
9343
- end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
9344
- };
9345
- }
9346
- const startPayload = JSON.stringify(startMsg);
9347
- if (callbacks?.onRawMessage) {
9348
- callbacks.onRawMessage({
9349
- provider: "speechmatics",
9350
- direction: "outgoing",
9351
- timestamp: Date.now(),
9352
- payload: startPayload,
9353
- messageType: "StartRecognition"
9354
- });
9355
- }
9356
- ws.send(startPayload);
9357
- });
9358
- const onMessage = (data) => {
9359
- const rawPayload = data.toString();
9360
- try {
9361
- const msg = JSON.parse(rawPayload);
9362
- if (msg.message === "RecognitionStarted") {
9363
- clearTimeout(timeout);
9364
- ws.removeListener("message", onMessage);
9365
- ws.emit("message", data);
9366
- resolve();
9367
- } else if (msg.message === "Error") {
9368
- clearTimeout(timeout);
9369
- ws.removeListener("message", onMessage);
9370
- reject(new Error(msg.reason || "Recognition failed to start"));
9371
- }
9372
- } catch {
9373
- }
9374
- };
9375
- ws.on("message", onMessage);
9376
- });
9377
- ws.on("message", (data) => {
9378
- const rawPayload = data.toString();
9379
- try {
9380
- const message = JSON.parse(rawPayload);
9381
- if (callbacks?.onRawMessage) {
9382
- callbacks.onRawMessage({
9383
- provider: "speechmatics",
9384
- direction: "incoming",
9385
- timestamp: Date.now(),
9386
- payload: rawPayload,
9387
- messageType: message.message
9388
- });
9389
- }
9390
- this.handleStreamingMessage(message, callbacks, utteranceResults);
9391
- } catch (error) {
9392
- if (callbacks?.onRawMessage) {
9393
- callbacks.onRawMessage({
9394
- provider: "speechmatics",
9395
- direction: "incoming",
9396
- timestamp: Date.now(),
9397
- payload: rawPayload,
9398
- messageType: "parse_error"
9399
- });
9400
- }
9401
- callbacks?.onError?.({
9402
- code: "PARSE_ERROR",
9403
- message: "Failed to parse WebSocket message",
9404
- details: error
9405
- });
9406
- }
9407
- });
9408
- ws.on("error", (error) => {
9409
- callbacks?.onError?.({
9410
- code: "WEBSOCKET_ERROR",
9411
- message: error.message,
9412
- details: error
9413
- });
9414
- });
9415
- ws.on("close", (code, reason) => {
9416
- sessionStatus = "closed";
9417
- callbacks?.onClose?.(code, reason.toString());
9418
- });
9419
- await sessionReady;
9420
- sessionStatus = "open";
9421
- callbacks?.onOpen?.();
9422
- return {
9423
- id: sessionId,
9424
- provider: this.name,
9425
- createdAt: /* @__PURE__ */ new Date(),
9426
- getStatus: () => sessionStatus,
9427
- sendAudio: async (chunk) => {
9428
- if (sessionStatus !== "open") {
9429
- throw new Error(`Cannot send audio: session is ${sessionStatus}`);
9430
- }
9431
- if (ws.readyState !== import_ws5.default.OPEN) {
9432
- throw new Error("WebSocket is not open");
9433
- }
9434
- if (callbacks?.onRawMessage) {
9435
- const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
9436
- chunk.data.byteOffset,
9437
- chunk.data.byteOffset + chunk.data.byteLength
9438
- );
9439
- callbacks.onRawMessage({
9440
- provider: this.name,
9441
- direction: "outgoing",
9442
- timestamp: Date.now(),
9443
- payload: audioPayload,
9444
- messageType: "audio"
9445
- });
9446
- }
9447
- ws.send(chunk.data);
9448
- seqNo++;
9449
- if (chunk.isLast) {
9450
- const endMsg = JSON.stringify({
9451
- message: "EndOfStream",
9452
- last_seq_no: seqNo
9453
- });
9454
- if (callbacks?.onRawMessage) {
9455
- callbacks.onRawMessage({
9456
- provider: this.name,
9457
- direction: "outgoing",
9458
- timestamp: Date.now(),
9459
- payload: endMsg,
9460
- messageType: "EndOfStream"
9461
- });
9462
- }
9463
- ws.send(endMsg);
9464
- }
9465
- },
9466
- close: async () => {
9467
- if (sessionStatus === "closed" || sessionStatus === "closing") {
9468
- return;
9469
- }
9470
- sessionStatus = "closing";
9471
- if (ws.readyState === import_ws5.default.OPEN) {
9472
- seqNo++;
9473
- ws.send(
9474
- JSON.stringify({
9475
- message: "EndOfStream",
9476
- last_seq_no: seqNo
9477
- })
9478
- );
9479
- }
9480
- return new Promise((resolve) => {
9481
- const timeout = setTimeout(() => {
9482
- ws.terminate();
9483
- sessionStatus = "closed";
9484
- resolve();
9485
- }, 5e3);
9486
- const onMsg = (data) => {
9487
- try {
9488
- const msg = JSON.parse(data.toString());
9489
- if (msg.message === "EndOfTranscript") {
9490
- ws.removeListener("message", onMsg);
9491
- clearTimeout(timeout);
9492
- ws.close();
9493
- }
9494
- } catch {
9495
- }
9496
- };
9497
- ws.on("message", onMsg);
9498
- ws.once("close", () => {
9499
- clearTimeout(timeout);
9500
- sessionStatus = "closed";
9501
- resolve();
9502
- });
9503
- });
9504
- }
9505
- };
9506
- }
9507
- /**
9508
- * Handle incoming Speechmatics real-time WebSocket messages
9509
- */
9510
- handleStreamingMessage(message, callbacks, utteranceResults) {
9511
- switch (message.message) {
9512
- case "RecognitionStarted": {
9513
- break;
9514
- }
9515
- case "AddPartialTranscript": {
9516
- const results = message.results || [];
9517
- const text = buildTextFromSpeechmaticsResults(results);
9518
- if (text) {
9519
- callbacks?.onTranscript?.({
9520
- type: "transcript",
9521
- text,
9522
- isFinal: false,
9523
- words: this.extractWordsFromResults(results),
9524
- data: message
9525
- });
9526
- }
9527
- break;
9528
- }
9529
- case "AddTranscript": {
9530
- const results = message.results || [];
9531
- const text = buildTextFromSpeechmaticsResults(results);
9532
- if (utteranceResults) {
9533
- utteranceResults.push(...results);
9534
- }
9535
- if (text) {
9536
- callbacks?.onTranscript?.({
9537
- type: "transcript",
9538
- text,
9539
- isFinal: true,
9540
- words: this.extractWordsFromResults(results),
9541
- data: message
9542
- });
9543
- }
9544
- break;
9545
- }
9546
- case "EndOfUtterance": {
9547
- if (utteranceResults && utteranceResults.length > 0) {
9548
- const text = buildTextFromSpeechmaticsResults(utteranceResults);
9549
- const words = this.extractWordsFromResults(utteranceResults);
9550
- const utterances = buildUtterancesFromWords(words);
9551
- if (utterances.length > 0) {
9552
- for (const utt of utterances) {
9553
- callbacks?.onUtterance?.(utt);
9554
- }
9555
- } else if (text) {
9556
- callbacks?.onUtterance?.({
9557
- text,
9558
- start: words.length > 0 ? words[0].start : 0,
9559
- end: words.length > 0 ? words[words.length - 1].end : 0,
9560
- words
9561
- });
9562
- }
9563
- utteranceResults.length = 0;
9564
- }
9565
- break;
9566
- }
9567
- case "AudioAdded": {
9568
- break;
9569
- }
9570
- case "EndOfTranscript": {
9571
- break;
9572
- }
9573
- case "Info":
9574
- case "Warning": {
9575
- callbacks?.onMetadata?.(message);
9576
- break;
9577
- }
9578
- case "Error": {
9579
- const errMsg = message;
9580
- callbacks?.onError?.({
9581
- code: errMsg.type || "SPEECHMATICS_ERROR",
9582
- message: errMsg.reason || "Unknown error",
9583
- details: message
9584
- });
9585
- break;
9586
- }
9587
- default: {
9588
- callbacks?.onMetadata?.(message);
9589
- break;
9590
- }
9591
- }
9592
- }
9593
- /**
9594
- * Extract unified Word[] from Speechmatics recognition results
9595
- */
9596
- extractWordsFromResults(results) {
9597
- return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
9598
- word: result.alternatives?.[0]?.content || "",
9599
- start: result.start_time,
9600
- end: result.end_time,
9601
- confidence: result.alternatives?.[0]?.confidence,
9602
- speaker: result.alternatives?.[0]?.speaker
9603
- }));
9604
- }
9605
9263
  /**
9606
9264
  * Normalize Speechmatics status to unified status
9607
9265
  * Uses generated JobDetailsStatus enum values
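This removal, combined with the `streaming: false` capability flip earlier in the diff, makes the Speechmatics adapter batch-only in 0.9.1; the real-time WebSocket path (`transcribeStream`, `getRegionalWsUrl`, message handling) is gone. Callers can gate on the capability flag rather than assuming streaming support — a defensive sketch, with the batch method name assumed by analogy with the other adapters in this file:

```typescript
// `speechmatics` is an adapter returned by createSpeechmaticsAdapter(config).
if (speechmatics.capabilities.streaming) {
  // 0.8.9: real-time WebSocket session via transcribeStream() — removed in 0.9.1.
} else {
  // 0.9.1: batch only — submit the job and let the adapter poll for the result.
  const result = await speechmatics.transcribe({
    type: "url",
    url: "https://example.com/recording.wav",
  });
}
```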
@@ -9670,9 +9328,6 @@ function createSpeechmaticsAdapter(config) {
9670
9328
  return adapter;
9671
9329
  }
9672
9330
 
9673
- // src/adapters/soniox-adapter.ts
9674
- var import_axios9 = __toESM(require("axios"));
9675
-
9676
9331
  // src/generated/soniox/schema/transcriptionStatus.ts
9677
9332
  var TranscriptionStatus = {
9678
9333
  queued: "queued",
@@ -9681,6 +9336,57 @@ var TranscriptionStatus = {
9681
9336
  error: "error"
9682
9337
  };
9683
9338
 
9339
+ // src/generated/soniox/api/sonioxPublicAPI.ts
9340
+ var import_axios9 = __toESM(require("axios"));
9341
+
9342
+ // src/generated/soniox/schema/index.ts
9343
+ var schema_exports4 = {};
9344
+ __export(schema_exports4, {
9345
+ TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
9346
+ TranscriptionMode: () => TranscriptionMode,
9347
+ TranscriptionStatus: () => TranscriptionStatus,
9348
+ TranslationConfigType: () => TranslationConfigType
9349
+ });
9350
+
9351
+ // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
9352
+ var TemporaryApiKeyUsageType = {
9353
+ transcribe_websocket: "transcribe_websocket"
9354
+ };
9355
+
9356
+ // src/generated/soniox/schema/transcriptionMode.ts
9357
+ var TranscriptionMode = {
9358
+ real_time: "real_time",
9359
+ async: "async"
9360
+ };
9361
+
9362
+ // src/generated/soniox/schema/translationConfigType.ts
9363
+ var TranslationConfigType = {
9364
+ one_way: "one_way",
9365
+ two_way: "two_way"
9366
+ };
9367
+
9368
+ // src/generated/soniox/api/sonioxPublicAPI.ts
9369
+ var uploadFile = (uploadFileBody2, options) => {
9370
+ const formData = new FormData();
9371
+ if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
9372
+ formData.append("client_reference_id", uploadFileBody2.client_reference_id);
9373
+ }
9374
+ formData.append("file", uploadFileBody2.file);
9375
+ return import_axios9.default.post("/v1/files", formData, options);
9376
+ };
9377
+ var createTranscription2 = (createTranscriptionPayload, options) => {
9378
+ return import_axios9.default.post("/v1/transcriptions", createTranscriptionPayload, options);
9379
+ };
9380
+ var getTranscription = (transcriptionId, options) => {
9381
+ return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}`, options);
9382
+ };
9383
+ var getTranscriptionTranscript = (transcriptionId, options) => {
9384
+ return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
9385
+ };
9386
+ var getModels = (options) => {
9387
+ return import_axios9.default.get("/v1/models", options);
9388
+ };
9389
+
9684
9390
  // src/adapters/soniox-adapter.ts
9685
9391
  var SonioxAdapter = class extends BaseAdapter {
9686
9392
  constructor() {
@@ -9735,11 +9441,17 @@ var SonioxAdapter = class extends BaseAdapter {
9735
9441
  }
9736
9442
  }
9737
9443
  /**
9738
- * Get the base URL for API requests
9444
+ * Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
9739
9445
  */
9740
9446
  get baseUrl() {
9741
9447
  if (this.config?.baseUrl) return this.config.baseUrl;
9742
- return `https://${this.getRegionalHost()}/v1`;
9448
+ return `https://${this.getRegionalHost()}`;
9449
+ }
9450
+ /**
9451
+ * Build axios config with Soniox Bearer auth
9452
+ */
9453
+ getAxiosConfig() {
9454
+ return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
9743
9455
  }
9744
9456
  initialize(config) {
9745
9457
  super.initialize(config);
@@ -9749,15 +9461,6 @@ var SonioxAdapter = class extends BaseAdapter {
9749
9461
  if (config.model) {
9750
9462
  this.defaultModel = config.model;
9751
9463
  }
9752
- this.client = import_axios9.default.create({
9753
- baseURL: this.baseUrl,
9754
- timeout: config.timeout || 12e4,
9755
- headers: {
9756
- Authorization: `Bearer ${config.apiKey}`,
9757
- "Content-Type": "application/json",
9758
- ...config.headers
9759
- }
9760
- });
9761
9464
  }
9762
9465
  /**
9763
9466
  * Get current region
@@ -9787,23 +9490,12 @@ var SonioxAdapter = class extends BaseAdapter {
9787
9490
  */
9788
9491
  setRegion(region) {
9789
9492
  this.region = region;
9790
- if (this.config?.apiKey) {
9791
- this.client = import_axios9.default.create({
9792
- baseURL: this.baseUrl,
9793
- timeout: this.config.timeout || 12e4,
9794
- headers: {
9795
- Authorization: `Bearer ${this.config.apiKey}`,
9796
- "Content-Type": "application/json",
9797
- ...this.config.headers
9798
- }
9799
- });
9800
- }
9801
9493
  }
9802
9494
  /**
9803
9495
  * Submit audio for transcription
9804
9496
  *
9805
- * Soniox uses async batch processing. The transcribe method submits audio
9806
- * and waits for completion (or use getTranscript for polling).
9497
+ * Uses the async v1 API: createTranscription returns status `queued`,
9498
+ * then polls until completed (or returns immediately if webhook is set).
9807
9499
  *
9808
9500
  * @param audio - Audio input (URL or file)
9809
9501
  * @param options - Transcription options
@@ -9812,21 +9504,44 @@ var SonioxAdapter = class extends BaseAdapter {
9812
9504
  async transcribe(audio, options) {
9813
9505
  this.validateConfig();
9814
9506
  try {
9815
- const requestBody = {
9816
- model: options?.model || this.defaultModel
9817
- };
9818
- if (audio.type === "url") {
9819
- requestBody.audio_url = audio.url;
9820
- } else if (audio.type === "file") {
9821
- const formData = new FormData();
9507
+ const sonioxOpts = options?.soniox;
9508
+ if (audio.type === "file") {
9822
9509
  const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
9823
- formData.append("file", audioBlob, audio.filename || "audio.wav");
9824
- const uploadResponse = await this.client.post("/files", formData, {
9825
- headers: {
9826
- "Content-Type": "multipart/form-data"
9827
- }
9828
- });
9829
- requestBody.file_id = uploadResponse.data.id;
9510
+ const uploadBody = { file: audioBlob };
9511
+ const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
9512
+ const payload = {
9513
+ ...sonioxOpts,
9514
+ model: options?.model || this.defaultModel,
9515
+ file_id: fileResp.data.id,
9516
+ language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
9517
+ enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
9518
+ enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
9519
+ context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
9520
+ webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
9521
+ };
9522
+ const createResp = await createTranscription2(payload, this.getAxiosConfig());
9523
+ const meta = createResp.data;
9524
+ if (options?.webhookUrl || sonioxOpts?.webhook_url) {
9525
+ return this.normalizeTranscription(meta);
9526
+ }
9527
+ return this.pollForCompletion(meta.id);
9528
+ } else if (audio.type === "url") {
9529
+ const payload = {
9530
+ ...sonioxOpts,
9531
+ model: options?.model || this.defaultModel,
9532
+ audio_url: audio.url,
9533
+ language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
9534
+ enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
9535
+ enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
9536
+ context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
9537
+ webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
9538
+ };
9539
+ const createResp = await createTranscription2(payload, this.getAxiosConfig());
9540
+ const meta = createResp.data;
9541
+ if (options?.webhookUrl || sonioxOpts?.webhook_url) {
9542
+ return this.normalizeTranscription(meta);
9543
+ }
9544
+ return this.pollForCompletion(meta.id);
9830
9545
  } else {
9831
9546
  return {
9832
9547
  success: false,
@@ -9837,23 +9552,6 @@ var SonioxAdapter = class extends BaseAdapter {
9837
9552
  }
9838
9553
  };
9839
9554
  }
9840
- if (options?.language) {
9841
- requestBody.language_hints = [options.language];
9842
- }
9843
- if (options?.diarization) {
9844
- requestBody.enable_speaker_diarization = true;
9845
- }
9846
- if (options?.languageDetection) {
9847
- requestBody.enable_language_identification = true;
9848
- }
9849
- if (options?.customVocabulary && options.customVocabulary.length > 0) {
9850
- requestBody.context = {
9851
- terms: options.customVocabulary
9852
- };
9853
- }
9854
- const response = await this.client.post("/transcriptions", requestBody);
9855
- const transcriptionId = response.data.id;
9856
- return await this.pollForCompletion(transcriptionId);
9857
9555
  } catch (error) {
9858
9556
  return this.createErrorResponse(error);
9859
9557
  }
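The rewritten `transcribe()` now drives the generated Soniox v1 client: file inputs are uploaded first via `uploadFile`, the job is created with `createTranscription2`, and the adapter either returns the queued metadata immediately when a webhook is configured or polls to completion. A usage sketch (adapter construction elided; option names are the ones handled in the code above):

```typescript
// Webhook flow: resolves as soon as the job is created, with an empty transcript.
const queued = await soniox.transcribe(
  { type: "url", url: "https://example.com/interview.mp3" },
  { language: "en", diarization: true, webhookUrl: "https://my-app.example.com/hooks/soniox" }
);
// queued.data -> { id, text: "", status: "queued", duration?: number }

// Later (e.g. from the webhook handler) fetch the finished result by id:
if (queued.success) {
  const finished = await soniox.getTranscript(queued.data.id);
}

// Without webhookUrl, transcribe() itself polls until the job completes or errors.
```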
@@ -9861,9 +9559,8 @@ var SonioxAdapter = class extends BaseAdapter {
9861
9559
  /**
9862
9560
  * Get transcription result by ID
9863
9561
  *
9864
- * Checks job status via GET /v1/transcriptions/{id}, then fetches
9865
- * the full transcript via GET /v1/transcriptions/{id}/transcript
9866
- * when completed.
9562
+ * Fetches transcription metadata and, if completed, the transcript text/tokens.
9563
+ * Used by pollForCompletion() for async polling.
9867
9564
  *
9868
9565
  * @param transcriptId - Transcript ID
9869
9566
  * @returns Transcription response
@@ -9871,39 +9568,20 @@ var SonioxAdapter = class extends BaseAdapter {
9871
9568
  async getTranscript(transcriptId) {
9872
9569
  this.validateConfig();
9873
9570
  try {
9874
- const statusResponse = await this.client.get(`/transcriptions/${transcriptId}`);
9875
- const job = statusResponse.data;
9876
- if (job.status === "error") {
9877
- return {
9878
- success: false,
9879
- provider: this.name,
9880
- error: {
9881
- code: "TRANSCRIPTION_ERROR",
9882
- message: job.error_message || "Transcription failed"
9883
- }
9884
- };
9885
- }
9886
- if (job.status !== "completed") {
9887
- return {
9888
- success: true,
9889
- provider: this.name,
9890
- data: {
9891
- id: job.id,
9892
- text: "",
9893
- status: job.status
9894
- },
9895
- raw: job
9896
- };
9571
+ const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
9572
+ const meta = metaResp.data;
9573
+ if (meta.status === TranscriptionStatus.completed) {
9574
+ try {
9575
+ const transcriptResp = await getTranscriptionTranscript(
9576
+ transcriptId,
9577
+ this.getAxiosConfig()
9578
+ );
9579
+ return this.normalizeTranscription(meta, transcriptResp.data);
9580
+ } catch (transcriptError) {
9581
+ return this.createErrorResponse(transcriptError);
9582
+ }
9897
9583
  }
9898
- const transcriptResponse = await this.client.get(
9899
- `/transcriptions/${transcriptId}/transcript`
9900
- );
9901
- return this.normalizeResponse({
9902
- ...transcriptResponse.data,
9903
- // Carry over job metadata
9904
- id: job.id,
9905
- audio_duration_ms: job.audio_duration_ms
9906
- });
9584
+ return this.normalizeTranscription(meta);
9907
9585
  } catch (error) {
9908
9586
  return this.createErrorResponse(error);
9909
9587
  }
@@ -9923,51 +9601,50 @@ var SonioxAdapter = class extends BaseAdapter {
9923
9601
  const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
9924
9602
  const createdAt = /* @__PURE__ */ new Date();
9925
9603
  const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
9926
- const wsUrl = `${wsBase}/transcribe-websocket`;
9927
- const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-v4";
9928
- const sonioxOpts = options?.sonioxStreaming;
9929
- const initMessage = {
9930
- api_key: this.config.apiKey,
9931
- model: modelId
9932
- };
9933
- if (sonioxOpts?.audioFormat) {
9934
- initMessage.audio_format = sonioxOpts.audioFormat;
9935
- } else if (options?.encoding) {
9604
+ const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
9605
+ wsUrl.searchParams.set("api_key", this.config.apiKey);
9606
+ const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
9607
+ wsUrl.searchParams.set("model", modelId);
9608
+ if (options?.encoding) {
9936
9609
  const encodingMap = {
9937
9610
  linear16: "pcm_s16le",
9938
9611
  pcm: "pcm_s16le",
9939
9612
  mulaw: "mulaw",
9940
9613
  alaw: "alaw"
9941
9614
  };
9942
- initMessage.audio_format = encodingMap[options.encoding] || options.encoding;
9615
+ wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
9943
9616
  }
9944
- if (sonioxOpts?.sampleRate || options?.sampleRate) {
9945
- initMessage.sample_rate = sonioxOpts?.sampleRate || options?.sampleRate;
9617
+ if (options?.sampleRate) {
9618
+ wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
9946
9619
  }
9947
- if (sonioxOpts?.numChannels || options?.channels) {
9948
- initMessage.num_channels = sonioxOpts?.numChannels || options?.channels;
9620
+ if (options?.channels) {
9621
+ wsUrl.searchParams.set("num_channels", options.channels.toString());
9949
9622
  }
9623
+ const sonioxOpts = options?.sonioxStreaming;
9950
9624
  if (sonioxOpts) {
9951
9625
  if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
9952
- initMessage.language_hints = sonioxOpts.languageHints;
9626
+ wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
9953
9627
  }
9954
9628
  if (sonioxOpts.enableLanguageIdentification) {
9955
- initMessage.enable_language_identification = true;
9629
+ wsUrl.searchParams.set("enable_language_identification", "true");
9956
9630
  }
9957
9631
  if (sonioxOpts.enableEndpointDetection) {
9958
- initMessage.enable_endpoint_detection = true;
9632
+ wsUrl.searchParams.set("enable_endpoint_detection", "true");
9959
9633
  }
9960
9634
  if (sonioxOpts.enableSpeakerDiarization) {
9961
- initMessage.enable_speaker_diarization = true;
9635
+ wsUrl.searchParams.set("enable_speaker_diarization", "true");
9962
9636
  }
9963
9637
  if (sonioxOpts.context) {
9964
- initMessage.context = typeof sonioxOpts.context === "string" ? sonioxOpts.context : sonioxOpts.context;
9638
+ wsUrl.searchParams.set(
9639
+ "context",
9640
+ typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
9641
+ );
9965
9642
  }
9966
9643
  if (sonioxOpts.translation) {
9967
- initMessage.translation = sonioxOpts.translation;
9644
+ wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
9968
9645
  }
9969
9646
  if (sonioxOpts.clientReferenceId) {
9970
- initMessage.client_reference_id = sonioxOpts.clientReferenceId;
9647
+ wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
9971
9648
  }
9972
9649
  }
9973
9650
  if (!sonioxOpts?.languageHints && options?.language) {
@@ -9976,33 +9653,24 @@ var SonioxAdapter = class extends BaseAdapter {
9976
9653
  `[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
9977
9654
  );
9978
9655
  }
9979
- initMessage.language_hints = [options.language];
9656
+ wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
9980
9657
  }
9981
9658
  if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
9982
- initMessage.enable_speaker_diarization = true;
9659
+ wsUrl.searchParams.set("enable_speaker_diarization", "true");
9983
9660
  }
9984
9661
  if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
9985
- initMessage.enable_language_identification = true;
9662
+ wsUrl.searchParams.set("enable_language_identification", "true");
9663
+ }
9664
+ if (options?.interimResults !== false) {
9986
9665
  }
9987
9666
  let status = "connecting";
9988
9667
  let openedAt = null;
9989
9668
  let receivedData = false;
9990
9669
  const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
9991
- const ws = new WebSocketImpl(wsUrl);
9670
+ const ws = new WebSocketImpl(wsUrl.toString());
9992
9671
  ws.onopen = () => {
9993
- openedAt = Date.now();
9994
- const initPayload = JSON.stringify(initMessage);
9995
- if (callbacks?.onRawMessage) {
9996
- callbacks.onRawMessage({
9997
- provider: this.name,
9998
- direction: "outgoing",
9999
- timestamp: Date.now(),
10000
- payload: initPayload,
10001
- messageType: "init"
10002
- });
10003
- }
10004
- ws.send(initPayload);
10005
9672
  status = "open";
9673
+ openedAt = Date.now();
10006
9674
  callbacks?.onOpen?.();
10007
9675
  };
10008
9676
  ws.onmessage = (event) => {
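These hunks (and the ones that follow) move the real-time session configuration out of the post-open JSON "init" message and into the WebSocket URL's query string, and change the default streaming model from `stt-rt-v4` to `stt-rt-preview`. A sketch of the URL the adapter now builds — the host is a placeholder, since `getRegionalWsHost()`/`wsBaseUrl` are not shown in this diff:

```typescript
const wsUrl = new URL("wss://soniox-realtime.example.com/transcribe-websocket"); // placeholder host
wsUrl.searchParams.set("api_key", process.env.SONIOX_API_KEY ?? "");
wsUrl.searchParams.set("model", "stt-rt-preview");               // new default (0.8.9 used "stt-rt-v4")
wsUrl.searchParams.set("audio_format", "pcm_s16le");
wsUrl.searchParams.set("sample_rate", "16000");
wsUrl.searchParams.set("language_hints", JSON.stringify(["en"]));
wsUrl.searchParams.set("enable_speaker_diarization", "true");

// 0.8.9 sent these fields as a JSON init message right after the socket opened;
// 0.9.1 encodes them in the URL and sends nothing from the onopen handler.
const ws = new WebSocket(wsUrl.toString());
```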
@@ -10011,7 +9679,8 @@ var SonioxAdapter = class extends BaseAdapter {
10011
9679
  let messageType;
10012
9680
  try {
10013
9681
  const data = JSON.parse(rawPayload);
10014
- if (data.error) {
9682
+ const errorMessage = data.error_message || data.error;
9683
+ if (errorMessage) {
10015
9684
  messageType = "error";
10016
9685
  } else if (data.finished) {
10017
9686
  messageType = "finished";
@@ -10027,10 +9696,10 @@ var SonioxAdapter = class extends BaseAdapter {
10027
9696
  messageType
10028
9697
  });
10029
9698
  }
10030
- if (data.error) {
9699
+ if (errorMessage) {
10031
9700
  callbacks?.onError?.({
10032
9701
  code: data.error_code?.toString() || "STREAM_ERROR",
10033
- message: data.error
9702
+ message: errorMessage
10034
9703
  });
10035
9704
  return;
10036
9705
  }
@@ -10044,7 +9713,7 @@ var SonioxAdapter = class extends BaseAdapter {
10044
9713
  start: token.start_ms ? token.start_ms / 1e3 : 0,
10045
9714
  end: token.end_ms ? token.end_ms / 1e3 : 0,
10046
9715
  confidence: token.confidence,
10047
- speaker: token.speaker
9716
+ speaker: token.speaker ?? void 0
10048
9717
  }));
10049
9718
  const text = data.text || data.tokens.map((t) => t.text).join("");
10050
9719
  const isFinal = data.tokens.every((t) => t.is_final);
@@ -10053,8 +9722,8 @@ var SonioxAdapter = class extends BaseAdapter {
10053
9722
  text,
10054
9723
  isFinal,
10055
9724
  words,
10056
- speaker: data.tokens[0]?.speaker,
10057
- language: data.tokens[0]?.language,
9725
+ speaker: data.tokens[0]?.speaker ?? void 0,
9726
+ language: data.tokens[0]?.language ?? void 0,
10058
9727
  confidence: data.tokens[0]?.confidence
10059
9728
  };
10060
9729
  callbacks?.onTranscript?.(event2);
@@ -10081,10 +9750,10 @@ var SonioxAdapter = class extends BaseAdapter {
10081
9750
  ws.onclose = (event) => {
10082
9751
  status = "closed";
10083
9752
  const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
10084
- const isEarlyClose = timeSinceOpen !== null && timeSinceOpen < 5e3 && !receivedData;
10085
- if (isEarlyClose && event.code === 1e3) {
9753
+ const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
9754
+ if (isImmediateClose && event.code === 1e3) {
10086
9755
  const errorMessage = [
10087
- "Soniox closed connection shortly after opening.",
9756
+ "Soniox closed connection immediately after opening.",
10088
9757
  `Current config: region=${this.region}, model=${modelId}`,
10089
9758
  "Likely causes:",
10090
9759
  " - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
@@ -10170,7 +9839,7 @@ var SonioxAdapter = class extends BaseAdapter {
10170
9839
  async getModels() {
10171
9840
  this.validateConfig();
10172
9841
  try {
10173
- const response = await this.client.get("/models");
9842
+ const response = await getModels(this.getAxiosConfig());
10174
9843
  return response.data.models || [];
10175
9844
  } catch (error) {
10176
9845
  console.error("Failed to fetch Soniox models:", error);
@@ -10197,55 +9866,82 @@ var SonioxAdapter = class extends BaseAdapter {
10197
9866
  start: token.start_ms ? token.start_ms / 1e3 : 0,
10198
9867
  end: token.end_ms ? token.end_ms / 1e3 : 0,
10199
9868
  confidence: token.confidence,
10200
- speaker: token.speaker
9869
+ speaker: token.speaker ?? void 0
10201
9870
  }));
10202
9871
  return buildUtterancesFromWords(words);
10203
9872
  }
10204
9873
  /**
10205
- * Normalize Soniox response to unified format
9874
+ * Normalize v1 API response to unified format
9875
+ *
9876
+ * @param meta - Transcription metadata from getTranscription/createTranscription
9877
+ * @param transcript - Transcript data (text/tokens), only present when status is completed
10206
9878
  */
10207
- normalizeResponse(response) {
10208
- const text = response.text || (response.tokens ? response.tokens.filter((t) => t.is_final !== false).map((t) => t.text).join("") : "");
10209
- const words = response.tokens ? response.tokens.filter(
10210
- (t) => t.is_final !== false && t.start_ms !== void 0 && t.end_ms !== void 0
10211
- ).map((token) => ({
9879
+ normalizeTranscription(meta, transcript) {
9880
+ if (meta.status === TranscriptionStatus.error) {
9881
+ return {
9882
+ success: false,
9883
+ provider: this.name,
9884
+ data: {
9885
+ id: meta.id,
9886
+ text: "",
9887
+ status: "error"
9888
+ },
9889
+ error: {
9890
+ code: meta.error_type || "TRANSCRIPTION_ERROR",
9891
+ message: meta.error_message || "Transcription failed"
9892
+ },
9893
+ raw: { meta, transcript }
9894
+ };
9895
+ }
9896
+ if (!transcript) {
9897
+ return {
9898
+ success: true,
9899
+ provider: this.name,
9900
+ data: {
9901
+ id: meta.id,
9902
+ text: "",
9903
+ status: meta.status,
9904
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
9905
+ },
9906
+ raw: { meta }
9907
+ };
9908
+ }
9909
+ const tokens = transcript.tokens || [];
9910
+ const text = transcript.text || tokens.map((t) => t.text).join("");
9911
+ const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
10212
9912
  word: token.text,
10213
9913
  start: token.start_ms / 1e3,
10214
9914
  end: token.end_ms / 1e3,
10215
9915
  confidence: token.confidence,
10216
- speaker: token.speaker
10217
- })) : [];
9916
+ speaker: token.speaker ?? void 0
9917
+ }));
10218
9918
  const speakerSet = /* @__PURE__ */ new Set();
10219
- if (response.tokens) {
10220
- response.tokens.forEach((t) => {
10221
- if (t.speaker) speakerSet.add(t.speaker);
10222
- });
10223
- }
9919
+ tokens.forEach((t) => {
9920
+ if (t.speaker) speakerSet.add(String(t.speaker));
9921
+ });
10224
9922
  const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
10225
9923
  id,
10226
9924
  label: `Speaker ${id}`
10227
9925
  })) : void 0;
10228
- const tokens = response.tokens ? response.tokens.filter((t) => t.is_final !== false) : [];
10229
- const utterances = tokens.length > 0 ? this.buildUtterancesFromTokens(tokens) : [];
10230
- const language = response.tokens?.find((t) => t.language)?.language;
9926
+ const utterances = this.buildUtterancesFromTokens(tokens);
9927
+ const language = tokens.find((t) => t.language)?.language ?? void 0;
10231
9928
  return {
10232
9929
  success: true,
10233
9930
  provider: this.name,
10234
9931
  data: {
10235
- id: response.id || `soniox_${Date.now()}`,
9932
+ id: meta.id,
10236
9933
  text,
10237
9934
  status: TranscriptionStatus.completed,
10238
9935
  language,
10239
- duration: response.audio_duration_ms ? response.audio_duration_ms / 1e3 : response.total_audio_proc_ms ? response.total_audio_proc_ms / 1e3 : void 0,
9936
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
10240
9937
  speakers,
10241
9938
  words: words.length > 0 ? words : void 0,
10242
9939
  utterances: utterances.length > 0 ? utterances : void 0
10243
9940
  },
10244
9941
  tracking: {
10245
- requestId: response.id,
10246
- processingTimeMs: response.total_audio_proc_ms
9942
+ requestId: meta.id
10247
9943
  },
10248
- raw: response
9944
+ raw: { meta, transcript }
10249
9945
  };
10250
9946
  }
10251
9947
  };
@@ -10669,7 +10365,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10669
10365
  * - Multi-channel: `MultichannelSpeechToTextResponseModel` with `transcripts[]`
10670
10366
  */
10671
10367
  normalizeResponse(response) {
10672
- const chunks = response.transcripts ? response.transcripts : [response];
10368
+ const chunks = "transcripts" in response ? response.transcripts : [response];
10673
10369
  const text = chunks.map((c) => c.text).join(" ");
10674
10370
  const words = [];
10675
10371
  const speakerSet = /* @__PURE__ */ new Set();
@@ -11063,9 +10759,7 @@ var listenTranscribeQueryParams = import_zod.z.object({
11063
10759
  ),
11064
10760
  dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
11065
10761
  encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
11066
- filler_words: import_zod.z.boolean().optional().describe(
11067
- 'Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'
11068
- ),
10762
+ filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
11069
10763
  keyterm: import_zod.z.array(import_zod.z.string()).optional().describe(
11070
10764
  "Key term prompting can boost or suppress specialized terminology and brands. Only compatible with Nova-3"
11071
10765
  ),
@@ -11769,6 +11463,7 @@ __export(assemblyAIAPI_zod_exports, {
11769
11463
  createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault: () => createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault,
11770
11464
  createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault: () => createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault,
11771
11465
  createTranscriptBodySummarizationDefault: () => createTranscriptBodySummarizationDefault,
11466
+ createTranscriptBodyTemperatureDefault: () => createTranscriptBodyTemperatureDefault,
11772
11467
  createTranscriptResponse: () => createTranscriptResponse,
11773
11468
  createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault: () => createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault,
11774
11469
  createTranscriptResponseLanguageDetectionOptionsCodeSwitchingDefault: () => createTranscriptResponseLanguageDetectionOptionsCodeSwitchingDefault,
@@ -11838,6 +11533,7 @@ var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
11838
11533
  var createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault = true;
11839
11534
  var createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
11840
11535
  var createTranscriptBodySummarizationDefault = false;
11536
+ var createTranscriptBodyTemperatureDefault = 0;
11841
11537
  var createTranscriptBodyCustomTopicsDefault = false;
11842
11538
  var createTranscriptBody = import_zod3.z.object({
11843
11539
  audio_end_at: import_zod3.z.number().optional().describe(
@@ -11847,10 +11543,10 @@ var createTranscriptBody = import_zod3.z.object({
11847
11543
  "The point in time, in milliseconds, to begin transcribing in your media file. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
11848
11544
  ),
11849
11545
  auto_chapters: import_zod3.z.boolean().optional().describe(
11850
- "Enable [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
11546
+ "Enable [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
11851
11547
  ),
11852
11548
  auto_highlights: import_zod3.z.boolean().optional().describe(
11853
- "Enable [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases), either true or false"
11549
+ "Enable [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights), either true or false"
11854
11550
  ),
11855
11551
  content_safety: import_zod3.z.boolean().optional().describe(
11856
11552
  "Enable [Content Moderation](https://www.assemblyai.com/docs/content-moderation), can be true or false"
@@ -11866,16 +11562,16 @@ var createTranscriptBody = import_zod3.z.object({
11866
11562
  "Object containing words or phrases to replace, and the word or phrase to replace with"
11867
11563
  )
11868
11564
  ).optional().describe(
11869
- "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
11565
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
11870
11566
  ),
11871
11567
  disfluencies: import_zod3.z.boolean().optional().describe(
11872
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
11568
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
11873
11569
  ),
11874
11570
  domain: import_zod3.z.string().nullish().describe(
11875
11571
  'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
11876
11572
  ),
11877
11573
  entity_detection: import_zod3.z.boolean().optional().describe(
11878
- "Enable [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection), can be true or false"
11574
+ "Enable [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript), can be true or false"
11879
11575
  ),
11880
11576
  filter_profanity: import_zod3.z.boolean().optional().describe(
11881
11577
  "Filter profanity from the transcribed text, can be true or false. See [Profanity Filtering](https://www.assemblyai.com/docs/profanity-filtering) for more details."
@@ -11884,7 +11580,7 @@ var createTranscriptBody = import_zod3.z.object({
11884
11580
  "Enable [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio), can be true or false"
11885
11581
  ),
11886
11582
  iab_categories: import_zod3.z.boolean().optional().describe(
11887
- "Enable [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection), can be true or false"
11583
+ "Enable [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics), can be true or false"
11888
11584
  ),
11889
11585
  keyterms_prompt: import_zod3.z.array(import_zod3.z.string()).optional().describe(
11890
11586
  "Improve accuracy with up to 200 (for Universal-2) or 1000 (for Universal-3 Pro) domain-specific words or phrases (maximum 6 words per phrase). See [Keyterms Prompting](https://www.assemblyai.com/docs/pre-recorded-audio/keyterms-prompting) for more details.\n"
@@ -12132,7 +11828,7 @@ var createTranscriptBody = import_zod3.z.object({
12132
11828
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
12133
11829
  ),
12134
11830
  multichannel: import_zod3.z.boolean().optional().describe(
12135
- "Enable [Multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) transcription, can be true or false."
11831
+ "Enable [Multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) transcription, can be true or false."
12136
11832
  ),
12137
11833
  prompt: import_zod3.z.string().optional().describe(
12138
11834
  "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -12215,23 +11911,23 @@ var createTranscriptBody = import_zod3.z.object({
12215
11911
  "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12216
11912
  ),
12217
11913
  sentiment_analysis: import_zod3.z.boolean().optional().describe(
12218
- "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis), can be true or false"
11914
+ "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
12219
11915
  ),
12220
11916
  speaker_labels: import_zod3.z.boolean().optional().describe(
12221
- "Enable [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization), can be true or false"
11917
+ "Enable [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers), can be true or false"
12222
11918
  ),
12223
11919
  speaker_options: import_zod3.z.object({
12224
11920
  min_speakers_expected: import_zod3.z.number().default(createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault).describe(
12225
- "The minimum number of speakers expected in the audio file. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-a-range-of-possible-speakers) for more details."
11921
+ "The minimum number of speakers expected in the audio file. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers) for more details."
12226
11922
  ),
12227
11923
  max_speakers_expected: import_zod3.z.number().optional().describe(
12228
- "<Warning>Setting this parameter too high may hurt model accuracy</Warning>\nThe maximum number of speakers expected in the audio file. The default depends on audio duration: no limit for 0-2 minutes, 10 for 2-10 minutes, and 30 for 10+ minutes. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-a-range-of-possible-speakers) for more details.\n"
11924
+ "<Warning>Setting this parameter too high may hurt model accuracy</Warning>\nThe maximum number of speakers expected in the audio file. The default depends on audio duration: no limit for 0-2 minutes, 10 for 2-10 minutes, and 30 for 10+ minutes. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers) for more details.\n"
12229
11925
  )
12230
11926
  }).optional().describe(
12231
- "Specify options for [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-a-range-of-possible-speakers). Use this to set a range of possible speakers."
11927
+ "Specify options for [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers). Use this to set a range of possible speakers."
12232
11928
  ),
12233
11929
  speakers_expected: import_zod3.z.number().nullish().describe(
12234
- "Tells the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
11930
+ "Tells the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
12235
11931
  ),
12236
11932
  speech_models: import_zod3.z.array(
12237
11933
  import_zod3.z.string().describe(
@@ -12307,7 +12003,7 @@ var createTranscriptBody = import_zod3.z.object({
12307
12003
  "Enable speech understanding tasks like [Translation](https://www.assemblyai.com/docs/speech-understanding/translation), [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification), and [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting). See the task-specific docs for available options and configuration.\n"
12308
12004
  ),
12309
12005
  summarization: import_zod3.z.boolean().optional().describe(
12310
- "Enable [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12006
+ "Enable [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12311
12007
  ),
12312
12008
  summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
12313
12009
  summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
@@ -12316,6 +12012,9 @@ var createTranscriptBody = import_zod3.z.object({
12316
12012
  ).or(import_zod3.z.null()).optional().describe(
12317
12013
  'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12318
12014
  ),
12015
+ temperature: import_zod3.z.number().optional().describe(
12016
+ "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
12017
+ ),
12319
12018
  webhook_auth_header_name: import_zod3.z.string().nullish().describe(
12320
12019
  "The header name to be sent with the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests"
12321
12020
  ),
@@ -12337,7 +12036,7 @@ var createTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault =
12337
12036
  var createTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
12338
12037
  var createTranscriptResponse = import_zod3.z.object({
12339
12038
  audio_channels: import_zod3.z.number().optional().describe(
12340
- "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) is enabled."
12039
+ "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
12341
12040
  ),
12342
12041
  audio_duration: import_zod3.z.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
12343
12042
  audio_end_at: import_zod3.z.number().nullish().describe(
@@ -12348,10 +12047,10 @@ var createTranscriptResponse = import_zod3.z.object({
12348
12047
  ),
12349
12048
  audio_url: import_zod3.z.string().describe("The URL of the media that was transcribed"),
12350
12049
  auto_chapters: import_zod3.z.boolean().nullish().describe(
12351
- "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12050
+ "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12352
12051
  ),
12353
12052
  auto_highlights: import_zod3.z.boolean().describe(
12354
- "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) is enabled, either true or false"
12053
+ "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
12355
12054
  ),
12356
12055
  auto_highlights_result: import_zod3.z.object({
12357
12056
  status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -12371,9 +12070,9 @@ var createTranscriptResponse = import_zod3.z.object({
12371
12070
  })
12372
12071
  ).describe("A temporally-sequential array of Key Phrases")
12373
12072
  }).describe(
12374
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
12073
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
12375
12074
  ).or(import_zod3.z.null()).optional().describe(
12376
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
12075
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
12377
12076
  ),
12378
12077
  chapters: import_zod3.z.array(
12379
12078
  import_zod3.z.object({
@@ -12386,7 +12085,7 @@ var createTranscriptResponse = import_zod3.z.object({
12386
12085
  end: import_zod3.z.number().describe("The starting time, in milliseconds, for the chapter")
12387
12086
  }).describe("Chapter of the audio file")
12388
12087
  ).nullish().describe(
12389
- "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for more information."
12088
+ "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
12390
12089
  ),
12391
12090
  confidence: import_zod3.z.number().nullish().describe(
12392
12091
  "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
@@ -12442,10 +12141,10 @@ var createTranscriptResponse = import_zod3.z.object({
12442
12141
  "Object containing words or phrases to replace, and the word or phrase to replace with"
12443
12142
  )
12444
12143
  ).nullish().describe(
12445
- "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
12144
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
12446
12145
  ),
12447
12146
  disfluencies: import_zod3.z.boolean().nullish().describe(
12448
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
12147
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
12449
12148
  ),
12450
12149
  domain: import_zod3.z.string().nullish().describe(
12451
12150
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -12507,10 +12206,10 @@ var createTranscriptResponse = import_zod3.z.object({
12507
12206
  )
12508
12207
  }).describe("A detected entity")
12509
12208
  ).nullish().describe(
12510
- "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) for more information.\n"
12209
+ "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
12511
12210
  ),
12512
12211
  entity_detection: import_zod3.z.boolean().nullish().describe(
12513
- "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) is enabled, can be true or false"
12212
+ "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
12514
12213
  ),
12515
12214
  error: import_zod3.z.string().optional().describe("Error message of why the transcript failed"),
12516
12215
  filter_profanity: import_zod3.z.boolean().nullish().describe(
@@ -12520,7 +12219,7 @@ var createTranscriptResponse = import_zod3.z.object({
12520
12219
  "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
12521
12220
  ),
12522
12221
  iab_categories: import_zod3.z.boolean().nullish().describe(
12523
- "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) is enabled, can be true or false"
12222
+ "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
12524
12223
  ),
12525
12224
  iab_categories_result: import_zod3.z.object({
12526
12225
  status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -12543,9 +12242,9 @@ var createTranscriptResponse = import_zod3.z.object({
12543
12242
  ).describe("An array of results for the Topic Detection model"),
12544
12243
  summary: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.number()).describe("The overall relevance of topic to the entire audio file")
12545
12244
  }).describe(
12546
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
12245
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
12547
12246
  ).or(import_zod3.z.null()).optional().describe(
12548
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
12247
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
12549
12248
  ),
12550
12249
  id: import_zod3.z.string().uuid().describe("The unique identifier of your transcript"),
12551
12250
  keyterms_prompt: import_zod3.z.array(import_zod3.z.string()).optional().describe(
@@ -12795,7 +12494,7 @@ var createTranscriptResponse = import_zod3.z.object({
12795
12494
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
12796
12495
  ),
12797
12496
  multichannel: import_zod3.z.boolean().nullish().describe(
12798
- "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) was enabled in the transcription request, either true or false"
12497
+ "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
12799
12498
  ),
12800
12499
  prompt: import_zod3.z.string().optional().describe(
12801
12500
  "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -12878,7 +12577,7 @@ var createTranscriptResponse = import_zod3.z.object({
12878
12577
  "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12879
12578
  ),
12880
12579
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
12881
- "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) is enabled, can be true or false"
12580
+ "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
12882
12581
  ),
12883
12582
  sentiment_analysis_results: import_zod3.z.array(
12884
12583
  import_zod3.z.object({
@@ -12893,17 +12592,17 @@ var createTranscriptResponse = import_zod3.z.object({
12893
12592
  "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
12894
12593
  ),
12895
12594
  speaker: import_zod3.z.string().nullable().describe(
12896
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
12595
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
12897
12596
  )
12898
12597
  }).describe("The result of the Sentiment Analysis model")
12899
12598
  ).nullish().describe(
12900
- "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) for more information.\n"
12599
+ "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
12901
12600
  ),
12902
12601
  speaker_labels: import_zod3.z.boolean().nullish().describe(
12903
- "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, can be true or false"
12602
+ "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
12904
12603
  ),
12905
12604
  speakers_expected: import_zod3.z.number().nullish().describe(
12906
- "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
12605
+ "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
12907
12606
  ),
12908
12607
  speech_model_used: import_zod3.z.string().optional().describe(
12909
12608
  "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
@@ -13006,22 +12705,25 @@ var createTranscriptResponse = import_zod3.z.object({
13006
12705
  "The status of your transcript. Possible values are queued, processing, completed, or error."
13007
12706
  ),
13008
12707
  summarization: import_zod3.z.boolean().describe(
13009
- "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12708
+ "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13010
12709
  ),
13011
12710
  summary: import_zod3.z.string().nullish().describe(
13012
- "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
12711
+ "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13013
12712
  ),
13014
12713
  summary_model: import_zod3.z.string().nullish().describe(
13015
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n"
12714
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13016
12715
  ),
13017
12716
  summary_type: import_zod3.z.string().nullish().describe(
13018
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
12717
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13019
12718
  ),
13020
12719
  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
13021
12720
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13022
12721
  ).or(import_zod3.z.null()).optional().describe(
13023
12722
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13024
12723
  ),
12724
+ temperature: import_zod3.z.number().nullish().describe(
12725
+ "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
12726
+ ),
13025
12727
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
13026
12728
  throttled: import_zod3.z.boolean().nullish().describe(
13027
12729
  "True while a request is throttled and false when a request is no longer throttled"
@@ -13042,7 +12744,7 @@ var createTranscriptResponse = import_zod3.z.object({
13042
12744
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13043
12745
  ),
13044
12746
  speaker: import_zod3.z.string().nullable().describe(
13045
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
12747
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13046
12748
  )
13047
12749
  })
13048
12750
  ).describe("The words in the utterance."),
@@ -13057,7 +12759,7 @@ var createTranscriptResponse = import_zod3.z.object({
13057
12759
  )
13058
12760
  })
13059
12761
  ).nullish().describe(
13060
- "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) for more information.\n"
12762
+ "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
13061
12763
  ),
13062
12764
  webhook_auth: import_zod3.z.boolean().describe(
13063
12765
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
@@ -13081,7 +12783,7 @@ var createTranscriptResponse = import_zod3.z.object({
13081
12783
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13082
12784
  ),
13083
12785
  speaker: import_zod3.z.string().nullable().describe(
13084
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
12786
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13085
12787
  )
13086
12788
  })
13087
12789
  ).nullish().describe(
@@ -13154,7 +12856,7 @@ var getTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault = tr
13154
12856
  var getTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
13155
12857
  var getTranscriptResponse = import_zod3.z.object({
13156
12858
  audio_channels: import_zod3.z.number().optional().describe(
13157
- "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) is enabled."
12859
+ "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
13158
12860
  ),
13159
12861
  audio_duration: import_zod3.z.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
13160
12862
  audio_end_at: import_zod3.z.number().nullish().describe(
@@ -13165,10 +12867,10 @@ var getTranscriptResponse = import_zod3.z.object({
13165
12867
  ),
13166
12868
  audio_url: import_zod3.z.string().describe("The URL of the media that was transcribed"),
13167
12869
  auto_chapters: import_zod3.z.boolean().nullish().describe(
13168
- "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
12870
+ "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13169
12871
  ),
13170
12872
  auto_highlights: import_zod3.z.boolean().describe(
13171
- "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) is enabled, either true or false"
12873
+ "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
13172
12874
  ),
13173
12875
  auto_highlights_result: import_zod3.z.object({
13174
12876
  status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -13188,9 +12890,9 @@ var getTranscriptResponse = import_zod3.z.object({
13188
12890
  })
13189
12891
  ).describe("A temporally-sequential array of Key Phrases")
13190
12892
  }).describe(
13191
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
12893
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
13192
12894
  ).or(import_zod3.z.null()).optional().describe(
13193
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
12895
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
13194
12896
  ),
13195
12897
  chapters: import_zod3.z.array(
13196
12898
  import_zod3.z.object({
@@ -13203,7 +12905,7 @@ var getTranscriptResponse = import_zod3.z.object({
13203
12905
  end: import_zod3.z.number().describe("The starting time, in milliseconds, for the chapter")
13204
12906
  }).describe("Chapter of the audio file")
13205
12907
  ).nullish().describe(
13206
- "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for more information."
12908
+ "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
13207
12909
  ),
13208
12910
  confidence: import_zod3.z.number().nullish().describe(
13209
12911
  "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
@@ -13259,10 +12961,10 @@ var getTranscriptResponse = import_zod3.z.object({
13259
12961
  "Object containing words or phrases to replace, and the word or phrase to replace with"
13260
12962
  )
13261
12963
  ).nullish().describe(
13262
- "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
12964
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
13263
12965
  ),
13264
12966
  disfluencies: import_zod3.z.boolean().nullish().describe(
13265
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
12967
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
13266
12968
  ),
13267
12969
  domain: import_zod3.z.string().nullish().describe(
13268
12970
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -13324,10 +13026,10 @@ var getTranscriptResponse = import_zod3.z.object({
13324
13026
  )
13325
13027
  }).describe("A detected entity")
13326
13028
  ).nullish().describe(
13327
- "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) for more information.\n"
13029
+ "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
13328
13030
  ),
13329
13031
  entity_detection: import_zod3.z.boolean().nullish().describe(
13330
- "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) is enabled, can be true or false"
13032
+ "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
13331
13033
  ),
13332
13034
  error: import_zod3.z.string().optional().describe("Error message of why the transcript failed"),
13333
13035
  filter_profanity: import_zod3.z.boolean().nullish().describe(
@@ -13337,7 +13039,7 @@ var getTranscriptResponse = import_zod3.z.object({
13337
13039
  "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
13338
13040
  ),
13339
13041
  iab_categories: import_zod3.z.boolean().nullish().describe(
13340
- "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) is enabled, can be true or false"
13042
+ "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
13341
13043
  ),
13342
13044
  iab_categories_result: import_zod3.z.object({
13343
13045
  status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -13360,9 +13062,9 @@ var getTranscriptResponse = import_zod3.z.object({
13360
13062
  ).describe("An array of results for the Topic Detection model"),
13361
13063
  summary: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.number()).describe("The overall relevance of topic to the entire audio file")
13362
13064
  }).describe(
13363
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
13065
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
13364
13066
  ).or(import_zod3.z.null()).optional().describe(
13365
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
13067
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
13366
13068
  ),
13367
13069
  id: import_zod3.z.string().uuid().describe("The unique identifier of your transcript"),
13368
13070
  keyterms_prompt: import_zod3.z.array(import_zod3.z.string()).optional().describe(
@@ -13612,7 +13314,7 @@ var getTranscriptResponse = import_zod3.z.object({
13612
13314
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
13613
13315
  ),
13614
13316
  multichannel: import_zod3.z.boolean().nullish().describe(
13615
- "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) was enabled in the transcription request, either true or false"
13317
+ "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
13616
13318
  ),
13617
13319
  prompt: import_zod3.z.string().optional().describe(
13618
13320
  "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -13695,7 +13397,7 @@ var getTranscriptResponse = import_zod3.z.object({
13695
13397
  "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
13696
13398
  ),
13697
13399
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
13698
- "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) is enabled, can be true or false"
13400
+ "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
13699
13401
  ),
13700
13402
  sentiment_analysis_results: import_zod3.z.array(
13701
13403
  import_zod3.z.object({
@@ -13710,17 +13412,17 @@ var getTranscriptResponse = import_zod3.z.object({
13710
13412
  "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13711
13413
  ),
13712
13414
  speaker: import_zod3.z.string().nullable().describe(
13713
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
13415
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13714
13416
  )
13715
13417
  }).describe("The result of the Sentiment Analysis model")
13716
13418
  ).nullish().describe(
13717
- "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) for more information.\n"
13419
+ "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
13718
13420
  ),
13719
13421
  speaker_labels: import_zod3.z.boolean().nullish().describe(
13720
- "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, can be true or false"
13422
+ "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
13721
13423
  ),
13722
13424
  speakers_expected: import_zod3.z.number().nullish().describe(
13723
- "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
13425
+ "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
13724
13426
  ),
13725
13427
  speech_model_used: import_zod3.z.string().optional().describe(
13726
13428
  "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
@@ -13823,22 +13525,25 @@ var getTranscriptResponse = import_zod3.z.object({
13823
13525
  "The status of your transcript. Possible values are queued, processing, completed, or error."
13824
13526
  ),
13825
13527
  summarization: import_zod3.z.boolean().describe(
13826
- "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13528
+ "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13827
13529
  ),
13828
13530
  summary: import_zod3.z.string().nullish().describe(
13829
- "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
13531
+ "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13830
13532
  ),
13831
13533
  summary_model: import_zod3.z.string().nullish().describe(
13832
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n"
13534
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13833
13535
  ),
13834
13536
  summary_type: import_zod3.z.string().nullish().describe(
13835
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
13537
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13836
13538
  ),
13837
13539
  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
13838
13540
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13839
13541
  ).or(import_zod3.z.null()).optional().describe(
13840
13542
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13841
13543
  ),
13544
+ temperature: import_zod3.z.number().nullish().describe(
13545
+ "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
13546
+ ),
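The `temperature` field added above is echoed back on completed transcripts. A rough sketch (plain zod, made-up values, not the package's own exports) of validating a response that carries it:

// Hedged sketch: trims the response schema above down to three fields.
const { z } = require("zod");
const transcriptResponseSketch = z.object({
  temperature: z.number().nullish(), // added in this diff; Universal-3 Pro only per the description above
  text: z.string().nullish(),
  throttled: z.boolean().nullish()
});
// Illustrative payload; values are invented.
const parsed = transcriptResponseSketch.parse({ temperature: 0.2, text: "hello world", throttled: false });
console.log(parsed.temperature); // 0.2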
13842
13547
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
13843
13548
  throttled: import_zod3.z.boolean().nullish().describe(
13844
13549
  "True while a request is throttled and false when a request is no longer throttled"
@@ -13859,7 +13564,7 @@ var getTranscriptResponse = import_zod3.z.object({
13859
13564
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13860
13565
  ),
13861
13566
  speaker: import_zod3.z.string().nullable().describe(
13862
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
13567
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13863
13568
  )
13864
13569
  })
13865
13570
  ).describe("The words in the utterance."),
@@ -13874,7 +13579,7 @@ var getTranscriptResponse = import_zod3.z.object({
13874
13579
  )
13875
13580
  })
13876
13581
  ).nullish().describe(
13877
- "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) for more information.\n"
13582
+ "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
13878
13583
  ),
13879
13584
  webhook_auth: import_zod3.z.boolean().describe(
13880
13585
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
@@ -13898,7 +13603,7 @@ var getTranscriptResponse = import_zod3.z.object({
13898
13603
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13899
13604
  ),
13900
13605
  speaker: import_zod3.z.string().nullable().describe(
13901
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
13606
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13902
13607
  )
13903
13608
  })
13904
13609
  ).nullish().describe(
@@ -13931,7 +13636,7 @@ var deleteTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault =
13931
13636
  var deleteTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
13932
13637
  var deleteTranscriptResponse = import_zod3.z.object({
13933
13638
  audio_channels: import_zod3.z.number().optional().describe(
13934
- "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) is enabled."
13639
+ "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
13935
13640
  ),
13936
13641
  audio_duration: import_zod3.z.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
13937
13642
  audio_end_at: import_zod3.z.number().nullish().describe(
@@ -13942,10 +13647,10 @@ var deleteTranscriptResponse = import_zod3.z.object({
13942
13647
  ),
13943
13648
  audio_url: import_zod3.z.string().describe("The URL of the media that was transcribed"),
13944
13649
  auto_chapters: import_zod3.z.boolean().nullish().describe(
13945
- "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13650
+ "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
13946
13651
  ),
13947
13652
  auto_highlights: import_zod3.z.boolean().describe(
13948
- "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) is enabled, either true or false"
13653
+ "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
13949
13654
  ),
13950
13655
  auto_highlights_result: import_zod3.z.object({
13951
13656
  status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -13965,9 +13670,9 @@ var deleteTranscriptResponse = import_zod3.z.object({
13965
13670
  })
13966
13671
  ).describe("A temporally-sequential array of Key Phrases")
13967
13672
  }).describe(
13968
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
13673
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
13969
13674
  ).or(import_zod3.z.null()).optional().describe(
13970
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
13675
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
13971
13676
  ),
13972
13677
  chapters: import_zod3.z.array(
13973
13678
  import_zod3.z.object({
@@ -13980,7 +13685,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
13980
13685
  end: import_zod3.z.number().describe("The starting time, in milliseconds, for the chapter")
13981
13686
  }).describe("Chapter of the audio file")
13982
13687
  ).nullish().describe(
13983
- "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for more information."
13688
+ "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
13984
13689
  ),
13985
13690
  confidence: import_zod3.z.number().nullish().describe(
13986
13691
  "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
@@ -14036,10 +13741,10 @@ var deleteTranscriptResponse = import_zod3.z.object({
14036
13741
  "Object containing words or phrases to replace, and the word or phrase to replace with"
14037
13742
  )
14038
13743
  ).nullish().describe(
14039
- "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
13744
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
14040
13745
  ),
14041
13746
  disfluencies: import_zod3.z.boolean().nullish().describe(
14042
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
13747
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
14043
13748
  ),
14044
13749
  domain: import_zod3.z.string().nullish().describe(
14045
13750
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -14101,10 +13806,10 @@ var deleteTranscriptResponse = import_zod3.z.object({
14101
13806
  )
14102
13807
  }).describe("A detected entity")
14103
13808
  ).nullish().describe(
14104
- "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) for more information.\n"
13809
+ "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
14105
13810
  ),
14106
13811
  entity_detection: import_zod3.z.boolean().nullish().describe(
14107
- "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) is enabled, can be true or false"
13812
+ "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
14108
13813
  ),
14109
13814
  error: import_zod3.z.string().optional().describe("Error message of why the transcript failed"),
14110
13815
  filter_profanity: import_zod3.z.boolean().nullish().describe(
@@ -14114,7 +13819,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14114
13819
  "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
14115
13820
  ),
14116
13821
  iab_categories: import_zod3.z.boolean().nullish().describe(
14117
- "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) is enabled, can be true or false"
13822
+ "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
14118
13823
  ),
14119
13824
  iab_categories_result: import_zod3.z.object({
14120
13825
  status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -14137,9 +13842,9 @@ var deleteTranscriptResponse = import_zod3.z.object({
14137
13842
  ).describe("An array of results for the Topic Detection model"),
14138
13843
  summary: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.number()).describe("The overall relevance of topic to the entire audio file")
14139
13844
  }).describe(
14140
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
13845
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
14141
13846
  ).or(import_zod3.z.null()).optional().describe(
14142
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
13847
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
14143
13848
  ),
14144
13849
  id: import_zod3.z.string().uuid().describe("The unique identifier of your transcript"),
14145
13850
  keyterms_prompt: import_zod3.z.array(import_zod3.z.string()).optional().describe(
@@ -14389,7 +14094,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14389
14094
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
14390
14095
  ),
14391
14096
  multichannel: import_zod3.z.boolean().nullish().describe(
14392
- "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) was enabled in the transcription request, either true or false"
14097
+ "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
14393
14098
  ),
14394
14099
  prompt: import_zod3.z.string().optional().describe(
14395
14100
  "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -14472,7 +14177,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14472
14177
  "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
14473
14178
  ),
14474
14179
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
14475
- "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) is enabled, can be true or false"
14180
+ "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
14476
14181
  ),
14477
14182
  sentiment_analysis_results: import_zod3.z.array(
14478
14183
  import_zod3.z.object({
@@ -14487,17 +14192,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
14487
14192
  "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14488
14193
  ),
14489
14194
  speaker: import_zod3.z.string().nullable().describe(
14490
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14195
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14491
14196
  )
14492
14197
  }).describe("The result of the Sentiment Analysis model")
14493
14198
  ).nullish().describe(
14494
- "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) for more information.\n"
14199
+ "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
14495
14200
  ),
14496
14201
  speaker_labels: import_zod3.z.boolean().nullish().describe(
14497
- "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, can be true or false"
14202
+ "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
14498
14203
  ),
14499
14204
  speakers_expected: import_zod3.z.number().nullish().describe(
14500
- "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
14205
+ "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
14501
14206
  ),
14502
14207
  speech_model_used: import_zod3.z.string().optional().describe(
14503
14208
  "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
@@ -14600,22 +14305,25 @@ var deleteTranscriptResponse = import_zod3.z.object({
14600
14305
  "The status of your transcript. Possible values are queued, processing, completed, or error."
14601
14306
  ),
14602
14307
  summarization: import_zod3.z.boolean().describe(
14603
- "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
14308
+ "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
14604
14309
  ),
14605
14310
  summary: import_zod3.z.string().nullish().describe(
14606
- "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
14311
+ "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14607
14312
  ),
14608
14313
  summary_model: import_zod3.z.string().nullish().describe(
14609
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n"
14314
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
14610
14315
  ),
14611
14316
  summary_type: import_zod3.z.string().nullish().describe(
14612
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
14317
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14613
14318
  ),
14614
14319
  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
14615
14320
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14616
14321
  ).or(import_zod3.z.null()).optional().describe(
14617
14322
  "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14618
14323
  ),
14324
+ temperature: import_zod3.z.number().nullish().describe(
14325
+ "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
14326
+ ),
14619
14327
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
14620
14328
  throttled: import_zod3.z.boolean().nullish().describe(
14621
14329
  "True while a request is throttled and false when a request is no longer throttled"
@@ -14636,7 +14344,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14636
14344
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14637
14345
  ),
14638
14346
  speaker: import_zod3.z.string().nullable().describe(
14639
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14347
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14640
14348
  )
14641
14349
  })
14642
14350
  ).describe("The words in the utterance."),
@@ -14651,7 +14359,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14651
14359
  )
14652
14360
  })
14653
14361
  ).nullish().describe(
14654
- "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) for more information.\n"
14362
+ "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
14655
14363
  ),
14656
14364
  webhook_auth: import_zod3.z.boolean().describe(
14657
14365
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
@@ -14675,7 +14383,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14675
14383
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14676
14384
  ),
14677
14385
  speaker: import_zod3.z.string().nullable().describe(
14678
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14386
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14679
14387
  )
14680
14388
  })
14681
14389
  ).nullish().describe(
@@ -14720,7 +14428,7 @@ var getTranscriptSentencesResponse = import_zod3.z.object({
14720
14428
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14721
14429
  ),
14722
14430
  speaker: import_zod3.z.string().nullable().describe(
14723
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14431
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14724
14432
  )
14725
14433
  })
14726
14434
  ).describe("An array of words in the sentence"),
@@ -14728,7 +14436,7 @@ var getTranscriptSentencesResponse = import_zod3.z.object({
14728
14436
  "The channel of the sentence. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14729
14437
  ),
14730
14438
  speaker: import_zod3.z.string().nullable().describe(
14731
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14439
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14732
14440
  )
14733
14441
  })
14734
14442
  ).describe("An array of sentences in the transcript")
@@ -14756,7 +14464,7 @@ var getTranscriptParagraphsResponse = import_zod3.z.object({
14756
14464
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14757
14465
  ),
14758
14466
  speaker: import_zod3.z.string().nullable().describe(
14759
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
14467
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14760
14468
  )
14761
14469
  })
14762
14470
  ).describe("An array of words in the paragraph")
@@ -17368,23 +17076,6 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
17368
17076
  }).optional().describe(
17369
17077
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
17370
17078
  ),
17371
- speaker_reidentification: import_zod5.z.object({
17372
- success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
17373
- is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
17374
- exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
17375
- error: import_zod5.z.object({
17376
- status_code: import_zod5.z.number().describe("Status code of the addon error"),
17377
- exception: import_zod5.z.string().describe("Reason of the addon error"),
17378
- message: import_zod5.z.string().describe("Detailed message of the addon error")
17379
- }).nullable().describe(
17380
- "`null` if `success` is `true`. Contains the error details of the failed model"
17381
- ),
17382
- results: import_zod5.z.string().describe(
17383
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
17384
- )
17385
- }).optional().describe(
17386
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
17387
- ),
17388
17079
  structured_data_extraction: import_zod5.z.object({
17389
17080
  success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
17390
17081
  is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
@@ -18865,23 +18556,6 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
18865
18556
  }).optional().describe(
18866
18557
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
18867
18558
  ),
18868
- speaker_reidentification: import_zod5.z.object({
18869
- success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
18870
- is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
18871
- exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
18872
- error: import_zod5.z.object({
18873
- status_code: import_zod5.z.number().describe("Status code of the addon error"),
18874
- exception: import_zod5.z.string().describe("Reason of the addon error"),
18875
- message: import_zod5.z.string().describe("Detailed message of the addon error")
18876
- }).nullable().describe(
18877
- "`null` if `success` is `true`. Contains the error details of the failed model"
18878
- ),
18879
- results: import_zod5.z.string().describe(
18880
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
18881
- )
18882
- }).optional().describe(
18883
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
18884
- ),
18885
18559
  structured_data_extraction: import_zod5.z.object({
18886
18560
  success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
18887
18561
  is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
@@ -21019,23 +20693,6 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
21019
20693
  }).optional().describe(
21020
20694
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
21021
20695
  ),
21022
- speaker_reidentification: import_zod5.z.object({
21023
- success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
21024
- is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
21025
- exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
21026
- error: import_zod5.z.object({
21027
- status_code: import_zod5.z.number().describe("Status code of the addon error"),
21028
- exception: import_zod5.z.string().describe("Reason of the addon error"),
21029
- message: import_zod5.z.string().describe("Detailed message of the addon error")
21030
- }).nullable().describe(
21031
- "`null` if `success` is `true`. Contains the error details of the failed model"
21032
- ),
21033
- results: import_zod5.z.string().describe(
21034
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
21035
- )
21036
- }).optional().describe(
21037
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
21038
- ),
21039
20696
  structured_data_extraction: import_zod5.z.object({
21040
20697
  success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
21041
20698
  is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
@@ -21335,11 +20992,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
21335
20992
  channels: import_zod5.z.number().min(1).max(transcriptionControllerListV2ResponseItemsItemRequestParamsChannelsMax).default(
21336
20993
  transcriptionControllerListV2ResponseItemsItemRequestParamsChannelsDefault
21337
20994
  ).describe("The number of channels of the audio stream"),
21338
- model: import_zod5.z.enum(["solaria-1"]).describe(
21339
- 'The model used to process the audio. "solaria-1" is used by default.'
21340
- ).default(transcriptionControllerListV2ResponseItemsItemRequestParamsModelDefault).describe(
21341
- 'The model used to process the audio. "solaria-1" is used by default.'
21342
- ),
20995
+ model: import_zod5.z.enum(["solaria-1"]).describe('The model used to process the audio. "solaria-1" is used by default.').default(transcriptionControllerListV2ResponseItemsItemRequestParamsModelDefault).describe('The model used to process the audio. "solaria-1" is used by default.'),
21343
20996
  endpointing: import_zod5.z.number().min(transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingMin).max(transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingMax).default(
21344
20997
  transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingDefault
21345
20998
  ).describe(
@@ -23763,23 +23416,6 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
23763
23416
  }).optional().describe(
23764
23417
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
23765
23418
  ),
23766
- speaker_reidentification: import_zod5.z.object({
23767
- success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
23768
- is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
23769
- exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
23770
- error: import_zod5.z.object({
23771
- status_code: import_zod5.z.number().describe("Status code of the addon error"),
23772
- exception: import_zod5.z.string().describe("Reason of the addon error"),
23773
- message: import_zod5.z.string().describe("Detailed message of the addon error")
23774
- }).nullable().describe(
23775
- "`null` if `success` is `true`. Contains the error details of the failed model"
23776
- ),
23777
- results: import_zod5.z.string().describe(
23778
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
23779
- )
23780
- }).optional().describe(
23781
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
23782
- ),
23783
23419
  structured_data_extraction: import_zod5.z.object({
23784
23420
  success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
23785
23421
  is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
@@ -26945,23 +26581,6 @@ var historyControllerGetListV1Response = import_zod5.z.object({
26945
26581
  }).optional().describe(
26946
26582
  "If `name_consistency` has been enabled, Gladia will improve consistency of the names across the transcription"
26947
26583
  ),
26948
- speaker_reidentification: import_zod5.z.object({
26949
- success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
26950
- is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
26951
- exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
26952
- error: import_zod5.z.object({
26953
- status_code: import_zod5.z.number().describe("Status code of the addon error"),
26954
- exception: import_zod5.z.string().describe("Reason of the addon error"),
26955
- message: import_zod5.z.string().describe("Detailed message of the addon error")
26956
- }).nullable().describe(
26957
- "`null` if `success` is `true`. Contains the error details of the failed model"
26958
- ),
26959
- results: import_zod5.z.string().describe(
26960
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
26961
- )
26962
- }).optional().describe(
26963
- "If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
26964
- ),
26965
26584
  structured_data_extraction: import_zod5.z.object({
26966
26585
  success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
26967
26586
  is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
@@ -27255,11 +26874,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
27255
26874
  historyControllerGetListV1ResponseItemsItemRequestParamsSampleRateDefault
27256
26875
  ).describe("The sample rate of the audio stream"),
27257
26876
  channels: import_zod5.z.number().min(1).max(historyControllerGetListV1ResponseItemsItemRequestParamsChannelsMax).default(historyControllerGetListV1ResponseItemsItemRequestParamsChannelsDefault).describe("The number of channels of the audio stream"),
27258
- model: import_zod5.z.enum(["solaria-1"]).describe(
27259
- 'The model used to process the audio. "solaria-1" is used by default.'
27260
- ).default(historyControllerGetListV1ResponseItemsItemRequestParamsModelDefault).describe(
27261
- 'The model used to process the audio. "solaria-1" is used by default.'
27262
- ),
26877
+ model: import_zod5.z.enum(["solaria-1"]).describe('The model used to process the audio. "solaria-1" is used by default.').default(historyControllerGetListV1ResponseItemsItemRequestParamsModelDefault).describe('The model used to process the audio. "solaria-1" is used by default.'),
27263
26878
  endpointing: import_zod5.z.number().min(historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingMin).max(historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingMax).default(
27264
26879
  historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingDefault
27265
26880
  ).describe(
@@ -36420,6 +36035,7 @@ __export(sonioxPublicAPI_zod_exports, {
36420
36035
  createTemporaryApiKeyBody: () => createTemporaryApiKeyBody,
36421
36036
  createTemporaryApiKeyBodyClientReferenceIdMaxOne: () => createTemporaryApiKeyBodyClientReferenceIdMaxOne,
36422
36037
  createTemporaryApiKeyBodyExpiresInSecondsMax: () => createTemporaryApiKeyBodyExpiresInSecondsMax,
36038
+ createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne: () => createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne,
36423
36039
  createTranscriptionBody: () => createTranscriptionBody2,
36424
36040
  createTranscriptionBodyAudioUrlMaxOne: () => createTranscriptionBodyAudioUrlMaxOne,
36425
36041
  createTranscriptionBodyAudioUrlRegExpOne: () => createTranscriptionBodyAudioUrlRegExpOne,
@@ -36550,11 +36166,11 @@ var getTranscriptionsResponse = import_zod10.z.object({
36550
36166
  });
36551
36167
  var createTranscriptionBodyModelMaxThree = 32;
36552
36168
  var createTranscriptionBodyAudioUrlMaxOne = 4096;
36553
- var createTranscriptionBodyAudioUrlRegExpOne = new RegExp("^https?://[^\\s]+$");
36169
+ var createTranscriptionBodyAudioUrlRegExpOne = /^https?:\/\/[^\s]+$/;
36554
36170
  var createTranscriptionBodyLanguageHintsItemMax = 10;
36555
36171
  var createTranscriptionBodyLanguageHintsMaxOne = 100;
36556
36172
  var createTranscriptionBodyWebhookUrlMaxOne = 256;
36557
- var createTranscriptionBodyWebhookUrlRegExpOne = new RegExp("^https?://[^\\s]+$");
36173
+ var createTranscriptionBodyWebhookUrlRegExpOne = /^https?:\/\/[^\s]+$/;
36558
36174
  var createTranscriptionBodyWebhookAuthHeaderNameMaxOne = 256;
36559
36175
  var createTranscriptionBodyWebhookAuthHeaderValueMaxOne = 256;
36560
36176
  var createTranscriptionBodyClientReferenceIdMaxOne = 256;
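The two regex changes in this hunk replace new RegExp(...) strings with equivalent literals; a quick sketch with illustrative inputs:

// Both forms express the same pattern; the literal avoids double-escaped backslashes.
const fromString = new RegExp("^https?://[^\\s]+$");
const fromLiteral = /^https?:\/\/[^\s]+$/;
const sample = "https://example.com/audio.mp3"; // illustrative URL
console.log(fromString.test(sample), fromLiteral.test(sample)); // true true
console.log(fromLiteral.test("not a url")); // false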
@@ -36702,22 +36318,25 @@ var getModelsResponse = import_zod10.z.object({
36702
36318
  });
36703
36319
  var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
36704
36320
  var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
36321
+ var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
36705
36322
  var createTemporaryApiKeyBody = import_zod10.z.object({
36706
36323
  usage_type: import_zod10.z.enum(["transcribe_websocket"]),
36707
36324
  expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
36708
- client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique.")
36325
+ client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
36326
+ single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
36327
+ max_session_duration_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne).or(import_zod10.z.null()).optional().describe(
36328
+ "Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
36329
+ )
36709
36330
  });
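A hedged sketch (plain zod, not the package's exports) of a request body the updated createTemporaryApiKeyBody shape now accepts, staying under the 3600-second expiry cap and the 18000-second session cap defined above; all values are illustrative:

const { z } = require("zod");
// Mirrors the fields shown above, including the two new optional ones.
const temporaryApiKeyBodySketch = z.object({
  usage_type: z.enum(["transcribe_websocket"]),
  expires_in_seconds: z.number().min(1).max(3600),
  client_reference_id: z.string().max(256).nullish(),
  single_use: z.boolean().nullish(),                                    // new field
  max_session_duration_seconds: z.number().min(1).max(18000).nullish()  // new field
});
// Illustrative request body; not taken from the package's tests or docs.
temporaryApiKeyBodySketch.parse({
  usage_type: "transcribe_websocket",
  expires_in_seconds: 600,
  single_use: true,
  max_session_duration_seconds: 3600
});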
36710
36331
 
36711
36332
  // src/generated/soniox/streaming-types.zod.ts
36712
36333
  var streaming_types_zod_exports = {};
36713
36334
  __export(streaming_types_zod_exports, {
36714
36335
  sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
36715
- sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
36716
36336
  sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
36717
36337
  sonioxContextSchema: () => sonioxContextSchema,
36718
36338
  sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
36719
36339
  sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
36720
- sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
36721
36340
  sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
36722
36341
  sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
36723
36342
  sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
@@ -36731,7 +36350,7 @@ __export(streaming_types_zod_exports, {
36731
36350
  streamingUpdateConfigParams: () => streamingUpdateConfigParams3
36732
36351
  });
36733
36352
  var import_zod11 = require("zod");
36734
- var sonioxAutoDetectedAudioFormatSchema = import_zod11.z.enum([
36353
+ var sonioxAudioFormatSchema = import_zod11.z.enum([
36735
36354
  "auto",
36736
36355
  "aac",
36737
36356
  "aiff",
@@ -36741,10 +36360,7 @@ var sonioxAutoDetectedAudioFormatSchema = import_zod11.z.enum([
36741
36360
  "mp3",
36742
36361
  "ogg",
36743
36362
  "wav",
36744
- "webm"
36745
- ]);
36746
- var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
36747
- // Signed PCM
36363
+ "webm",
36748
36364
  "pcm_s8",
36749
36365
  "pcm_s16le",
36750
36366
  "pcm_s16be",
@@ -36752,7 +36368,6 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
36752
36368
  "pcm_s24be",
36753
36369
  "pcm_s32le",
36754
36370
  "pcm_s32be",
36755
- // Unsigned PCM
36756
36371
  "pcm_u8",
36757
36372
  "pcm_u16le",
36758
36373
  "pcm_u16be",
@@ -36760,86 +36375,81 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
36760
36375
  "pcm_u24be",
36761
36376
  "pcm_u32le",
36762
36377
  "pcm_u32be",
36763
- // Float PCM
36764
36378
  "pcm_f32le",
36765
36379
  "pcm_f32be",
36766
36380
  "pcm_f64le",
36767
36381
  "pcm_f64be",
36768
- // Companded
36769
36382
  "mulaw",
36770
36383
  "alaw"
36771
36384
  ]);
36772
- var sonioxAudioFormatSchema = import_zod11.z.union([
36773
- sonioxAutoDetectedAudioFormatSchema,
36774
- sonioxPcmAudioEncodingSchema
36775
- ]);
36776
36385
  var sonioxOneWayTranslationSchema = import_zod11.z.object({
36777
36386
  type: import_zod11.z.literal("one_way"),
36778
- target_language: import_zod11.z.string().describe("Target language code for translation")
36387
+ target_language: import_zod11.z.string()
36779
36388
  });
36780
36389
  var sonioxTwoWayTranslationSchema = import_zod11.z.object({
36781
36390
  type: import_zod11.z.literal("two_way"),
36782
- language_a: import_zod11.z.string().describe("First language for bidirectional translation"),
36783
- language_b: import_zod11.z.string().describe("Second language for bidirectional translation")
36391
+ language_a: import_zod11.z.string(),
36392
+ language_b: import_zod11.z.string()
36784
36393
  });
36785
36394
  var sonioxTranslationConfigSchema = import_zod11.z.union([
36786
36395
  sonioxOneWayTranslationSchema,
36787
36396
  sonioxTwoWayTranslationSchema
36788
36397
  ]);
36789
36398
  var sonioxContextGeneralItemSchema = import_zod11.z.object({
36790
- key: import_zod11.z.string().describe("Context item key (e.g. 'Domain')"),
36791
- value: import_zod11.z.string().describe("Context item value (e.g. 'medicine')")
36399
+ key: import_zod11.z.string(),
36400
+ value: import_zod11.z.string()
36792
36401
  });
36793
36402
  var sonioxTranslationTermSchema = import_zod11.z.object({
36794
- source: import_zod11.z.string().describe("Source term"),
36795
- target: import_zod11.z.string().describe("Target term to translate to")
36403
+ source: import_zod11.z.string(),
36404
+ target: import_zod11.z.string()
36796
36405
  });
36797
36406
  var sonioxStructuredContextSchema = import_zod11.z.object({
36798
- general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional().describe("General context items (key-value pairs)"),
36799
- text: import_zod11.z.string().optional().describe("Text context"),
36800
- terms: import_zod11.z.array(import_zod11.z.string()).optional().describe("Terms that might occur in speech"),
36801
- translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional().describe("Hints how to translate specific terms (ignored if translation is not enabled)")
36407
+ general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional(),
36408
+ text: import_zod11.z.string().optional(),
36409
+ terms: import_zod11.z.array(import_zod11.z.string()).optional(),
36410
+ translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional()
36802
36411
  });
36803
36412
  var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
36804
36413
  var sonioxRealtimeModelSchema = import_zod11.z.enum([
36414
+ "stt-rt-v4",
36805
36415
  "stt-rt-v3",
36806
36416
  "stt-rt-preview",
36807
36417
  "stt-rt-v3-preview",
36808
36418
  "stt-rt-preview-v2"
36809
36419
  ]);
36810
36420
  var streamingTranscriberParams3 = import_zod11.z.object({
36811
- model: sonioxRealtimeModelSchema.describe("Real-time model to use"),
36812
- audioFormat: sonioxAudioFormatSchema.optional().describe("Audio format specification. Use 'auto' for automatic detection"),
36813
- sampleRate: import_zod11.z.number().optional().describe("Sample rate in Hz (required for raw PCM formats)"),
36814
- numChannels: import_zod11.z.number().min(1).max(2).optional().describe("Number of audio channels (1 for mono, 2 for stereo) - required for raw PCM formats"),
36815
- languageHints: import_zod11.z.array(import_zod11.z.string()).optional().describe("Expected languages in the audio (ISO language codes)"),
36816
- context: sonioxContextSchema.optional().describe("Additional context to improve transcription accuracy"),
36817
- enableSpeakerDiarization: import_zod11.z.boolean().optional().describe("Enable speaker diarization - each token will include a speaker field"),
36818
- enableLanguageIdentification: import_zod11.z.boolean().optional().describe("Enable language identification - each token will include a language field"),
36819
- enableEndpointDetection: import_zod11.z.boolean().optional().describe("Enable endpoint detection to detect when a speaker has finished talking"),
36820
- translation: sonioxTranslationConfigSchema.optional().describe("Translation configuration"),
36821
- clientReferenceId: import_zod11.z.string().optional().describe("Optional tracking identifier (client-defined)")
36822
- });
36823
- var sonioxTranslationStatusSchema = import_zod11.z.enum(["none", "original", "translation"]);
36421
+ model: sonioxRealtimeModelSchema,
36422
+ audioFormat: sonioxAudioFormatSchema.optional(),
36423
+ sampleRate: import_zod11.z.number().optional(),
36424
+ numChannels: import_zod11.z.number().optional(),
36425
+ languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
36426
+ context: sonioxContextSchema.optional(),
36427
+ enableSpeakerDiarization: import_zod11.z.boolean().optional(),
36428
+ enableLanguageIdentification: import_zod11.z.boolean().optional(),
36429
+ enableEndpointDetection: import_zod11.z.boolean().optional(),
36430
+ translation: sonioxTranslationConfigSchema.optional(),
36431
+ clientReferenceId: import_zod11.z.string().optional()
36432
+ });
36433
+ var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
36824
36434
  var sonioxTokenSchema = import_zod11.z.object({
36825
- text: import_zod11.z.string().describe("Token text content (subword, word, or space)"),
36826
- start_ms: import_zod11.z.number().optional().describe("Start time of the token in milliseconds"),
36827
- end_ms: import_zod11.z.number().optional().describe("End time of the token in milliseconds"),
36828
- confidence: import_zod11.z.number().min(0).max(1).optional().describe("Confidence score between 0.0 and 1.0"),
36829
- is_final: import_zod11.z.boolean().describe("Whether this token is final (confirmed) or provisional"),
36830
- speaker: import_zod11.z.string().optional().describe("Speaker identifier (only present when speaker diarization is enabled)"),
36831
- language: import_zod11.z.string().optional().describe("Detected language code (only present when language identification is enabled)"),
36832
- source_language: import_zod11.z.string().optional().describe("Original language code for translated tokens"),
36833
- translation_status: sonioxTranslationStatusSchema.optional().describe("Translation status: 'none', 'original', or 'translation'")
36435
+ text: import_zod11.z.string(),
36436
+ start_ms: import_zod11.z.number().optional(),
36437
+ end_ms: import_zod11.z.number().optional(),
36438
+ confidence: import_zod11.z.number(),
36439
+ is_final: import_zod11.z.boolean(),
36440
+ speaker: import_zod11.z.string().optional(),
36441
+ translation_status: sonioxTranslationStatusSchema.optional(),
36442
+ language: import_zod11.z.string().optional(),
36443
+ source_language: import_zod11.z.string().optional()
36834
36444
  });
36835
36445
  var sonioxStreamingResponseSchema = import_zod11.z.object({
36836
- text: import_zod11.z.string().optional().describe("Complete transcribed text"),
36837
- tokens: import_zod11.z.array(sonioxTokenSchema).describe("List of recognized tokens"),
36838
- final_audio_proc_ms: import_zod11.z.number().optional().describe("Milliseconds of audio processed into final tokens"),
36839
- total_audio_proc_ms: import_zod11.z.number().optional().describe("Milliseconds of audio processed (final + non-final)"),
36840
- finished: import_zod11.z.boolean().optional().describe("Whether the transcription is complete"),
36841
- error: import_zod11.z.string().optional().describe("Error message if an error occurred"),
36842
- error_code: import_zod11.z.number().optional().describe("Error code if an error occurred")
36446
+ text: import_zod11.z.string(),
36447
+ tokens: import_zod11.z.array(sonioxTokenSchema),
36448
+ final_audio_proc_ms: import_zod11.z.number(),
36449
+ total_audio_proc_ms: import_zod11.z.number(),
36450
+ finished: import_zod11.z.boolean().optional(),
36451
+ error_code: import_zod11.z.number().optional(),
36452
+ error_message: import_zod11.z.string().optional()
36843
36453
  });
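A hedged sketch (plain zod, invented payload) of consuming the simplified streaming response shape above, where error_message replaces the previous error field and tokens carry an is_final flag:

const { z } = require("zod");
// Token and response shapes mirror the schemas defined just above.
const tokenSketch = z.object({
  text: z.string(),
  start_ms: z.number().optional(),
  end_ms: z.number().optional(),
  confidence: z.number(),
  is_final: z.boolean(),
  speaker: z.string().optional(),
  translation_status: z.enum(["original", "translation", "none"]).optional(),
  language: z.string().optional(),
  source_language: z.string().optional()
});
const streamingResponseSketch = z.object({
  text: z.string(),
  tokens: z.array(tokenSketch),
  final_audio_proc_ms: z.number(),
  total_audio_proc_ms: z.number(),
  finished: z.boolean().optional(),
  error_code: z.number().optional(),
  error_message: z.string().optional()
});
// Illustrative update; joins the finalized token texts.
const update = streamingResponseSketch.parse({
  text: "hello",
  tokens: [{ text: "hello", confidence: 0.98, is_final: true }],
  final_audio_proc_ms: 1200,
  total_audio_proc_ms: 1500
});
console.log(update.tokens.filter((t) => t.is_final).map((t) => t.text).join(""));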
36844
36454
  var sonioxRecorderStateSchema = import_zod11.z.enum([
36845
36455
  "Init",
@@ -37405,8 +37015,8 @@ var BatchOnlyProviders = AllProviders.filter(
37405
37015
  );
37406
37016
 
37407
37017
  // src/generated/deepgram/schema/index.ts
37408
- var schema_exports4 = {};
37409
- __export(schema_exports4, {
37018
+ var schema_exports5 = {};
37019
+ __export(schema_exports5, {
37410
37020
  V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
37411
37021
  V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
37412
37022
  V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
@@ -37661,8 +37271,8 @@ var V1SpeakPostParametersSampleRate = {
37661
37271
  };
37662
37272
 
37663
37273
  // src/generated/openai/schema/index.ts
37664
- var schema_exports5 = {};
37665
- __export(schema_exports5, {
37274
+ var schema_exports6 = {};
37275
+ __export(schema_exports6, {
37666
37276
  AudioResponseFormat: () => AudioResponseFormat,
37667
37277
  CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
37668
37278
  CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
@@ -37956,6 +37566,16 @@ var ToolChoiceOptions = {
37956
37566
  required: "required"
37957
37567
  };
37958
37568
 
37569
+ // src/generated/openai/schema/transcriptionDiarizedSegmentType.ts
37570
+ var TranscriptionDiarizedSegmentType = {
37571
+ transcripttextsegment: "transcript.text.segment"
37572
+ };
37573
+
37574
+ // src/generated/openai/schema/transcriptionInclude.ts
37575
+ var TranscriptionInclude = {
37576
+ logprobs: "logprobs"
37577
+ };
37578
+
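A small sketch of how the two constants added above might be used; the event objects and include list are illustrative, not the OpenAI client API:

// Hedged sketch mirroring the generated constant objects above.
const TranscriptionDiarizedSegmentType = { transcripttextsegment: "transcript.text.segment" };
const TranscriptionInclude = { logprobs: "logprobs" };
// Invented event stream; only the type discriminator matters here.
const events = [
  { type: "transcript.text.delta", delta: "hel" },
  { type: "transcript.text.segment", speaker: "A", text: "hello" }
];
const segments = events.filter((e) => e.type === TranscriptionDiarizedSegmentType.transcripttextsegment);
console.log(segments.length); // 1
const include = [TranscriptionInclude.logprobs]; // e.g. when asking for log probabilities
console.log(include); // ["logprobs"]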
37959
37579
  // src/generated/openai/schema/transcriptTextDeltaEventType.ts
37960
37580
  var TranscriptTextDeltaEventType = {
37961
37581
  transcripttextdelta: "transcript.text.delta"
@@ -37981,16 +37601,6 @@ var TranscriptTextUsageTokensType = {
37981
37601
  tokens: "tokens"
37982
37602
  };
37983
37603
 
37984
- // src/generated/openai/schema/transcriptionDiarizedSegmentType.ts
37985
- var TranscriptionDiarizedSegmentType = {
37986
- transcripttextsegment: "transcript.text.segment"
37987
- };
37988
-
37989
- // src/generated/openai/schema/transcriptionInclude.ts
37990
- var TranscriptionInclude = {
37991
- logprobs: "logprobs"
37992
- };
37993
-
37994
37604
  // src/generated/openai/schema/vadConfigType.ts
37995
37605
  var VadConfigType = {
37996
37606
  server_vad: "server_vad"
@@ -38002,8 +37612,8 @@ var VoiceResourceObject = {
38002
37612
  };
38003
37613
 
38004
37614
  // src/generated/speechmatics/schema/index.ts
38005
- var schema_exports6 = {};
38006
- __export(schema_exports6, {
37615
+ var schema_exports7 = {};
37616
+ __export(schema_exports7, {
38007
37617
  AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
38008
37618
  ErrorResponseError: () => ErrorResponseError,
38009
37619
  GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
@@ -38192,32 +37802,6 @@ var WrittenFormRecognitionResultType = {
  word: "word"
  };

- // src/generated/soniox/schema/index.ts
- var schema_exports7 = {};
- __export(schema_exports7, {
- TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
- TranscriptionMode: () => TranscriptionMode,
- TranscriptionStatus: () => TranscriptionStatus,
- TranslationConfigType: () => TranslationConfigType
- });
-
- // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
- var TemporaryApiKeyUsageType = {
- transcribe_websocket: "transcribe_websocket"
- };
-
- // src/generated/soniox/schema/transcriptionMode.ts
- var TranscriptionMode = {
- real_time: "real_time",
- async: "async"
- };
-
- // src/generated/soniox/schema/translationConfigType.ts
- var TranslationConfigType = {
- one_way: "one_way",
- two_way: "two_way"
- };
-
  // src/generated/elevenlabs/schema/index.ts
  var schema_exports8 = {};
  __export(schema_exports8, {
@@ -38372,8 +37956,8 @@ var getJobsQueryParams = import_zod12.z.object({
  var getJobsResponseJobsItemDurationMin = 0;
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
- var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = new RegExp("^(.|all)$");
- var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = new RegExp("^[A-Za-z0-9._]+$");
+ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+ var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
  var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
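This hunk and the three that follow make the same mechanical change to the generated Speechmatics validators: RegExp constructor calls such as new RegExp("^[A-Za-z0-9._]+$") are replaced with equivalent regex literals. Both forms produce the same pattern object; a quick standalone check (not taken from the package):

// Constructor form vs. literal form compile to the same pattern.
var fromConstructor = new RegExp("^[A-Za-z0-9._]+$");
var fromLiteral = /^[A-Za-z0-9._]+$/;

console.log(fromConstructor.source === fromLiteral.source); // true
console.log(fromLiteral.test("channel_1"));                 // true, every character is in [A-Za-z0-9._]
console.log(fromLiteral.test("bad label!"));                // false, space and "!" are rejected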
@@ -38571,8 +38155,8 @@ var getJobsJobidParams = import_zod12.z.object({
  var getJobsJobidResponseJobDurationMin = 0;
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
- var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = new RegExp("^(.|all)$");
- var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = new RegExp("^[A-Za-z0-9._]+$");
+ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+ var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
  var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38769,8 +38353,8 @@ var deleteJobsJobidQueryParams = import_zod12.z.object({
  var deleteJobsJobidResponseJobDurationMin = 0;
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
- var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = new RegExp("^(.|all)$");
- var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = new RegExp("^[A-Za-z0-9._]+$");
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
  var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38973,8 +38557,8 @@ var getJobsJobidTranscriptQueryParams = import_zod12.z.object({
  var getJobsJobidTranscriptResponseJobDurationMin = 0;
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
- var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = new RegExp("^(.|all)$");
- var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = new RegExp("^[A-Za-z0-9._]+$");
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
  var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;