voice-router-dev 0.8.9 → 0.9.1
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- package/CHANGELOG.md +83 -0
- package/dist/constants.d.mts +23 -21
- package/dist/constants.d.ts +23 -21
- package/dist/constants.js +24 -24
- package/dist/constants.mjs +24 -24
- package/dist/{field-configs-CSOt3yc9.d.mts → field-configs-CH0lgAe8.d.mts} +5691 -6202
- package/dist/{field-configs-CSOt3yc9.d.ts → field-configs-CH0lgAe8.d.ts} +5691 -6202
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +160 -247
- package/dist/field-configs.mjs +160 -247
- package/dist/index.d.mts +2362 -2810
- package/dist/index.d.ts +2362 -2810
- package/dist/index.js +531 -947
- package/dist/index.mjs +531 -947
- package/dist/{speechToTextChunkResponseModel-B4kVoFc3.d.ts → speechToTextChunkResponseModel-BY2lGyZ3.d.ts} +2405 -1918
- package/dist/{speechToTextChunkResponseModel-DmajV4F-.d.mts → speechToTextChunkResponseModel-KayxDiZ7.d.mts} +2405 -1918
- package/dist/webhooks.d.mts +70 -2
- package/dist/webhooks.d.ts +70 -2
- package/dist/webhooks.js +113 -1
- package/dist/webhooks.mjs +111 -1
- package/package.json +2 -1
package/dist/index.mjs
CHANGED
@@ -754,60 +754,60 @@ var SonioxLanguage = {
 // src/generated/soniox/models.ts
 var SonioxModels = [
   { id: "stt-rt-v4", name: "Speech-to-Text Real-time v4", mode: "real_time" },
-  { id: "stt-rt-v3", name: "Speech-to-Text Real-time v3", mode: "real_time" },
   { id: "stt-async-v4", name: "Speech-to-Text Async v4", mode: "async" },
-  { id: "stt-async-v3", name: "Speech-to-Text Async v3", mode: "async" },
-  { id: "stt-rt-preview", name: "Speech-to-Text Real-time Preview", mode: "real_time" },
-  { id: "stt-async-preview", name: "Speech-to-Text Async Preview", mode: "async" },
-  { id: "stt-rt-v3-preview", name: "Speech-to-Text Real-time v3 Preview", mode: "real_time" },
-  { id: "stt-rt-preview-v2", name: "Speech-to-Text Real-time Preview v2", mode: "real_time" },
-  { id: "stt-async-preview-v1", name: "Speech-to-Text Async Preview v1", mode: "async" }
+  { id: "stt-rt-preview", name: "Speech-to-Text Real-time Preview", mode: "real_time", aliasOf: "stt-rt-v4" },
+  { id: "stt-async-preview", name: "Speech-to-Text Async Preview", mode: "async", aliasOf: "stt-async-v4" },
+  { id: "stt-rt-v3-preview", name: "Speech-to-Text Real-time v3 Preview", mode: "real_time", aliasOf: "stt-rt-v4" },
+  { id: "stt-rt-preview-v2", name: "Speech-to-Text Real-time Preview v2", mode: "real_time", aliasOf: "stt-rt-v4" },
+  { id: "stt-async-preview-v1", name: "Speech-to-Text Async Preview v1", mode: "async", aliasOf: "stt-async-v4" },
+  { id: "stt-rt-v3", name: "Speech-to-Text Real-time v3", mode: "real_time", aliasOf: "stt-rt-v4" },
+  { id: "stt-async-v3", name: "Speech-to-Text Async v3", mode: "async", aliasOf: "stt-async-v4" }
 ];
 var SonioxModelCodes = [
   "stt-rt-v4",
-  "stt-rt-v3",
   "stt-async-v4",
-  "stt-async-v3",
   "stt-rt-preview",
   "stt-async-preview",
   "stt-rt-v3-preview",
   "stt-rt-preview-v2",
-  "stt-async-preview-v1"
+  "stt-async-preview-v1",
+  "stt-rt-v3",
+  "stt-async-v3"
 ];
 var SonioxModelLabels = {
   "stt-rt-v4": "Speech-to-Text Real-time v4",
-  "stt-rt-v3": "Speech-to-Text Real-time v3",
   "stt-async-v4": "Speech-to-Text Async v4",
-  "stt-async-v3": "Speech-to-Text Async v3",
   "stt-rt-preview": "Speech-to-Text Real-time Preview",
   "stt-async-preview": "Speech-to-Text Async Preview",
   "stt-rt-v3-preview": "Speech-to-Text Real-time v3 Preview",
   "stt-rt-preview-v2": "Speech-to-Text Real-time Preview v2",
-  "stt-async-preview-v1": "Speech-to-Text Async Preview v1"
+  "stt-async-preview-v1": "Speech-to-Text Async Preview v1",
+  "stt-rt-v3": "Speech-to-Text Real-time v3",
+  "stt-async-v3": "Speech-to-Text Async v3"
 };
 var SonioxModel = {
   stt_rt_v4: "stt-rt-v4",
-  stt_rt_v3: "stt-rt-v3",
   stt_async_v4: "stt-async-v4",
-  stt_async_v3: "stt-async-v3",
   stt_rt_preview: "stt-rt-preview",
   stt_async_preview: "stt-async-preview",
   stt_rt_v3_preview: "stt-rt-v3-preview",
   stt_rt_preview_v2: "stt-rt-preview-v2",
-  stt_async_preview_v1: "stt-async-preview-v1"
+  stt_async_preview_v1: "stt-async-preview-v1",
+  stt_rt_v3: "stt-rt-v3",
+  stt_async_v3: "stt-async-v3"
 };
 var SonioxRealtimeModel = {
   stt_rt_v4: "stt-rt-v4",
-  stt_rt_v3: "stt-rt-v3",
   stt_rt_preview: "stt-rt-preview",
   stt_rt_v3_preview: "stt-rt-v3-preview",
-  stt_rt_preview_v2: "stt-rt-preview-v2"
+  stt_rt_preview_v2: "stt-rt-preview-v2",
+  stt_rt_v3: "stt-rt-v3"
 };
 var SonioxAsyncModel = {
   stt_async_v4: "stt-async-v4",
-  stt_async_v3: "stt-async-v3",
   stt_async_preview: "stt-async-preview",
-  stt_async_preview_v1: "stt-async-preview-v1"
+  stt_async_preview_v1: "stt-async-preview-v1",
+  stt_async_v3: "stt-async-v3"
 };
 
 // src/generated/speechmatics/languages.ts
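The hunk above demotes the v3 and preview Soniox model ids to aliases of the current v4 models via a new `aliasOf` field. A minimal sketch of resolving an alias to its canonical model, assuming `SonioxModels` is re-exported from the package root (the export surface is not shown in this diff):

```ts
import { SonioxModels } from "voice-router-dev";

// Resolve a possibly-deprecated Soniox model id to its canonical model id.
// Entries without `aliasOf` are already canonical.
function resolveSonioxModel(id: string): string {
  const entry = SonioxModels.find((m) => m.id === id);
  return entry?.aliasOf ?? id;
}

resolveSonioxModel("stt-rt-v3"); // => "stt-rt-v4"
```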
@@ -3782,17 +3782,17 @@ var SummaryTypesEnum = {
   concise: "concise"
 };
 
-// src/generated/gladia/schema/transcriptMessageType.ts
-var TranscriptMessageType = {
-  transcript: "transcript"
-};
-
 // src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
 var TranscriptionControllerListV2KindItem = {
   "pre-recorded": "pre-recorded",
   live: "live"
 };
 
+// src/generated/gladia/schema/transcriptMessageType.ts
+var TranscriptMessageType = {
+  transcript: "transcript"
+};
+
 // src/generated/gladia/schema/translationMessageType.ts
 var TranslationMessageType = {
   translation: "translation"
@@ -4064,7 +4064,7 @@ var WebhookTranscriptionSuccessPayloadEvent = {
 
 // src/generated/gladia/api/gladiaControlAPI.ts
 var preRecordedControllerInitPreRecordedJobV2 = (initTranscriptionRequest, options) => {
-  return axios.post(…
+  return axios.post("/v2/pre-recorded", initTranscriptionRequest, options);
 };
 var preRecordedControllerGetPreRecordedJobV2 = (id, options) => {
   return axios.get(`/v2/pre-recorded/${id}`, options);
@@ -4079,13 +4079,13 @@ var preRecordedControllerGetAudioV2 = (id, options) => {
   });
 };
 var transcriptionControllerListV2 = (params, options) => {
-  return axios.get(…
+  return axios.get("/v2/transcription", {
     ...options,
     params: { ...params, ...options?.params }
   });
 };
 var streamingControllerInitStreamingSessionV2 = (streamingRequest, params, options) => {
-  return axios.post(…
+  return axios.post("/v2/live", streamingRequest, {
     ...options,
     params: { ...params, ...options?.params }
   });
@@ -4351,7 +4351,6 @@ var GladiaAdapter = class extends BaseAdapter {
         sentiment: result?.sentiment_analysis || void 0,
         audioToLlm: result?.audio_to_llm || void 0,
         chapters: result?.chapterization || void 0,
-        speakerReidentification: result?.speaker_reidentification || void 0,
         structuredData: result?.structured_data_extraction || void 0,
         customMetadata: response.custom_metadata || void 0
       },
@@ -5413,17 +5412,17 @@ var PiiPolicy = {
   zodiac_sign: "zodiac_sign"
 };
 
+// src/generated/assemblyai/schema/redactedAudioStatus.ts
+var RedactedAudioStatus = {
+  redacted_audio_ready: "redacted_audio_ready"
+};
+
 // src/generated/assemblyai/schema/redactPiiAudioQuality.ts
 var RedactPiiAudioQuality = {
   mp3: "mp3",
   wav: "wav"
 };
 
-// src/generated/assemblyai/schema/redactedAudioStatus.ts
-var RedactedAudioStatus = {
-  redacted_audio_ready: "redacted_audio_ready"
-};
-
 // src/generated/assemblyai/schema/sentiment.ts
 var Sentiment = {
   POSITIVE: "POSITIVE",
@@ -5487,10 +5486,10 @@ var TranscriptRemoveAudioTags = {
 
 // src/generated/assemblyai/api/assemblyAIAPI.ts
 var createTranscript = (transcriptParams, options) => {
-  return axios2.post(…
+  return axios2.post("/v2/transcript", transcriptParams, options);
 };
 var listTranscripts = (params, options) => {
-  return axios2.get(…
+  return axios2.get("/v2/transcript", {
     ...options,
     params: { ...params, ...options?.params }
   });
@@ -5836,23 +5835,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
         "AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
       );
     }
-    const …
-    …
+    const passthrough = options?.assemblyai;
+    let speechModels;
+    if (passthrough?.speech_model != null && !passthrough.speech_models) {
+      speechModels = [passthrough.speech_model];
+    } else if (passthrough?.speech_models) {
+      speechModels = passthrough.speech_models;
     }
+    const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
     const request = {
-      ...…
+      ...typedOpts,
       audio_url: audioUrl,
       // speech_models is required — default to universal-3-pro
-      speech_models: …
-        "universal-3-pro"
-      ],
+      speech_models: speechModels ?? ["universal-3-pro"],
       // Enable punctuation and formatting by default
-      punctuate: …
-      format_text: …
+      punctuate: typedOpts.punctuate ?? true,
+      format_text: typedOpts.format_text ?? true
     };
     if (options) {
       if (options.model) {
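The hunk above makes the adapter accept the deprecated singular `speech_model` passthrough and fold it into the required plural `speech_models` array. The same folding logic in isolation, as a sketch (option names come from the diff; the wrapper type is an assumption):

```ts
interface AssemblyAIPassthrough {
  speech_model?: string;    // deprecated singular form
  speech_models?: string[]; // current plural form
  [key: string]: unknown;
}

// Mirror of the adapter's migration: singular wins only when plural is absent,
// and the default from the diff is ["universal-3-pro"].
function foldSpeechModels(passthrough?: AssemblyAIPassthrough): string[] {
  if (passthrough?.speech_model != null && !passthrough.speech_models) {
    return [passthrough.speech_model];
  }
  return passthrough?.speech_models ?? ["universal-3-pro"];
}
```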
@@ -6567,8 +6565,10 @@ var DeepgramAdapter = class extends BaseAdapter {
   /**
    * Submit audio for transcription
    *
-   * Sends audio to Deepgram API for transcription. Deepgram processes
-   * synchronously and returns results immediately
+   * Sends audio to Deepgram API for transcription. Deepgram normally processes
+   * synchronously and returns results immediately. When `webhookUrl` is set,
+   * Deepgram can instead return an async callback acknowledgment containing a
+   * request ID.
    *
    * @param audio - Audio input (URL or file buffer)
    * @param options - Transcription options
@@ -6619,17 +6619,59 @@ var DeepgramAdapter = class extends BaseAdapter {
         { params }
       ).then((res) => res.data);
     } else if (audio.type === "file") {
-      response = await this.client.post(…
-        …
-        …
-        …
+      response = await this.client.post(
+        "/listen",
+        audio.file,
+        {
+          params,
+          headers: {
+            "Content-Type": "audio/*"
+          }
         }
-      …
+      ).then((res) => res.data);
     } else {
       throw new Error(
         "Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
       );
     }
+    if (options?.webhookUrl) {
+      const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
+      if (!requestId) {
+        return {
+          success: false,
+          provider: this.name,
+          error: {
+            code: "MISSING_REQUEST_ID",
+            message: "Deepgram callback mode did not return a request ID"
+          },
+          raw: response
+        };
+      }
+      return {
+        success: true,
+        provider: this.name,
+        data: {
+          id: requestId,
+          text: "",
+          status: "queued"
+        },
+        tracking: {
+          requestId
+        },
+        raw: response
+      };
+    }
+    if (!("results" in response) || !("metadata" in response)) {
+      return {
+        success: false,
+        provider: this.name,
+        error: {
+          code: "INVALID_RESPONSE",
+          message: "Deepgram did not return a synchronous transcription payload"
+        },
+        raw: response
+      };
+    }
     return this.normalizeResponse(response);
   } catch (error) {
     return this.createErrorResponse(error);
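Consumers of the new Deepgram callback mode get a `queued` result with a `tracking.requestId` instead of a transcript. A sketch of handling it, assuming `deepgram` is an initialized adapter and the result shape shown in the hunk above:

```ts
// Construction of `deepgram` elided; shape follows the diff above.
const result = await deepgram.transcribe(
  { type: "url", url: "https://example.com/audio.wav" },
  { webhookUrl: "https://example.com/hooks/deepgram" }
);
if (result.success && result.data?.status === "queued") {
  // Transcript arrives later at the webhook; correlate it by request ID.
  console.log("awaiting webhook for request", result.tracking?.requestId);
}
```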
@@ -7290,7 +7332,8 @@ var DeepgramAdapter = class extends BaseAdapter {
           break;
         }
         case "Metadata": {
-          …
+          const { type: _, ...metadata } = message;
+          callbacks?.onMetadata?.(metadata);
           break;
         }
         case "Error": {
@@ -7627,13 +7670,13 @@ var TextNormalizationKind = {
 
 // src/generated/azure/api/speechServicesAPIVersion32.ts
 var transcriptionsList = (params, options) => {
-  return axios4.get(…
+  return axios4.get("/transcriptions", {
     ...options,
     params: { ...params, ...options?.params }
   });
 };
 var transcriptionsCreate = (transcription, options) => {
-  return axios4.post(…
+  return axios4.post("/transcriptions", transcription, options);
 };
 var transcriptionsGet = (id, options) => {
   return axios4.get(`/transcriptions/${id}`, options);
@@ -7648,13 +7691,13 @@ var transcriptionsListFiles = (id, params, options) => {
   });
 };
 var webHooksList = (params, options) => {
-  return axios4.get(…
+  return axios4.get("/webhooks", {
     ...options,
     params: { ...params, ...options?.params }
   });
 };
 var webHooksCreate = (webHook, options) => {
-  return axios4.post(…
+  return axios4.post("/webhooks", webHook, options);
 };
 var webHooksDelete = (id, options) => {
   return axios4.delete(`/webhooks/${id}`, options);
@@ -7726,10 +7769,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
       contentUrls: [audio.url],
       properties: this.buildTranscriptionProperties(options)
     };
-    const response = await transcriptionsCreate(
-      transcriptionRequest,
-      this.getAxiosConfig()
-    );
+    const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
     const transcription = response.data;
     const transcriptId = transcription.self?.split("/").pop() || "";
     return await this.pollForCompletion(transcriptId);
@@ -7782,7 +7822,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
       this.getAxiosConfig()
     );
     const files = filesResponse.data?.values || [];
-    const resultFile = files.find((file) => file.kind === …
+    const resultFile = files.find((file) => file.kind === FileKind.Transcription);
     if (!resultFile?.links?.contentUrl) {
       return {
         success: false,
@@ -8006,15 +8046,20 @@ var AzureSTTAdapter = class extends BaseAdapter {
     return properties;
   }
   /**
-   * Normalize Azure status to unified status
+   * Normalize Azure status to unified status using generated AzureStatus constants
    */
   normalizeStatus(status) {
-    …
+    switch (status) {
+      case Status.Succeeded:
+        return "completed";
+      case Status.Running:
+        return "processing";
+      case Status.Failed:
+        return "error";
+      case Status.NotStarted:
+      default:
+        return "queued";
+    }
   }
   /**
    * Normalize Azure transcription response to unified format
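The rewritten `normalizeStatus` maps the generated Azure `Status` constants onto the library's unified states. The same mapping as a lookup table, for reference (a sketch; the `Status` keys are assumed from the switch above):

```ts
// Fallback for unknown states is "queued", matching the default branch.
const azureToUnified: Record<string, "completed" | "processing" | "error" | "queued"> = {
  Succeeded: "completed",
  Running: "processing",
  Failed: "error",
  NotStarted: "queued"
};
```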
@@ -8134,30 +8179,30 @@ function getAzureOpenAIRealtimeUrl(endpoint, deployment, apiVersion = "2024-10-01-preview") {
 import axios6 from "axios";
 var createTranscription = (createTranscriptionRequest, options) => {
   const formData = new FormData();
-  formData.append(…
-  formData.append(…
+  formData.append("file", createTranscriptionRequest.file);
+  formData.append("model", createTranscriptionRequest.model);
   if (createTranscriptionRequest.language !== void 0) {
-    formData.append(…
+    formData.append("language", createTranscriptionRequest.language);
   }
   if (createTranscriptionRequest.prompt !== void 0) {
-    formData.append(…
+    formData.append("prompt", createTranscriptionRequest.prompt);
   }
   if (createTranscriptionRequest.response_format !== void 0) {
-    formData.append(…
+    formData.append("response_format", createTranscriptionRequest.response_format);
   }
   if (createTranscriptionRequest.temperature !== void 0) {
-    formData.append(…
+    formData.append("temperature", createTranscriptionRequest.temperature.toString());
   }
   if (createTranscriptionRequest.include !== void 0) {
-    createTranscriptionRequest.include.forEach((value) => formData.append(…
+    createTranscriptionRequest.include.forEach((value) => formData.append("include", value));
   }
   if (createTranscriptionRequest.timestamp_granularities !== void 0) {
     createTranscriptionRequest.timestamp_granularities.forEach(
-      (value) => formData.append(…
+      (value) => formData.append("timestamp_granularities", value)
     );
   }
   if (createTranscriptionRequest.stream !== void 0 && createTranscriptionRequest.stream !== null) {
-    formData.append(…
+    formData.append("stream", createTranscriptionRequest.stream.toString());
   }
   if (createTranscriptionRequest.chunking_strategy !== void 0 && createTranscriptionRequest.chunking_strategy !== null) {
     formData.append(
@@ -8167,15 +8212,15 @@ var createTranscription = (createTranscriptionRequest, options) => {
   }
   if (createTranscriptionRequest.known_speaker_names !== void 0) {
     createTranscriptionRequest.known_speaker_names.forEach(
-      (value) => formData.append(…
+      (value) => formData.append("known_speaker_names", value)
     );
   }
   if (createTranscriptionRequest.known_speaker_references !== void 0) {
     createTranscriptionRequest.known_speaker_references.forEach(
-      (value) => formData.append(…
+      (value) => formData.append("known_speaker_references", value)
     );
   }
-  return axios6.post(…
+  return axios6.post("/audio/transcriptions", formData, options);
 };
 
 // src/generated/openai/schema/createTranscriptionRequestTimestampGranularitiesItem.ts
@@ -8264,7 +8309,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
     const request = {
       ...options?.openai,
       file: audioData,
-      // Generated type expects Blob
       model
     };
     if (options?.language) {
@@ -8691,7 +8735,6 @@ function createOpenAIWhisperAdapter(config) {
 
 // src/adapters/speechmatics-adapter.ts
 import axios8 from "axios";
-import WebSocket6 from "ws";
 
 // src/generated/speechmatics/schema/notificationConfigContentsItem.ts
 var NotificationConfigContentsItem = {
@@ -8741,7 +8784,8 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
     super(...arguments);
     this.name = "speechmatics";
     this.capabilities = {
-      streaming: …
+      streaming: false,
+      // Batch only (streaming available via separate WebSocket API)
       diarization: true,
       wordTimestamps: true,
       languageDetection: false,
@@ -8876,16 +8920,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
       jobConfig.fetch_data = {
         url: audio.url
       };
-      …
-      requestBody = formData;
-      headers = { "Content-Type": "multipart/form-data" };
+      requestBody = { config: JSON.stringify(jobConfig) };
+      headers = { "Content-Type": "application/json" };
     } else if (audio.type === "file") {
-      …
-      requestBody = formData;
+      requestBody = {
+        config: JSON.stringify(jobConfig),
+        data_file: audio.file
+      };
       headers = { "Content-Type": "multipart/form-data" };
     } else {
       return {
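Net effect of the hunk above: URL-based jobs now send a JSON body, while file-based jobs keep multipart with `config` plus `data_file`. A sketch of the two resulting body shapes (field names from the diff; the function wrapper is illustrative):

```ts
// Sketch, not the adapter's literal code.
function buildSpeechmaticsBody(
  audio: { type: "url"; url: string } | { type: "file"; file: Blob },
  jobConfig: Record<string, unknown>
) {
  if (audio.type === "url") {
    return {
      body: { config: JSON.stringify({ ...jobConfig, fetch_data: { url: audio.url } }) },
      headers: { "Content-Type": "application/json" }
    };
  }
  return {
    body: { config: JSON.stringify(jobConfig), data_file: audio.file },
    headers: { "Content-Type": "multipart/form-data" }
  };
}
```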
@@ -8990,389 +9031,6 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
       throw error;
     }
   }
-  /**
-   * Build WebSocket URL for real-time streaming
-   *
-   * Note: Real-time API uses a different host from the batch API:
-   * - Batch: {region}.asr.api.speechmatics.com
-   * - Real-time: {region}.rt.speechmatics.com
-   *
-   * @param region - Regional endpoint identifier
-   * @returns WebSocket URL for real-time API
-   */
-  getRegionalWsUrl(region) {
-    if (this.config?.wsBaseUrl) {
-      return this.config.wsBaseUrl;
-    }
-    const rtRegionMap = {
-      eu1: "eu",
-      eu2: "eu",
-      us1: "us",
-      us2: "us",
-      au1: "eu"
-      // No AU RT endpoint — fall back to EU
-    };
-    const rtPrefix = rtRegionMap[region || ""] || "eu";
-    return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
-  }
-  /**
-   * Stream audio for real-time transcription via WebSocket
-   *
-   * Connects to Speechmatics' real-time API and sends audio chunks
-   * for transcription with results returned via callbacks.
-   *
-   * @param options - Streaming configuration options
-   * @param callbacks - Event callbacks for transcription results
-   * @returns Promise that resolves with a StreamingSession
-   *
-   * @example Basic streaming
-   * ```typescript
-   * const session = await adapter.transcribeStream({
-   *   language: 'en',
-   *   speechmaticsStreaming: {
-   *     enablePartials: true,
-   *     operatingPoint: 'enhanced'
-   *   }
-   * }, {
-   *   onTranscript: (event) => console.log(event.text),
-   *   onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
-   *   onError: (error) => console.error(error)
-   * });
-   *
-   * await session.sendAudio({ data: audioBuffer });
-   * await session.close();
-   * ```
-   */
-  async transcribeStream(options, callbacks) {
-    this.validateConfig();
-    const smOpts = options?.speechmaticsStreaming || {};
-    const region = smOpts.region || this.config?.region;
-    const wsUrl = this.getRegionalWsUrl(region);
-    const ws = new WebSocket6(wsUrl, {
-      headers: {
-        Authorization: `Bearer ${this.config.apiKey}`
-      }
-    });
-    let sessionStatus = "connecting";
-    const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
-    let seqNo = 0;
-    let utteranceResults = [];
-    const sessionReady = new Promise((resolve, reject) => {
-      const timeout = setTimeout(() => {
-        reject(new Error("WebSocket connection timeout"));
-      }, 1e4);
-      let wsOpen = false;
-      ws.once("error", (error) => {
-        clearTimeout(timeout);
-        reject(error);
-      });
-      ws.once("open", () => {
-        wsOpen = true;
-        const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
-        const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
-        const startMsg = {
-          message: "StartRecognition",
-          audio_format: {
-            type: "raw",
-            encoding,
-            sample_rate: sampleRate
-          },
-          transcription_config: {
-            language: smOpts.language || options?.language || "en",
-            enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
-          }
-        };
-        const txConfig = startMsg.transcription_config;
-        if (smOpts.domain) txConfig.domain = smOpts.domain;
-        if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
-        if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
-        if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
-        if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
-        if (smOpts.diarization === "speaker" || options?.diarization) {
-          txConfig.diarization = "speaker";
-          if (smOpts.maxSpeakers) {
-            txConfig.speaker_diarization_config = {
-              max_speakers: smOpts.maxSpeakers
-            };
-          } else if (options?.speakersExpected) {
-            txConfig.speaker_diarization_config = {
-              max_speakers: options.speakersExpected
-            };
-          }
-        }
-        if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
-          txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
-            content: word
-          }));
-        } else if (options?.customVocabulary && options.customVocabulary.length > 0) {
-          txConfig.additional_vocab = options.customVocabulary.map((word) => ({
-            content: word
-          }));
-        }
-        if (smOpts.conversationConfig) {
-          txConfig.conversation_config = {
-            end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
-          };
-        }
-        const startPayload = JSON.stringify(startMsg);
-        if (callbacks?.onRawMessage) {
-          callbacks.onRawMessage({
-            provider: "speechmatics",
-            direction: "outgoing",
-            timestamp: Date.now(),
-            payload: startPayload,
-            messageType: "StartRecognition"
-          });
-        }
-        ws.send(startPayload);
-      });
-      const onMessage = (data) => {
-        const rawPayload = data.toString();
-        try {
-          const msg = JSON.parse(rawPayload);
-          if (msg.message === "RecognitionStarted") {
-            clearTimeout(timeout);
-            ws.removeListener("message", onMessage);
-            ws.emit("message", data);
-            resolve();
-          } else if (msg.message === "Error") {
-            clearTimeout(timeout);
-            ws.removeListener("message", onMessage);
-            reject(new Error(msg.reason || "Recognition failed to start"));
-          }
-        } catch {
-        }
-      };
-      ws.on("message", onMessage);
-    });
-    ws.on("message", (data) => {
-      const rawPayload = data.toString();
-      try {
-        const message = JSON.parse(rawPayload);
-        if (callbacks?.onRawMessage) {
-          callbacks.onRawMessage({
-            provider: "speechmatics",
-            direction: "incoming",
-            timestamp: Date.now(),
-            payload: rawPayload,
-            messageType: message.message
-          });
-        }
-        this.handleStreamingMessage(message, callbacks, utteranceResults);
-      } catch (error) {
-        if (callbacks?.onRawMessage) {
-          callbacks.onRawMessage({
-            provider: "speechmatics",
-            direction: "incoming",
-            timestamp: Date.now(),
-            payload: rawPayload,
-            messageType: "parse_error"
-          });
-        }
-        callbacks?.onError?.({
-          code: "PARSE_ERROR",
-          message: "Failed to parse WebSocket message",
-          details: error
-        });
-      }
-    });
-    ws.on("error", (error) => {
-      callbacks?.onError?.({
-        code: "WEBSOCKET_ERROR",
-        message: error.message,
-        details: error
-      });
-    });
-    ws.on("close", (code, reason) => {
-      sessionStatus = "closed";
-      callbacks?.onClose?.(code, reason.toString());
-    });
-    await sessionReady;
-    sessionStatus = "open";
-    callbacks?.onOpen?.();
-    return {
-      id: sessionId,
-      provider: this.name,
-      createdAt: /* @__PURE__ */ new Date(),
-      getStatus: () => sessionStatus,
-      sendAudio: async (chunk) => {
-        if (sessionStatus !== "open") {
-          throw new Error(`Cannot send audio: session is ${sessionStatus}`);
-        }
-        if (ws.readyState !== WebSocket6.OPEN) {
-          throw new Error("WebSocket is not open");
-        }
-        if (callbacks?.onRawMessage) {
-          const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
-            chunk.data.byteOffset,
-            chunk.data.byteOffset + chunk.data.byteLength
-          );
-          callbacks.onRawMessage({
-            provider: this.name,
-            direction: "outgoing",
-            timestamp: Date.now(),
-            payload: audioPayload,
-            messageType: "audio"
-          });
-        }
-        ws.send(chunk.data);
-        seqNo++;
-        if (chunk.isLast) {
-          const endMsg = JSON.stringify({
-            message: "EndOfStream",
-            last_seq_no: seqNo
-          });
-          if (callbacks?.onRawMessage) {
-            callbacks.onRawMessage({
-              provider: this.name,
-              direction: "outgoing",
-              timestamp: Date.now(),
-              payload: endMsg,
-              messageType: "EndOfStream"
-            });
-          }
-          ws.send(endMsg);
-        }
-      },
-      close: async () => {
-        if (sessionStatus === "closed" || sessionStatus === "closing") {
-          return;
-        }
-        sessionStatus = "closing";
-        if (ws.readyState === WebSocket6.OPEN) {
-          seqNo++;
-          ws.send(
-            JSON.stringify({
-              message: "EndOfStream",
-              last_seq_no: seqNo
-            })
-          );
-        }
-        return new Promise((resolve) => {
-          const timeout = setTimeout(() => {
-            ws.terminate();
-            sessionStatus = "closed";
-            resolve();
-          }, 5e3);
-          const onMsg = (data) => {
-            try {
-              const msg = JSON.parse(data.toString());
-              if (msg.message === "EndOfTranscript") {
-                ws.removeListener("message", onMsg);
-                clearTimeout(timeout);
-                ws.close();
-              }
-            } catch {
-            }
-          };
-          ws.on("message", onMsg);
-          ws.once("close", () => {
-            clearTimeout(timeout);
-            sessionStatus = "closed";
-            resolve();
-          });
-        });
-      }
-    };
-  }
-  /**
-   * Handle incoming Speechmatics real-time WebSocket messages
-   */
-  handleStreamingMessage(message, callbacks, utteranceResults) {
-    switch (message.message) {
-      case "RecognitionStarted": {
-        break;
-      }
-      case "AddPartialTranscript": {
-        const results = message.results || [];
-        const text = buildTextFromSpeechmaticsResults(results);
-        if (text) {
-          callbacks?.onTranscript?.({
-            type: "transcript",
-            text,
-            isFinal: false,
-            words: this.extractWordsFromResults(results),
-            data: message
-          });
-        }
-        break;
-      }
-      case "AddTranscript": {
-        const results = message.results || [];
-        const text = buildTextFromSpeechmaticsResults(results);
-        if (utteranceResults) {
-          utteranceResults.push(...results);
-        }
-        if (text) {
-          callbacks?.onTranscript?.({
-            type: "transcript",
-            text,
-            isFinal: true,
-            words: this.extractWordsFromResults(results),
-            data: message
-          });
-        }
-        break;
-      }
-      case "EndOfUtterance": {
-        if (utteranceResults && utteranceResults.length > 0) {
-          const text = buildTextFromSpeechmaticsResults(utteranceResults);
-          const words = this.extractWordsFromResults(utteranceResults);
-          const utterances = buildUtterancesFromWords(words);
-          if (utterances.length > 0) {
-            for (const utt of utterances) {
-              callbacks?.onUtterance?.(utt);
-            }
-          } else if (text) {
-            callbacks?.onUtterance?.({
-              text,
-              start: words.length > 0 ? words[0].start : 0,
-              end: words.length > 0 ? words[words.length - 1].end : 0,
-              words
-            });
-          }
-          utteranceResults.length = 0;
-        }
-        break;
-      }
-      case "AudioAdded": {
-        break;
-      }
-      case "EndOfTranscript": {
-        break;
-      }
-      case "Info":
-      case "Warning": {
-        callbacks?.onMetadata?.(message);
-        break;
-      }
-      case "Error": {
-        const errMsg = message;
-        callbacks?.onError?.({
-          code: errMsg.type || "SPEECHMATICS_ERROR",
-          message: errMsg.reason || "Unknown error",
-          details: message
-        });
-        break;
-      }
-      default: {
-        callbacks?.onMetadata?.(message);
-        break;
-      }
-    }
-  }
-  /**
-   * Extract unified Word[] from Speechmatics recognition results
-   */
-  extractWordsFromResults(results) {
-    return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
-      word: result.alternatives?.[0]?.content || "",
-      start: result.start_time,
-      end: result.end_time,
-      confidence: result.alternatives?.[0]?.confidence,
-      speaker: result.alternatives?.[0]?.speaker
-    }));
-  }
   /**
    * Normalize Speechmatics status to unified status
    * Uses generated JobDetailsStatus enum values
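With this removal, 0.9.1 drops the Speechmatics `transcribeStream()` implementation entirely; the `capabilities.streaming` flag (see the earlier hunk) now reports batch-only support. A defensive guard for caller code, assuming the adapter shape shown in the diff:

```ts
// Check the capability flag before attempting real-time use.
function assertStreaming(adapter: { name: string; capabilities: { streaming: boolean } }) {
  if (!adapter.capabilities.streaming) {
    throw new Error(`${adapter.name} does not support streaming in this version`);
  }
}
```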
@@ -9441,9 +9099,6 @@ function createSpeechmaticsAdapter(config) {
   return adapter;
 }
 
-// src/adapters/soniox-adapter.ts
-import axios9 from "axios";
-
 // src/generated/soniox/schema/transcriptionStatus.ts
 var TranscriptionStatus = {
   queued: "queued",
@@ -9452,6 +9107,57 @@ var TranscriptionStatus = {
   error: "error"
 };
 
+// src/generated/soniox/api/sonioxPublicAPI.ts
+import axios9 from "axios";
+
+// src/generated/soniox/schema/index.ts
+var schema_exports4 = {};
+__export(schema_exports4, {
+  TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
+  TranscriptionMode: () => TranscriptionMode,
+  TranscriptionStatus: () => TranscriptionStatus,
+  TranslationConfigType: () => TranslationConfigType
+});
+
+// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
+var TemporaryApiKeyUsageType = {
+  transcribe_websocket: "transcribe_websocket"
+};
+
+// src/generated/soniox/schema/transcriptionMode.ts
+var TranscriptionMode = {
+  real_time: "real_time",
+  async: "async"
+};
+
+// src/generated/soniox/schema/translationConfigType.ts
+var TranslationConfigType = {
+  one_way: "one_way",
+  two_way: "two_way"
+};
+
+// src/generated/soniox/api/sonioxPublicAPI.ts
+var uploadFile = (uploadFileBody2, options) => {
+  const formData = new FormData();
+  if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
+    formData.append("client_reference_id", uploadFileBody2.client_reference_id);
+  }
+  formData.append("file", uploadFileBody2.file);
+  return axios9.post("/v1/files", formData, options);
+};
+var createTranscription2 = (createTranscriptionPayload, options) => {
+  return axios9.post("/v1/transcriptions", createTranscriptionPayload, options);
+};
+var getTranscription = (transcriptionId, options) => {
+  return axios9.get(`/v1/transcriptions/${transcriptionId}`, options);
+};
+var getTranscriptionTranscript = (transcriptionId, options) => {
+  return axios9.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
+};
+var getModels = (options) => {
+  return axios9.get("/v1/models", options);
+};
+
 // src/adapters/soniox-adapter.ts
 var SonioxAdapter = class extends BaseAdapter {
   constructor() {
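The generated functions above embed the `/v1` path prefix and take an axios config as their last argument. A sketch of driving the same endpoints directly with axios; the base URL and Bearer header are assumptions (in the package they are injected through the adapter's `getAxiosConfig()`):

```ts
import axios from "axios";

async function run(apiKey: string) {
  const cfg = {
    baseURL: "https://api.soniox.com", // assumed host
    headers: { Authorization: `Bearer ${apiKey}` }
  };
  // Create an async transcription job, then fetch its metadata by id.
  const created = await axios.post(
    "/v1/transcriptions",
    { model: "stt-async-v4", audio_url: "https://example.com/a.wav" },
    cfg
  );
  const meta = await axios.get(`/v1/transcriptions/${created.data.id}`, cfg);
  return meta.data;
}
```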
@@ -9506,11 +9212,17 @@ var SonioxAdapter = class extends BaseAdapter {
     }
   }
   /**
-   * Get the base URL for API requests
+   * Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
    */
   get baseUrl() {
     if (this.config?.baseUrl) return this.config.baseUrl;
-    return `https://${this.getRegionalHost()}/v1`;
+    return `https://${this.getRegionalHost()}`;
+  }
+  /**
+   * Build axios config with Soniox Bearer auth
+   */
+  getAxiosConfig() {
+    return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
   }
   initialize(config) {
     super.initialize(config);
@@ -9520,15 +9232,6 @@ var SonioxAdapter = class extends BaseAdapter {
     if (config.model) {
       this.defaultModel = config.model;
     }
-    this.client = axios9.create({
-      baseURL: this.baseUrl,
-      timeout: config.timeout || 12e4,
-      headers: {
-        Authorization: `Bearer ${config.apiKey}`,
-        "Content-Type": "application/json",
-        ...config.headers
-      }
-    });
   }
   /**
    * Get current region
@@ -9558,23 +9261,12 @@ var SonioxAdapter = class extends BaseAdapter {
    */
   setRegion(region) {
     this.region = region;
-    if (this.config?.apiKey) {
-      this.client = axios9.create({
-        baseURL: this.baseUrl,
-        timeout: this.config.timeout || 12e4,
-        headers: {
-          Authorization: `Bearer ${this.config.apiKey}`,
-          "Content-Type": "application/json",
-          ...this.config.headers
-        }
-      });
-    }
   }
   /**
    * Submit audio for transcription
    *
-   * …
-   * …
+   * Uses the async v1 API: createTranscription returns status `queued`,
+   * then polls until completed (or returns immediately if webhook is set).
    *
    * @param audio - Audio input (URL or file)
    * @param options - Transcription options
@@ -9583,21 +9275,44 @@ var SonioxAdapter = class extends BaseAdapter {
   async transcribe(audio, options) {
     this.validateConfig();
     try {
-      const requestBody = {
-        …
-      };
-      if (audio.type === "url") {
-        requestBody.audio_url = audio.url;
-      } else if (audio.type === "file") {
-        const formData = new FormData();
+      const sonioxOpts = options?.soniox;
+      if (audio.type === "file") {
         const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
-        …
-        const …
-        …
+        const uploadBody = { file: audioBlob };
+        const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
+        const payload = {
+          ...sonioxOpts,
+          model: options?.model || this.defaultModel,
+          file_id: fileResp.data.id,
+          language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
+          enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
+          enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
+          context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
+          webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
+        };
+        const createResp = await createTranscription2(payload, this.getAxiosConfig());
+        const meta = createResp.data;
+        if (options?.webhookUrl || sonioxOpts?.webhook_url) {
+          return this.normalizeTranscription(meta);
+        }
+        return this.pollForCompletion(meta.id);
+      } else if (audio.type === "url") {
+        const payload = {
+          ...sonioxOpts,
+          model: options?.model || this.defaultModel,
+          audio_url: audio.url,
+          language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
+          enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
+          enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
+          context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
+          webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
+        };
+        const createResp = await createTranscription2(payload, this.getAxiosConfig());
+        const meta = createResp.data;
+        if (options?.webhookUrl || sonioxOpts?.webhook_url) {
+          return this.normalizeTranscription(meta);
+        }
+        return this.pollForCompletion(meta.id);
       } else {
         return {
           success: false,
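Example usage of the reworked Soniox `transcribe()`, as a sketch; option names are taken from the diff and the public typings are not shown in it:

```ts
// `soniox` stands in for an initialized SonioxAdapter instance.
declare const soniox: {
  transcribe(
    audio: { type: "url"; url: string },
    options?: Record<string, unknown>
  ): Promise<{ success: boolean; data?: { id: string; status: string } }>;
};

const res = await soniox.transcribe(
  { type: "url", url: "https://example.com/call.mp3" },
  { model: "stt-async-v4", diarization: true, webhookUrl: "https://example.com/hooks/soniox" }
);
// With webhookUrl set the call returns right away with status "queued";
// without it, the adapter polls until the job completes or errors.
```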
@@ -9608,23 +9323,6 @@ var SonioxAdapter = class extends BaseAdapter {
         }
       };
     }
-      if (options?.language) {
-        requestBody.language_hints = [options.language];
-      }
-      if (options?.diarization) {
-        requestBody.enable_speaker_diarization = true;
-      }
-      if (options?.languageDetection) {
-        requestBody.enable_language_identification = true;
-      }
-      if (options?.customVocabulary && options.customVocabulary.length > 0) {
-        requestBody.context = {
-          terms: options.customVocabulary
-        };
-      }
-      const response = await this.client.post("/transcriptions", requestBody);
-      const transcriptionId = response.data.id;
-      return await this.pollForCompletion(transcriptionId);
     } catch (error) {
       return this.createErrorResponse(error);
     }
@@ -9632,9 +9330,8 @@ var SonioxAdapter = class extends BaseAdapter {
   /**
    * Get transcription result by ID
    *
-   * …
-   * …
-   * when completed.
+   * Fetches transcription metadata and, if completed, the transcript text/tokens.
+   * Used by pollForCompletion() for async polling.
    *
    * @param transcriptId - Transcript ID
    * @returns Transcription response
@@ -9642,39 +9339,20 @@ var SonioxAdapter = class extends BaseAdapter {
   async getTranscript(transcriptId) {
     this.validateConfig();
     try {
-      const …
-      const …
-      if (…
-        …
-      if (job.status !== "completed") {
-        return {
-          success: true,
-          provider: this.name,
-          data: {
-            id: job.id,
-            text: "",
-            status: job.status
-          },
-          raw: job
-        };
+      const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
+      const meta = metaResp.data;
+      if (meta.status === TranscriptionStatus.completed) {
+        try {
+          const transcriptResp = await getTranscriptionTranscript(
+            transcriptId,
+            this.getAxiosConfig()
+          );
+          return this.normalizeTranscription(meta, transcriptResp.data);
+        } catch (transcriptError) {
+          return this.createErrorResponse(transcriptError);
+        }
       }
-      …
-        `/transcriptions/${transcriptId}/transcript`
-      );
-      return this.normalizeResponse({
-        ...transcriptResponse.data,
-        // Carry over job metadata
-        id: job.id,
-        audio_duration_ms: job.audio_duration_ms
-      });
+      return this.normalizeTranscription(meta);
     } catch (error) {
       return this.createErrorResponse(error);
     }
@@ -9694,51 +9372,50 @@ var SonioxAdapter = class extends BaseAdapter {
     const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
     const createdAt = /* @__PURE__ */ new Date();
     const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
-    const wsUrl = `${wsBase}/transcribe-websocket…
-    const initMessage = {
-      …
-      model: modelId
-    };
-    if (sonioxOpts?.audioFormat) {
-      initMessage.audio_format = sonioxOpts.audioFormat;
-    } else if (options?.encoding) {
+    const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
+    wsUrl.searchParams.set("api_key", this.config.apiKey);
+    const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
+    wsUrl.searchParams.set("model", modelId);
+    if (options?.encoding) {
       const encodingMap = {
         linear16: "pcm_s16le",
         pcm: "pcm_s16le",
         mulaw: "mulaw",
         alaw: "alaw"
       };
-      …
+      wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
     }
-    if (…
-      …
+    if (options?.sampleRate) {
+      wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
     }
-    if (…
-      …
+    if (options?.channels) {
+      wsUrl.searchParams.set("num_channels", options.channels.toString());
     }
+    const sonioxOpts = options?.sonioxStreaming;
     if (sonioxOpts) {
       if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
-        …
+        wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
       }
       if (sonioxOpts.enableLanguageIdentification) {
-        …
+        wsUrl.searchParams.set("enable_language_identification", "true");
      }
      if (sonioxOpts.enableEndpointDetection) {
-        …
+        wsUrl.searchParams.set("enable_endpoint_detection", "true");
      }
      if (sonioxOpts.enableSpeakerDiarization) {
-        …
+        wsUrl.searchParams.set("enable_speaker_diarization", "true");
      }
      if (sonioxOpts.context) {
-        …
+        wsUrl.searchParams.set(
+          "context",
+          typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
+        );
      }
      if (sonioxOpts.translation) {
-        …
+        wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
      }
      if (sonioxOpts.clientReferenceId) {
-        …
+        wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
      }
    }
    if (!sonioxOpts?.languageHints && options?.language) {
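Streaming configuration now travels as URL query parameters instead of a JSON init message sent after connect. An illustration of the resulting connection URL (the host below is a placeholder; the adapter derives the real one from region/config):

```ts
// Build the WebSocket URL the way the reworked adapter does.
const u = new URL("wss://example-soniox-host/transcribe-websocket");
u.searchParams.set("api_key", "YOUR_API_KEY");
u.searchParams.set("model", "stt-rt-preview");
u.searchParams.set("enable_speaker_diarization", "true");
const ws = new WebSocket(u.toString());
```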
@@ -9747,33 +9424,24 @@ var SonioxAdapter = class extends BaseAdapter {
         `[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
       );
     }
-      …
+      wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
     }
     if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
-      …
+      wsUrl.searchParams.set("enable_speaker_diarization", "true");
     }
     if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
-      …
+      wsUrl.searchParams.set("enable_language_identification", "true");
+    }
+    if (options?.interimResults !== false) {
     }
     let status = "connecting";
     let openedAt = null;
     let receivedData = false;
     const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
-    const ws = new WebSocketImpl(wsUrl);
+    const ws = new WebSocketImpl(wsUrl.toString());
     ws.onopen = () => {
-      openedAt = Date.now();
-      const initPayload = JSON.stringify(initMessage);
-      if (callbacks?.onRawMessage) {
-        callbacks.onRawMessage({
-          provider: this.name,
-          direction: "outgoing",
-          timestamp: Date.now(),
-          payload: initPayload,
-          messageType: "init"
-        });
-      }
-      ws.send(initPayload);
       status = "open";
+      openedAt = Date.now();
       callbacks?.onOpen?.();
     };
     ws.onmessage = (event) => {
@@ -9782,7 +9450,8 @@ var SonioxAdapter = class extends BaseAdapter {
       let messageType;
       try {
         const data = JSON.parse(rawPayload);
-        …
+        const errorMessage = data.error_message || data.error;
+        if (errorMessage) {
           messageType = "error";
         } else if (data.finished) {
           messageType = "finished";
|
|
|
9798
9467
|
messageType
|
|
9799
9468
|
});
|
|
9800
9469
|
}
|
|
9801
|
-
if (
|
|
9470
|
+
if (errorMessage) {
|
|
9802
9471
|
callbacks?.onError?.({
|
|
9803
9472
|
code: data.error_code?.toString() || "STREAM_ERROR",
|
|
9804
|
-
message:
|
|
9473
|
+
message: errorMessage
|
|
9805
9474
|
});
|
|
9806
9475
|
return;
|
|
9807
9476
|
}
|
|
@@ -9815,7 +9484,7 @@ var SonioxAdapter = class extends BaseAdapter {
           start: token.start_ms ? token.start_ms / 1e3 : 0,
           end: token.end_ms ? token.end_ms / 1e3 : 0,
           confidence: token.confidence,
-          speaker: token.speaker
+          speaker: token.speaker ?? void 0
         }));
         const text = data.text || data.tokens.map((t) => t.text).join("");
         const isFinal = data.tokens.every((t) => t.is_final);
@@ -9824,8 +9493,8 @@ var SonioxAdapter = class extends BaseAdapter {
           text,
           isFinal,
           words,
-          speaker: data.tokens[0]?.speaker,
-          language: data.tokens[0]?.language,
+          speaker: data.tokens[0]?.speaker ?? void 0,
+          language: data.tokens[0]?.language ?? void 0,
           confidence: data.tokens[0]?.confidence
         };
         callbacks?.onTranscript?.(event2);
@@ -9852,10 +9521,10 @@ var SonioxAdapter = class extends BaseAdapter {
|
|
|
9852
9521
|
ws.onclose = (event) => {
|
|
9853
9522
|
status = "closed";
|
|
9854
9523
|
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
|
|
9855
|
-
const
|
|
9856
|
-
if (
|
|
9524
|
+
const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
|
|
9525
|
+
if (isImmediateClose && event.code === 1e3) {
|
|
9857
9526
|
const errorMessage = [
|
|
9858
|
-
"Soniox closed connection
|
|
9527
|
+
"Soniox closed connection immediately after opening.",
|
|
9859
9528
|
`Current config: region=${this.region}, model=${modelId}`,
|
|
9860
9529
|
"Likely causes:",
|
|
9861
9530
|
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
|
|
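
The hunks above change how the Soniox realtime stream is set up and how failures surface: the socket URL is serialized with `wsUrl.toString()`, `openedAt` is stamped after `status = "open"`, the init payload is no longer sent from `onopen`, and an error payload is recognized by either `error_message` or the shorter `error` field. A minimal consumer sketch, assuming a `connectRealtime(options, callbacks)` entry point; the method name and signature are inferred from the identifiers in this diff, not from documented API:

```js
// Sketch only: connectRealtime(options, callbacks) is assumed from the
// identifiers visible in this diff (options?.interimResults, callbacks?.onOpen, ...).
function attachSonioxStream(adapter) {
  return adapter.connectRealtime(
    { interimResults: true },
    {
      onOpen: () => console.log("Soniox stream open"),
      // As of this version one callback covers both `error_message` and `error` payloads.
      onError: (err) => console.error(`[${err.code}] ${err.message}`),
      onTranscript: (t) => console.log(t.isFinal ? "final" : "interim", t.text)
    }
  );
}
```
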
@@ -9941,7 +9610,7 @@ var SonioxAdapter = class extends BaseAdapter {
   async getModels() {
     this.validateConfig();
     try {
-      const response = await this.
+      const response = await getModels(this.getAxiosConfig());
       return response.data.models || [];
     } catch (error) {
       console.error("Failed to fetch Soniox models:", error);
@@ -9968,55 +9637,82 @@ var SonioxAdapter = class extends BaseAdapter {
       start: token.start_ms ? token.start_ms / 1e3 : 0,
       end: token.end_ms ? token.end_ms / 1e3 : 0,
       confidence: token.confidence,
-      speaker: token.speaker
+      speaker: token.speaker ?? void 0
     }));
     return buildUtterancesFromWords(words);
   }
   /**
-   * Normalize
+   * Normalize v1 API response to unified format
+   *
+   * @param meta - Transcription metadata from getTranscription/createTranscription
+   * @param transcript - Transcript data (text/tokens), only present when status is completed
    */
-
-
-
-
-
+  normalizeTranscription(meta, transcript) {
+    if (meta.status === TranscriptionStatus.error) {
+      return {
+        success: false,
+        provider: this.name,
+        data: {
+          id: meta.id,
+          text: "",
+          status: "error"
+        },
+        error: {
+          code: meta.error_type || "TRANSCRIPTION_ERROR",
+          message: meta.error_message || "Transcription failed"
+        },
+        raw: { meta, transcript }
+      };
+    }
+    if (!transcript) {
+      return {
+        success: true,
+        provider: this.name,
+        data: {
+          id: meta.id,
+          text: "",
+          status: meta.status,
+          duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
+        },
+        raw: { meta }
+      };
+    }
+    const tokens = transcript.tokens || [];
+    const text = transcript.text || tokens.map((t) => t.text).join("");
+    const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
       word: token.text,
       start: token.start_ms / 1e3,
       end: token.end_ms / 1e3,
       confidence: token.confidence,
-      speaker: token.speaker
-    }))
+      speaker: token.speaker ?? void 0
+    }));
     const speakerSet = /* @__PURE__ */ new Set();
-
-
-
-    });
-  }
+    tokens.forEach((t) => {
+      if (t.speaker) speakerSet.add(String(t.speaker));
+    });
     const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
       id,
       label: `Speaker ${id}`
     })) : void 0;
-    const
-    const
-    const language = response.tokens?.find((t) => t.language)?.language;
+    const utterances = this.buildUtterancesFromTokens(tokens);
+    const language = tokens.find((t) => t.language)?.language ?? void 0;
     return {
       success: true,
       provider: this.name,
       data: {
-        id:
+        id: meta.id,
         text,
         status: TranscriptionStatus.completed,
         language,
-        duration:
+        duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
         speakers,
         words: words.length > 0 ? words : void 0,
         utterances: utterances.length > 0 ? utterances : void 0
       },
       tracking: {
-        requestId:
-        processingTimeMs: response.total_audio_proc_ms
+        requestId: meta.id
       },
-      raw:
+      raw: { meta, transcript }
     };
   }
 };
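
The `normalizeTranscription` refactor above splits the async result into two inputs: transcription metadata (`meta`, always available) and the transcript body (`transcript`, only present once the job completes). A sketch of the resulting flow; the polling helper and fetch callbacks here are hypothetical placeholders, and only the two-argument normalizer comes from this diff:

```js
// Sketch only: pollSonioxJob, fetchMeta, and fetchTranscript are placeholders
// for the adapter's internal v1 API calls; normalizeTranscription(meta, transcript)
// is the new shape introduced above.
async function pollSonioxJob(adapter, fetchMeta, fetchTranscript, id) {
  const meta = await fetchMeta(id); // { id, status, audio_duration_ms, error_type, error_message, ... }
  // The transcript body is only fetched (and passed) once the job completes.
  const transcript = meta.status === "completed" ? await fetchTranscript(id) : void 0;
  const result = adapter.normalizeTranscription(meta, transcript);
  if (!result.success) throw new Error(`${result.error.code}: ${result.error.message}`);
  return result.data; // { id, text, status, duration?, speakers?, words?, utterances? }
}
```
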
@@ -10440,7 +10136,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
   * - Multi-channel: `MultichannelSpeechToTextResponseModel` with `transcripts[]`
   */
   normalizeResponse(response) {
-    const chunks =
+    const chunks = "transcripts" in response ? response.transcripts : [response];
     const text = chunks.map((c) => c.text).join(" ");
     const words = [];
     const speakerSet = /* @__PURE__ */ new Set();
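
The one-line ElevenLabs change above makes the normalizer handle both response shapes named in its JSDoc by checking for a `transcripts` array. The discrimination is plain JavaScript and can be exercised standalone; the sample objects below are invented for illustration:

```js
// `"transcripts" in response` distinguishes MultichannelSpeechToTextResponseModel
// from the single-channel model; both reduce to an array of transcript chunks.
const single = { text: "hello world" };
const multi = { transcripts: [{ text: "left channel" }, { text: "right channel" }] };
for (const response of [single, multi]) {
  const chunks = "transcripts" in response ? response.transcripts : [response];
  console.log(chunks.map((c) => c.text).join(" "));
}
```
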
@@ -10834,9 +10530,7 @@ var listenTranscribeQueryParams = zod.object({
   ),
   dictation: zod.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
   encoding: zod.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
-  filler_words: zod.boolean().optional().describe(
-    'Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'
-  ),
+  filler_words: zod.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
   keyterm: zod.array(zod.string()).optional().describe(
     "Key term prompting can boost or suppress specialized terminology and brands. Only compatible with Nova-3"
   ),
@@ -11540,6 +11234,7 @@ __export(assemblyAIAPI_zod_exports, {
   createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault: () => createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault,
   createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault: () => createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault,
   createTranscriptBodySummarizationDefault: () => createTranscriptBodySummarizationDefault,
+  createTranscriptBodyTemperatureDefault: () => createTranscriptBodyTemperatureDefault,
   createTranscriptResponse: () => createTranscriptResponse,
   createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault: () => createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault,
   createTranscriptResponseLanguageDetectionOptionsCodeSwitchingDefault: () => createTranscriptResponseLanguageDetectionOptionsCodeSwitchingDefault,
@@ -11609,6 +11304,7 @@ var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
 var createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault = true;
 var createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
 var createTranscriptBodySummarizationDefault = false;
+var createTranscriptBodyTemperatureDefault = 0;
 var createTranscriptBodyCustomTopicsDefault = false;
 var createTranscriptBody = zod3.object({
   audio_end_at: zod3.number().optional().describe(
@@ -11618,10 +11314,10 @@ var createTranscriptBody = zod3.object({
     "The point in time, in milliseconds, to begin transcribing in your media file. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
   ),
   auto_chapters: zod3.boolean().optional().describe(
-    "Enable [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
+    "Enable [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
   ),
   auto_highlights: zod3.boolean().optional().describe(
-    "Enable [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
+    "Enable [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights), either true or false"
   ),
   content_safety: zod3.boolean().optional().describe(
     "Enable [Content Moderation](https://www.assemblyai.com/docs/content-moderation), can be true or false"
@@ -11637,16 +11333,16 @@ var createTranscriptBody = zod3.object({
       "Object containing words or phrases to replace, and the word or phrase to replace with"
     )
   ).optional().describe(
-    "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/
+    "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
   ),
   disfluencies: zod3.boolean().optional().describe(
-    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
+    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
   ),
   domain: zod3.string().nullish().describe(
     'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
   ),
   entity_detection: zod3.boolean().optional().describe(
-    "Enable [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/
+    "Enable [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript), can be true or false"
   ),
   filter_profanity: zod3.boolean().optional().describe(
     "Filter profanity from the transcribed text, can be true or false. See [Profanity Filtering](https://www.assemblyai.com/docs/profanity-filtering) for more details."
@@ -11655,7 +11351,7 @@ var createTranscriptBody = zod3.object({
     "Enable [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio), can be true or false"
   ),
   iab_categories: zod3.boolean().optional().describe(
-    "Enable [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
+    "Enable [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics), can be true or false"
   ),
   keyterms_prompt: zod3.array(zod3.string()).optional().describe(
     "Improve accuracy with up to 200 (for Universal-2) or 1000 (for Universal-3 Pro) domain-specific words or phrases (maximum 6 words per phrase). See [Keyterms Prompting](https://www.assemblyai.com/docs/pre-recorded-audio/keyterms-prompting) for more details.\n"
@@ -11903,7 +11599,7 @@ var createTranscriptBody = zod3.object({
     "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
   ),
   multichannel: zod3.boolean().optional().describe(
-    "Enable [Multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/
+    "Enable [Multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) transcription, can be true or false."
   ),
   prompt: zod3.string().optional().describe(
     "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -11986,23 +11682,23 @@ var createTranscriptBody = zod3.object({
     "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
   ),
   sentiment_analysis: zod3.boolean().optional().describe(
-    "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
+    "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
   ),
   speaker_labels: zod3.boolean().optional().describe(
-    "Enable [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+    "Enable [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers), can be true or false"
   ),
   speaker_options: zod3.object({
     min_speakers_expected: zod3.number().default(createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault).describe(
-      "The minimum number of speakers expected in the audio file. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/
+      "The minimum number of speakers expected in the audio file. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers) for more details."
     ),
     max_speakers_expected: zod3.number().optional().describe(
-      "<Warning>Setting this parameter too high may hurt model accuracy</Warning>\nThe maximum number of speakers expected in the audio file. The default depends on audio duration: no limit for 0-2 minutes, 10 for 2-10 minutes, and 30 for 10+ minutes. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/
+      "<Warning>Setting this parameter too high may hurt model accuracy</Warning>\nThe maximum number of speakers expected in the audio file. The default depends on audio duration: no limit for 0-2 minutes, 10 for 2-10 minutes, and 30 for 10+ minutes. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers) for more details.\n"
     )
   }).optional().describe(
-    "Specify options for [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+    "Specify options for [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers). Use this to set a range of possible speakers."
   ),
   speakers_expected: zod3.number().nullish().describe(
-    "Tells the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/
+    "Tells the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
   ),
   speech_models: zod3.array(
     zod3.string().describe(
@@ -12078,7 +11774,7 @@ var createTranscriptBody = zod3.object({
     "Enable speech understanding tasks like [Translation](https://www.assemblyai.com/docs/speech-understanding/translation), [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification), and [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting). See the task-specific docs for available options and configuration.\n"
   ),
   summarization: zod3.boolean().optional().describe(
-    "Enable [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+    "Enable [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
   ),
   summary_model: zod3.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
   summary_type: zod3.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
@@ -12087,6 +11783,9 @@ var createTranscriptBody = zod3.object({
   ).or(zod3.null()).optional().describe(
     'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ),
+  temperature: zod3.number().optional().describe(
+    "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
+  ),
   webhook_auth_header_name: zod3.string().nullish().describe(
     "The header name to be sent with the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests"
   ),
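
The request schema above gains an optional `temperature` field alongside the newly exported default `createTranscriptBodyTemperatureDefault = 0`, restricted to Universal-3 Pro per its description. A hedged validation sketch; the import path and the other body fields are assumptions, and only `temperature` plus its default come from this diff:

```js
// Sketch: parse a request body through the updated zod schema. Everything
// except `temperature` / createTranscriptBodyTemperatureDefault is assumed.
import { createTranscriptBody, createTranscriptBodyTemperatureDefault } from "voice-router-dev";

const body = createTranscriptBody.parse({
  audio_url: "https://example.com/audio.mp3",
  prompt: "Two engineers discussing a rollout plan",
  temperature: 0.2 // optional; Universal-3 Pro only, per the field description
});
console.log(body.temperature ?? createTranscriptBodyTemperatureDefault); // 0.2
```
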
@@ -12108,7 +11807,7 @@ var createTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault =
|
|
|
12108
11807
|
var createTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
|
|
12109
11808
|
var createTranscriptResponse = zod3.object({
|
|
12110
11809
|
audio_channels: zod3.number().optional().describe(
|
|
12111
|
-
"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
11810
|
+
"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
|
|
12112
11811
|
),
|
|
12113
11812
|
audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
|
|
12114
11813
|
audio_end_at: zod3.number().nullish().describe(
|
|
@@ -12119,10 +11818,10 @@ var createTranscriptResponse = zod3.object({
|
|
|
12119
11818
|
),
|
|
12120
11819
|
audio_url: zod3.string().describe("The URL of the media that was transcribed"),
|
|
12121
11820
|
auto_chapters: zod3.boolean().nullish().describe(
|
|
12122
|
-
"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
|
|
11821
|
+
"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
|
|
12123
11822
|
),
|
|
12124
11823
|
auto_highlights: zod3.boolean().describe(
|
|
12125
|
-
"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
11824
|
+
"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
|
|
12126
11825
|
),
|
|
12127
11826
|
auto_highlights_result: zod3.object({
|
|
12128
11827
|
status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
|
|
@@ -12142,9 +11841,9 @@ var createTranscriptResponse = zod3.object({
|
|
|
12142
11841
|
})
|
|
12143
11842
|
).describe("A temporally-sequential array of Key Phrases")
|
|
12144
11843
|
}).describe(
|
|
12145
|
-
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
11844
|
+
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
|
|
12146
11845
|
).or(zod3.null()).optional().describe(
|
|
12147
|
-
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
11846
|
+
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
|
|
12148
11847
|
),
|
|
12149
11848
|
chapters: zod3.array(
|
|
12150
11849
|
zod3.object({
|
|
@@ -12157,7 +11856,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12157
11856
|
end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
|
|
12158
11857
|
}).describe("Chapter of the audio file")
|
|
12159
11858
|
).nullish().describe(
|
|
12160
|
-
"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
|
|
11859
|
+
"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
|
|
12161
11860
|
),
|
|
12162
11861
|
confidence: zod3.number().nullish().describe(
|
|
12163
11862
|
"The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
|
|
@@ -12213,10 +11912,10 @@ var createTranscriptResponse = zod3.object({
|
|
|
12213
11912
|
"Object containing words or phrases to replace, and the word or phrase to replace with"
|
|
12214
11913
|
)
|
|
12215
11914
|
).nullish().describe(
|
|
12216
|
-
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
11915
|
+
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
12217
11916
|
),
|
|
12218
11917
|
disfluencies: zod3.boolean().nullish().describe(
|
|
12219
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
|
|
11918
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
12220
11919
|
),
|
|
12221
11920
|
domain: zod3.string().nullish().describe(
|
|
12222
11921
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -12278,10 +11977,10 @@ var createTranscriptResponse = zod3.object({
|
|
|
12278
11977
|
)
|
|
12279
11978
|
}).describe("A detected entity")
|
|
12280
11979
|
).nullish().describe(
|
|
12281
|
-
"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
11980
|
+
"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
|
|
12282
11981
|
),
|
|
12283
11982
|
entity_detection: zod3.boolean().nullish().describe(
|
|
12284
|
-
"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
11983
|
+
"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
|
|
12285
11984
|
),
|
|
12286
11985
|
error: zod3.string().optional().describe("Error message of why the transcript failed"),
|
|
12287
11986
|
filter_profanity: zod3.boolean().nullish().describe(
|
|
@@ -12291,7 +11990,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12291
11990
|
"Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
|
|
12292
11991
|
),
|
|
12293
11992
|
iab_categories: zod3.boolean().nullish().describe(
|
|
12294
|
-
"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
11993
|
+
"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
|
|
12295
11994
|
),
|
|
12296
11995
|
iab_categories_result: zod3.object({
|
|
12297
11996
|
status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
|
|
@@ -12314,9 +12013,9 @@ var createTranscriptResponse = zod3.object({
|
|
|
12314
12013
|
).describe("An array of results for the Topic Detection model"),
|
|
12315
12014
|
summary: zod3.record(zod3.string(), zod3.number()).describe("The overall relevance of topic to the entire audio file")
|
|
12316
12015
|
}).describe(
|
|
12317
|
-
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12016
|
+
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
|
|
12318
12017
|
).or(zod3.null()).optional().describe(
|
|
12319
|
-
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12018
|
+
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
|
|
12320
12019
|
),
|
|
12321
12020
|
id: zod3.string().uuid().describe("The unique identifier of your transcript"),
|
|
12322
12021
|
keyterms_prompt: zod3.array(zod3.string()).optional().describe(
|
|
@@ -12566,7 +12265,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12566
12265
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
12567
12266
|
),
|
|
12568
12267
|
multichannel: zod3.boolean().nullish().describe(
|
|
12569
|
-
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12268
|
+
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
12570
12269
|
),
|
|
12571
12270
|
prompt: zod3.string().optional().describe(
|
|
12572
12271
|
"Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
@@ -12649,7 +12348,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12649
12348
|
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12650
12349
|
),
|
|
12651
12350
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
12652
|
-
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
|
|
12351
|
+
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
12653
12352
|
),
|
|
12654
12353
|
sentiment_analysis_results: zod3.array(
|
|
12655
12354
|
zod3.object({
|
|
@@ -12664,17 +12363,17 @@ var createTranscriptResponse = zod3.object({
|
|
|
12664
12363
|
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12665
12364
|
),
|
|
12666
12365
|
speaker: zod3.string().nullable().describe(
|
|
12667
|
-
"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12366
|
+
"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
12668
12367
|
)
|
|
12669
12368
|
}).describe("The result of the Sentiment Analysis model")
|
|
12670
12369
|
).nullish().describe(
|
|
12671
|
-
"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
|
|
12370
|
+
"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
|
|
12672
12371
|
),
|
|
12673
12372
|
speaker_labels: zod3.boolean().nullish().describe(
|
|
12674
|
-
"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12373
|
+
"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
|
|
12675
12374
|
),
|
|
12676
12375
|
speakers_expected: zod3.number().nullish().describe(
|
|
12677
|
-
"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12376
|
+
"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
|
|
12678
12377
|
),
|
|
12679
12378
|
speech_model_used: zod3.string().optional().describe(
|
|
12680
12379
|
"The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
|
|
@@ -12777,22 +12476,25 @@ var createTranscriptResponse = zod3.object({
|
|
|
12777
12476
|
"The status of your transcript. Possible values are queued, processing, completed, or error."
|
|
12778
12477
|
),
|
|
12779
12478
|
summarization: zod3.boolean().describe(
|
|
12780
|
-
"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
12479
|
+
"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
|
|
12781
12480
|
),
|
|
12782
12481
|
summary: zod3.string().nullish().describe(
|
|
12783
|
-
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
12482
|
+
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
12784
12483
|
),
|
|
12785
12484
|
summary_model: zod3.string().nullish().describe(
|
|
12786
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
12485
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
12787
12486
|
),
|
|
12788
12487
|
summary_type: zod3.string().nullish().describe(
|
|
12789
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
12488
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
12790
12489
|
),
|
|
12791
12490
|
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
12792
12491
|
"Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
12793
12492
|
).or(zod3.null()).optional().describe(
|
|
12794
12493
|
"Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
12795
12494
|
),
|
|
12495
|
+
temperature: zod3.number().nullish().describe(
|
|
12496
|
+
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
12497
|
+
),
|
|
12796
12498
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
12797
12499
|
throttled: zod3.boolean().nullish().describe(
|
|
12798
12500
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
@@ -12813,7 +12515,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12813
12515
|
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12814
12516
|
),
|
|
12815
12517
|
speaker: zod3.string().nullable().describe(
|
|
12816
|
-
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12518
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
12817
12519
|
)
|
|
12818
12520
|
})
|
|
12819
12521
|
).describe("The words in the utterance."),
|
|
@@ -12828,7 +12530,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12828
12530
|
)
|
|
12829
12531
|
})
|
|
12830
12532
|
).nullish().describe(
|
|
12831
|
-
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12533
|
+
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
12832
12534
|
),
|
|
12833
12535
|
webhook_auth: zod3.boolean().describe(
|
|
12834
12536
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
@@ -12852,7 +12554,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12852
12554
|
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12853
12555
|
),
|
|
12854
12556
|
speaker: zod3.string().nullable().describe(
|
|
12855
|
-
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12557
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
12856
12558
|
)
|
|
12857
12559
|
})
|
|
12858
12560
|
).nullish().describe(
|
|
@@ -12925,7 +12627,7 @@ var getTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault = tr
|
|
|
12925
12627
|
var getTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
|
|
12926
12628
|
var getTranscriptResponse = zod3.object({
|
|
12927
12629
|
audio_channels: zod3.number().optional().describe(
|
|
12928
|
-
"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12630
|
+
"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
|
|
12929
12631
|
),
|
|
12930
12632
|
audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
|
|
12931
12633
|
audio_end_at: zod3.number().nullish().describe(
|
|
@@ -12936,10 +12638,10 @@ var getTranscriptResponse = zod3.object({
|
|
|
12936
12638
|
),
|
|
12937
12639
|
audio_url: zod3.string().describe("The URL of the media that was transcribed"),
|
|
12938
12640
|
auto_chapters: zod3.boolean().nullish().describe(
|
|
12939
|
-
"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
|
|
12641
|
+
"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
|
|
12940
12642
|
),
|
|
12941
12643
|
auto_highlights: zod3.boolean().describe(
|
|
12942
|
-
"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
12644
|
+
"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
|
|
12943
12645
|
),
|
|
12944
12646
|
auto_highlights_result: zod3.object({
|
|
12945
12647
|
status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
|
|
@@ -12959,9 +12661,9 @@ var getTranscriptResponse = zod3.object({
|
|
|
12959
12661
|
})
|
|
12960
12662
|
).describe("A temporally-sequential array of Key Phrases")
|
|
12961
12663
|
}).describe(
|
|
12962
|
-
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
12664
|
+
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
|
|
12963
12665
|
).or(zod3.null()).optional().describe(
|
|
12964
|
-
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
12666
|
+
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
|
|
12965
12667
|
),
|
|
12966
12668
|
chapters: zod3.array(
|
|
12967
12669
|
zod3.object({
|
|
@@ -12974,7 +12676,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
12974
12676
|
end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
|
|
12975
12677
|
}).describe("Chapter of the audio file")
|
|
12976
12678
|
).nullish().describe(
|
|
12977
|
-
"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
|
|
12679
|
+
"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
|
|
12978
12680
|
),
|
|
12979
12681
|
confidence: zod3.number().nullish().describe(
|
|
12980
12682
|
"The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
|
|
@@ -13030,10 +12732,10 @@ var getTranscriptResponse = zod3.object({
|
|
|
13030
12732
|
"Object containing words or phrases to replace, and the word or phrase to replace with"
|
|
13031
12733
|
)
|
|
13032
12734
|
).nullish().describe(
|
|
13033
|
-
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12735
|
+
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
13034
12736
|
),
|
|
13035
12737
|
disfluencies: zod3.boolean().nullish().describe(
|
|
13036
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
|
|
12738
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
13037
12739
|
),
|
|
13038
12740
|
domain: zod3.string().nullish().describe(
|
|
13039
12741
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -13095,10 +12797,10 @@ var getTranscriptResponse = zod3.object({
|
|
|
13095
12797
|
)
|
|
13096
12798
|
}).describe("A detected entity")
|
|
13097
12799
|
).nullish().describe(
|
|
13098
|
-
"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12800
|
+
"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
|
|
13099
12801
|
),
|
|
13100
12802
|
entity_detection: zod3.boolean().nullish().describe(
|
|
13101
|
-
"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12803
|
+
"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
|
|
13102
12804
|
),
|
|
13103
12805
|
error: zod3.string().optional().describe("Error message of why the transcript failed"),
|
|
13104
12806
|
filter_profanity: zod3.boolean().nullish().describe(
|
|
@@ -13108,7 +12810,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13108
12810
|
"Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
|
|
13109
12811
|
),
|
|
13110
12812
|
iab_categories: zod3.boolean().nullish().describe(
|
|
13111
|
-
"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12813
|
+
"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
|
|
13112
12814
|
),
|
|
13113
12815
|
iab_categories_result: zod3.object({
|
|
13114
12816
|
status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
|
|
@@ -13131,9 +12833,9 @@ var getTranscriptResponse = zod3.object({
|
|
|
13131
12833
|
).describe("An array of results for the Topic Detection model"),
|
|
13132
12834
|
summary: zod3.record(zod3.string(), zod3.number()).describe("The overall relevance of topic to the entire audio file")
|
|
13133
12835
|
}).describe(
|
|
13134
|
-
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12836
|
+
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
|
|
13135
12837
|
).or(zod3.null()).optional().describe(
|
|
13136
|
-
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12838
|
+
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
|
|
13137
12839
|
),
|
|
13138
12840
|
id: zod3.string().uuid().describe("The unique identifier of your transcript"),
|
|
13139
12841
|
keyterms_prompt: zod3.array(zod3.string()).optional().describe(
|
|
@@ -13383,7 +13085,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13383
13085
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
13384
13086
|
),
|
|
13385
13087
|
multichannel: zod3.boolean().nullish().describe(
|
|
13386
|
-
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13088
|
+
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
13387
13089
|
),
|
|
13388
13090
|
prompt: zod3.string().optional().describe(
|
|
13389
13091
|
"Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
@@ -13466,7 +13168,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13466
13168
|
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
13467
13169
|
),
|
|
13468
13170
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
13469
|
-
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
|
|
13171
|
+
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
13470
13172
|
),
|
|
13471
13173
|
sentiment_analysis_results: zod3.array(
|
|
13472
13174
|
zod3.object({
|
|
@@ -13481,17 +13183,17 @@ var getTranscriptResponse = zod3.object({
|
|
|
13481
13183
|
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13482
13184
|
),
|
|
13483
13185
|
speaker: zod3.string().nullable().describe(
|
|
13484
|
-
"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13186
|
+
"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13485
13187
|
)
|
|
13486
13188
|
}).describe("The result of the Sentiment Analysis model")
|
|
13487
13189
|
).nullish().describe(
|
|
13488
|
-
"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
|
|
13190
|
+
"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
|
|
13489
13191
|
),
|
|
13490
13192
|
speaker_labels: zod3.boolean().nullish().describe(
|
|
13491
|
-
"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13193
|
+
"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
|
|
13492
13194
|
),
|
|
13493
13195
|
speakers_expected: zod3.number().nullish().describe(
|
|
13494
|
-
"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13196
|
+
"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
|
|
13495
13197
|
),
|
|
13496
13198
|
speech_model_used: zod3.string().optional().describe(
|
|
13497
13199
|
"The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
|
|
@@ -13594,22 +13296,25 @@ var getTranscriptResponse = zod3.object({
|
|
|
13594
13296
|
"The status of your transcript. Possible values are queued, processing, completed, or error."
|
|
13595
13297
|
),
|
|
13596
13298
|
summarization: zod3.boolean().describe(
|
|
13597
|
-
"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
13299
|
+
"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
|
|
13598
13300
|
),
|
|
13599
13301
|
summary: zod3.string().nullish().describe(
|
|
13600
|
-
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
13302
|
+
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13601
13303
|
),
|
|
13602
13304
|
summary_model: zod3.string().nullish().describe(
|
|
13603
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
13305
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13604
13306
|
),
|
|
13605
13307
|
summary_type: zod3.string().nullish().describe(
|
|
13606
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
13308
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13607
13309
|
),
|
|
13608
13310
|
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
13609
13311
|
"Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
13610
13312
|
).or(zod3.null()).optional().describe(
|
|
13611
13313
|
"Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
13612
13314
|
),
|
|
13315
|
+
temperature: zod3.number().nullish().describe(
|
|
13316
|
+
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
13317
|
+
),
|
|
13613
13318
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
13614
13319
|
throttled: zod3.boolean().nullish().describe(
|
|
13615
13320
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
@@ -13630,7 +13335,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13630
13335
|
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13631
13336
|
),
|
|
13632
13337
|
speaker: zod3.string().nullable().describe(
|
|
13633
|
-
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13338
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13634
13339
|
)
|
|
13635
13340
|
})
|
|
13636
13341
|
).describe("The words in the utterance."),
|
|
@@ -13645,7 +13350,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13645
13350
|
)
|
|
13646
13351
|
})
|
|
13647
13352
|
).nullish().describe(
|
|
13648
|
-
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13353
|
+
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13649
13354
|
),
|
|
13650
13355
|
webhook_auth: zod3.boolean().describe(
|
|
13651
13356
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
@@ -13669,7 +13374,7 @@ var getTranscriptResponse = zod3.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: zod3.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).nullish().describe(
@@ -13702,7 +13407,7 @@ var deleteTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault =
 var deleteTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
 var deleteTranscriptResponse = zod3.object({
 audio_channels: zod3.number().optional().describe(
-"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
 ),
 audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
 audio_end_at: zod3.number().nullish().describe(
@@ -13713,10 +13418,10 @@ var deleteTranscriptResponse = zod3.object({
 ),
 audio_url: zod3.string().describe("The URL of the media that was transcribed"),
 auto_chapters: zod3.boolean().nullish().describe(
-"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
 ),
 auto_highlights: zod3.boolean().describe(
-"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
 ),
 auto_highlights_result: zod3.object({
 status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -13736,9 +13441,9 @@ var deleteTranscriptResponse = zod3.object({
 })
 ).describe("A temporally-sequential array of Key Phrases")
 }).describe(
-"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/
+"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
 ).or(zod3.null()).optional().describe(
-"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
+"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
 ),
 chapters: zod3.array(
 zod3.object({
@@ -13751,7 +13456,7 @@ var deleteTranscriptResponse = zod3.object({
 end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
 }).describe("Chapter of the audio file")
 ).nullish().describe(
-"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
+"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
 ),
 confidence: zod3.number().nullish().describe(
 "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
@@ -13807,10 +13512,10 @@ var deleteTranscriptResponse = zod3.object({
 "Object containing words or phrases to replace, and the word or phrase to replace with"
 )
 ).nullish().describe(
-"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/
+"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
 ),
 disfluencies: zod3.boolean().nullish().describe(
-'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
+'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
 ),
 domain: zod3.string().nullish().describe(
 'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -13872,10 +13577,10 @@ var deleteTranscriptResponse = zod3.object({
 )
 }).describe("A detected entity")
 ).nullish().describe(
-"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/
+"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
 ),
 entity_detection: zod3.boolean().nullish().describe(
-"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
 ),
 error: zod3.string().optional().describe("Error message of why the transcript failed"),
 filter_profanity: zod3.boolean().nullish().describe(
@@ -13885,7 +13590,7 @@ var deleteTranscriptResponse = zod3.object({
 "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
 ),
 iab_categories: zod3.boolean().nullish().describe(
-"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
 ),
 iab_categories_result: zod3.object({
 status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -13908,9 +13613,9 @@ var deleteTranscriptResponse = zod3.object({
 ).describe("An array of results for the Topic Detection model"),
 summary: zod3.record(zod3.string(), zod3.number()).describe("The overall relevance of topic to the entire audio file")
 }).describe(
-"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
+"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
 ).or(zod3.null()).optional().describe(
-"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
+"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
 ),
 id: zod3.string().uuid().describe("The unique identifier of your transcript"),
 keyterms_prompt: zod3.array(zod3.string()).optional().describe(
@@ -14160,7 +13865,7 @@ var deleteTranscriptResponse = zod3.object({
 "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
 ),
 multichannel: zod3.boolean().nullish().describe(
-"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/
+"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
 ),
 prompt: zod3.string().optional().describe(
 "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -14243,7 +13948,7 @@ var deleteTranscriptResponse = zod3.object({
 "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
 ),
 sentiment_analysis: zod3.boolean().nullish().describe(
-"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
+"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
 ),
 sentiment_analysis_results: zod3.array(
 zod3.object({
@@ -14258,17 +13963,17 @@ var deleteTranscriptResponse = zod3.object({
 "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: zod3.string().nullable().describe(
-"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 }).describe("The result of the Sentiment Analysis model")
 ).nullish().describe(
-"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
+"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
 ),
 speaker_labels: zod3.boolean().nullish().describe(
-"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
 ),
 speakers_expected: zod3.number().nullish().describe(
-"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/
+"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
 ),
 speech_model_used: zod3.string().optional().describe(
 "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
@@ -14371,22 +14076,25 @@ var deleteTranscriptResponse = zod3.object({
 "The status of your transcript. Possible values are queued, processing, completed, or error."
 ),
 summarization: zod3.boolean().describe(
-"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
 ),
 summary: zod3.string().nullish().describe(
-"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
 ),
 summary_model: zod3.string().nullish().describe(
-"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
 ),
 summary_type: zod3.string().nullish().describe(
-"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
 ),
 remove_audio_tags: zod3.enum(["all"]).describe(
 "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
 ).or(zod3.null()).optional().describe(
 "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
 ),
+temperature: zod3.number().nullish().describe(
+"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
+),
 text: zod3.string().nullish().describe("The textual transcript of your media file"),
 throttled: zod3.boolean().nullish().describe(
 "True while a request is throttled and false when a request is no longer throttled"
@@ -14407,7 +14115,7 @@ var deleteTranscriptResponse = zod3.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: zod3.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).describe("The words in the utterance."),
@@ -14422,7 +14130,7 @@ var deleteTranscriptResponse = zod3.object({
 )
 })
 ).nullish().describe(
-"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
 ),
 webhook_auth: zod3.boolean().describe(
 "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
@@ -14446,7 +14154,7 @@ var deleteTranscriptResponse = zod3.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: zod3.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).nullish().describe(
@@ -14491,7 +14199,7 @@ var getTranscriptSentencesResponse = zod3.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: zod3.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).describe("An array of words in the sentence"),
@@ -14499,7 +14207,7 @@ var getTranscriptSentencesResponse = zod3.object({
 "The channel of the sentence. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: zod3.string().nullable().describe(
-"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).describe("An array of sentences in the transcript")
@@ -14527,7 +14235,7 @@ var getTranscriptParagraphsResponse = zod3.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: zod3.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).describe("An array of words in the paragraph")
@@ -17139,23 +16847,6 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: zod5.object({
-success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
-error: zod5.object({
-status_code: zod5.number().describe("Status code of the addon error"),
-exception: zod5.string().describe("Reason of the addon error"),
-message: zod5.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: zod5.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: zod5.object({
 success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -18636,23 +18327,6 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: zod5.object({
-success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
-error: zod5.object({
-status_code: zod5.number().describe("Status code of the addon error"),
-exception: zod5.string().describe("Reason of the addon error"),
-message: zod5.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: zod5.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: zod5.object({
 success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -20790,23 +20464,6 @@ var transcriptionControllerListV2Response = zod5.object({
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: zod5.object({
-success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
-error: zod5.object({
-status_code: zod5.number().describe("Status code of the addon error"),
-exception: zod5.string().describe("Reason of the addon error"),
-message: zod5.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: zod5.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: zod5.object({
 success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -21106,11 +20763,7 @@ var transcriptionControllerListV2Response = zod5.object({
 channels: zod5.number().min(1).max(transcriptionControllerListV2ResponseItemsItemRequestParamsChannelsMax).default(
 transcriptionControllerListV2ResponseItemsItemRequestParamsChannelsDefault
 ).describe("The number of channels of the audio stream"),
-model: zod5.enum(["solaria-1"]).describe(
-'The model used to process the audio. "solaria-1" is used by default.'
-).default(transcriptionControllerListV2ResponseItemsItemRequestParamsModelDefault).describe(
-'The model used to process the audio. "solaria-1" is used by default.'
-),
+model: zod5.enum(["solaria-1"]).describe('The model used to process the audio. "solaria-1" is used by default.').default(transcriptionControllerListV2ResponseItemsItemRequestParamsModelDefault).describe('The model used to process the audio. "solaria-1" is used by default.'),
 endpointing: zod5.number().min(transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingMin).max(transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingMax).default(
 transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingDefault
 ).describe(
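The `model` change above is purely a formatting collapse in the bundle: the chained `.describe(...).default(...).describe(...)` calls are the same expression on one line, and `.default()` still fills in the model when the field is omitted. A small sketch, under the assumption that the `...ModelDefault` constant is `"solaria-1"` (consistent with the description text; the constant itself is defined elsewhere in the bundle):

```ts
import { z } from "zod";

// Assumed equivalent of the collapsed one-liner, with the default constant inlined.
const model = z.enum(["solaria-1"])
  .describe('The model used to process the audio. "solaria-1" is used by default.')
  .default("solaria-1");

console.log(model.parse(undefined));   // "solaria-1" -- .default() applies when the field is missing
console.log(model.parse("solaria-1")); // "solaria-1"
```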
@@ -23534,23 +23187,6 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: zod5.object({
-success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
-error: zod5.object({
-status_code: zod5.number().describe("Status code of the addon error"),
-exception: zod5.string().describe("Reason of the addon error"),
-message: zod5.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: zod5.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: zod5.object({
 success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -26716,23 +26352,6 @@ var historyControllerGetListV1Response = zod5.object({
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: zod5.object({
-success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: zod5.number().describe("Time audio intelligence model took to complete the task"),
-error: zod5.object({
-status_code: zod5.number().describe("Status code of the addon error"),
-exception: zod5.string().describe("Reason of the addon error"),
-message: zod5.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: zod5.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: zod5.object({
 success: zod5.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: zod5.boolean().describe("The audio intelligence model returned an empty value"),
@@ -27026,11 +26645,7 @@ var historyControllerGetListV1Response = zod5.object({
 historyControllerGetListV1ResponseItemsItemRequestParamsSampleRateDefault
 ).describe("The sample rate of the audio stream"),
 channels: zod5.number().min(1).max(historyControllerGetListV1ResponseItemsItemRequestParamsChannelsMax).default(historyControllerGetListV1ResponseItemsItemRequestParamsChannelsDefault).describe("The number of channels of the audio stream"),
-model: zod5.enum(["solaria-1"]).describe(
-'The model used to process the audio. "solaria-1" is used by default.'
-).default(historyControllerGetListV1ResponseItemsItemRequestParamsModelDefault).describe(
-'The model used to process the audio. "solaria-1" is used by default.'
-),
+model: zod5.enum(["solaria-1"]).describe('The model used to process the audio. "solaria-1" is used by default.').default(historyControllerGetListV1ResponseItemsItemRequestParamsModelDefault).describe('The model used to process the audio. "solaria-1" is used by default.'),
 endpointing: zod5.number().min(historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingMin).max(historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingMax).default(
 historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingDefault
 ).describe(
@@ -36191,6 +35806,7 @@ __export(sonioxPublicAPI_zod_exports, {
 createTemporaryApiKeyBody: () => createTemporaryApiKeyBody,
 createTemporaryApiKeyBodyClientReferenceIdMaxOne: () => createTemporaryApiKeyBodyClientReferenceIdMaxOne,
 createTemporaryApiKeyBodyExpiresInSecondsMax: () => createTemporaryApiKeyBodyExpiresInSecondsMax,
+createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne: () => createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne,
 createTranscriptionBody: () => createTranscriptionBody2,
 createTranscriptionBodyAudioUrlMaxOne: () => createTranscriptionBodyAudioUrlMaxOne,
 createTranscriptionBodyAudioUrlRegExpOne: () => createTranscriptionBodyAudioUrlRegExpOne,
@@ -36321,11 +35937,11 @@ var getTranscriptionsResponse = zod10.object({
 });
 var createTranscriptionBodyModelMaxThree = 32;
 var createTranscriptionBodyAudioUrlMaxOne = 4096;
-var createTranscriptionBodyAudioUrlRegExpOne =
+var createTranscriptionBodyAudioUrlRegExpOne = /^https?:\/\/[^\s]+$/;
 var createTranscriptionBodyLanguageHintsItemMax = 10;
 var createTranscriptionBodyLanguageHintsMaxOne = 100;
 var createTranscriptionBodyWebhookUrlMaxOne = 256;
-var createTranscriptionBodyWebhookUrlRegExpOne =
+var createTranscriptionBodyWebhookUrlRegExpOne = /^https?:\/\/[^\s]+$/;
 var createTranscriptionBodyWebhookAuthHeaderNameMaxOne = 256;
 var createTranscriptionBodyWebhookAuthHeaderValueMaxOne = 256;
 var createTranscriptionBodyClientReferenceIdMaxOne = 256;
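The right-hand side above shows regex literals whose values are cut off on the 0.8.9 side of this rendering. A quick sketch of what `/^https?:\/\/[^\s]+$/` accepts (the URLs are placeholders):

```ts
// The pattern shown on the right-hand side above.
const audioUrlRegExp = /^https?:\/\/[^\s]+$/;

console.log(audioUrlRegExp.test("https://example.com/audio.wav")); // true
console.log(audioUrlRegExp.test("ftp://example.com/audio.wav"));   // false -- only http(s) schemes
console.log(audioUrlRegExp.test("https://example.com/a b.wav"));   // false -- whitespace rejected
```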
@@ -36473,22 +36089,25 @@ var getModelsResponse = zod10.object({
 });
 var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
 var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
+var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
 var createTemporaryApiKeyBody = zod10.object({
 usage_type: zod10.enum(["transcribe_websocket"]),
 expires_in_seconds: zod10.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
-client_reference_id: zod10.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(zod10.null()).optional().describe("Optional tracking identifier string. Does not need to be unique.")
+client_reference_id: zod10.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(zod10.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
+single_use: zod10.boolean().or(zod10.null()).optional().describe("If true, the temporary API key can be used only once."),
+max_session_duration_seconds: zod10.number().min(1).max(createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne).or(zod10.null()).optional().describe(
+"Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
+)
 });

 // src/generated/soniox/streaming-types.zod.ts
 var streaming_types_zod_exports = {};
 __export(streaming_types_zod_exports, {
 sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
-sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
 sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
 sonioxContextSchema: () => sonioxContextSchema,
 sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
 sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
-sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
 sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
 sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
 sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
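The hunk above adds `single_use` and `max_session_duration_seconds` (capped at `18e3`, i.e. 18000 seconds) to the Soniox temporary-API-key request schema. A self-contained sketch that re-declares the schema locally, with the Max constants (3600, 256, 18000) inlined for illustration; the bundle itself references named constants:

```ts
import { z } from "zod";

// Local re-statement of createTemporaryApiKeyBody as it reads after this hunk.
const createTemporaryApiKeyBody = z.object({
  usage_type: z.enum(["transcribe_websocket"]),
  expires_in_seconds: z.number().min(1).max(3600),
  client_reference_id: z.string().max(256).or(z.null()).optional(),
  single_use: z.boolean().or(z.null()).optional(),
  max_session_duration_seconds: z.number().min(1).max(18000).or(z.null()).optional(),
});

// A request body exercising both fields added in 0.9.1.
const body = createTemporaryApiKeyBody.parse({
  usage_type: "transcribe_websocket",
  expires_in_seconds: 300,
  single_use: true,
  max_session_duration_seconds: 1800,
});
console.log(body.max_session_duration_seconds); // 1800
```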
@@ -36502,7 +36121,7 @@ __export(streaming_types_zod_exports, {
 streamingUpdateConfigParams: () => streamingUpdateConfigParams3
 });
 import { z as zod11 } from "zod";
-var
+var sonioxAudioFormatSchema = zod11.enum([
 "auto",
 "aac",
 "aiff",
@@ -36512,10 +36131,7 @@ var sonioxAutoDetectedAudioFormatSchema = zod11.enum([
 "mp3",
 "ogg",
 "wav",
-"webm"
-]);
-var sonioxPcmAudioEncodingSchema = zod11.enum([
-// Signed PCM
+"webm",
 "pcm_s8",
 "pcm_s16le",
 "pcm_s16be",
|
 "pcm_s24be",
 "pcm_s32le",
 "pcm_s32be",
-// Unsigned PCM
 "pcm_u8",
 "pcm_u16le",
 "pcm_u16be",
|
 "pcm_u24be",
 "pcm_u32le",
 "pcm_u32be",
-// Float PCM
 "pcm_f32le",
 "pcm_f32be",
 "pcm_f64le",
 "pcm_f64be",
-// Companded
 "mulaw",
 "alaw"
 ]);
-var sonioxAudioFormatSchema = zod11.union([
-sonioxAutoDetectedAudioFormatSchema,
-sonioxPcmAudioEncodingSchema
-]);
 var sonioxOneWayTranslationSchema = zod11.object({
 type: zod11.literal("one_way"),
-target_language: zod11.string()
+target_language: zod11.string()
 });
 var sonioxTwoWayTranslationSchema = zod11.object({
 type: zod11.literal("two_way"),
-language_a: zod11.string()
-language_b: zod11.string()
+language_a: zod11.string(),
+language_b: zod11.string()
 });
 var sonioxTranslationConfigSchema = zod11.union([
 sonioxOneWayTranslationSchema,
 sonioxTwoWayTranslationSchema
 ]);
 var sonioxContextGeneralItemSchema = zod11.object({
-key: zod11.string()
-value: zod11.string()
+key: zod11.string(),
+value: zod11.string()
 });
 var sonioxTranslationTermSchema = zod11.object({
-source: zod11.string()
-target: zod11.string()
+source: zod11.string(),
+target: zod11.string()
 });
 var sonioxStructuredContextSchema = zod11.object({
-general: zod11.array(sonioxContextGeneralItemSchema).optional()
-text: zod11.string().optional()
-terms: zod11.array(zod11.string()).optional()
-translation_terms: zod11.array(sonioxTranslationTermSchema).optional()
+general: zod11.array(sonioxContextGeneralItemSchema).optional(),
+text: zod11.string().optional(),
+terms: zod11.array(zod11.string()).optional(),
+translation_terms: zod11.array(sonioxTranslationTermSchema).optional()
 });
 var sonioxContextSchema = zod11.union([sonioxStructuredContextSchema, zod11.string()]);
 var sonioxRealtimeModelSchema = zod11.enum([
+"stt-rt-v4",
 "stt-rt-v3",
 "stt-rt-preview",
 "stt-rt-v3-preview",
 "stt-rt-preview-v2"
 ]);
 var streamingTranscriberParams3 = zod11.object({
-model: sonioxRealtimeModelSchema
-audioFormat: sonioxAudioFormatSchema.optional()
-sampleRate: zod11.number().optional()
-numChannels: zod11.number().
-languageHints: zod11.array(zod11.string()).optional()
-context: sonioxContextSchema.optional()
-enableSpeakerDiarization: zod11.boolean().optional()
-enableLanguageIdentification: zod11.boolean().optional()
-enableEndpointDetection: zod11.boolean().optional()
-translation: sonioxTranslationConfigSchema.optional()
-clientReferenceId: zod11.string().optional()
-});
-var sonioxTranslationStatusSchema = zod11.enum(["
+model: sonioxRealtimeModelSchema,
+audioFormat: sonioxAudioFormatSchema.optional(),
+sampleRate: zod11.number().optional(),
+numChannels: zod11.number().optional(),
+languageHints: zod11.array(zod11.string()).optional(),
+context: sonioxContextSchema.optional(),
+enableSpeakerDiarization: zod11.boolean().optional(),
+enableLanguageIdentification: zod11.boolean().optional(),
+enableEndpointDetection: zod11.boolean().optional(),
+translation: sonioxTranslationConfigSchema.optional(),
+clientReferenceId: zod11.string().optional()
+});
+var sonioxTranslationStatusSchema = zod11.enum(["original", "translation", "none"]);
 var sonioxTokenSchema = zod11.object({
-text: zod11.string()
-start_ms: zod11.number().optional()
-end_ms: zod11.number().optional()
-confidence: zod11.number()
-is_final: zod11.boolean()
-speaker: zod11.string().optional()
-
-
-
+text: zod11.string(),
+start_ms: zod11.number().optional(),
+end_ms: zod11.number().optional(),
+confidence: zod11.number(),
+is_final: zod11.boolean(),
+speaker: zod11.string().optional(),
+translation_status: sonioxTranslationStatusSchema.optional(),
+language: zod11.string().optional(),
+source_language: zod11.string().optional()
 });
 var sonioxStreamingResponseSchema = zod11.object({
-text: zod11.string()
-tokens: zod11.array(sonioxTokenSchema)
-final_audio_proc_ms: zod11.number()
-total_audio_proc_ms: zod11.number()
-finished: zod11.boolean().optional()
-
-
+text: zod11.string(),
+tokens: zod11.array(sonioxTokenSchema),
+final_audio_proc_ms: zod11.number(),
+total_audio_proc_ms: zod11.number(),
+finished: zod11.boolean().optional(),
+error_code: zod11.number().optional(),
+error_message: zod11.string().optional()
 });
 var sonioxRecorderStateSchema = zod11.enum([
 "Init",
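After this hunk the Soniox streaming schemas read as valid object literals again (the 0.8.9 side renders without separating commas), `sonioxAudioFormatSchema` is one flat enum covering both container formats and raw PCM encodings, and `stt-rt-v4` joins the realtime model enum. An abridged sketch mirroring the new shape; the enum lists are shortened here and names follow the bundle:

```ts
import { z } from "zod";

// Abridged local mirror of the schemas above; the real enums are longer.
const sonioxRealtimeModelSchema = z.enum([
  "stt-rt-v4", "stt-rt-v3", "stt-rt-preview", "stt-rt-v3-preview", "stt-rt-preview-v2",
]);
const sonioxAudioFormatSchema = z.enum(["auto", "wav", "pcm_s16le", "mulaw"]); // abridged

const streamingTranscriberParams = z.object({
  model: sonioxRealtimeModelSchema,
  audioFormat: sonioxAudioFormatSchema.optional(),
  sampleRate: z.number().optional(),
  numChannels: z.number().optional(),
  enableSpeakerDiarization: z.boolean().optional(),
});

const result = streamingTranscriberParams.safeParse({
  model: "stt-rt-v4",       // newly valid in 0.9.1
  audioFormat: "pcm_s16le", // container formats and PCM encodings now share one enum
  sampleRate: 16000,
  numChannels: 1,
});
console.log(result.success); // true
```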
@@ -37176,8 +36786,8 @@ var BatchOnlyProviders = AllProviders.filter(
 );

 // src/generated/deepgram/schema/index.ts
-var
-__export(
+var schema_exports5 = {};
+__export(schema_exports5, {
 V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
 V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
 V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
@@ -37432,8 +37042,8 @@ var V1SpeakPostParametersSampleRate = {
 };

 // src/generated/openai/schema/index.ts
-var
-__export(
+var schema_exports6 = {};
+__export(schema_exports6, {
 AudioResponseFormat: () => AudioResponseFormat,
 CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
 CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
@@ -37727,6 +37337,16 @@ var ToolChoiceOptions = {
 required: "required"
 };

+// src/generated/openai/schema/transcriptionDiarizedSegmentType.ts
+var TranscriptionDiarizedSegmentType = {
+transcripttextsegment: "transcript.text.segment"
+};
+
+// src/generated/openai/schema/transcriptionInclude.ts
+var TranscriptionInclude = {
+logprobs: "logprobs"
+};
+
 // src/generated/openai/schema/transcriptTextDeltaEventType.ts
 var TranscriptTextDeltaEventType = {
 transcripttextdelta: "transcript.text.delta"
@@ -37752,16 +37372,6 @@ var TranscriptTextUsageTokensType = {
 tokens: "tokens"
 };

-// src/generated/openai/schema/transcriptionDiarizedSegmentType.ts
-var TranscriptionDiarizedSegmentType = {
-transcripttextsegment: "transcript.text.segment"
-};
-
-// src/generated/openai/schema/transcriptionInclude.ts
-var TranscriptionInclude = {
-logprobs: "logprobs"
-};
-
 // src/generated/openai/schema/vadConfigType.ts
 var VadConfigType = {
 server_vad: "server_vad"
@@ -37773,8 +37383,8 @@ var VoiceResourceObject = {
 };

 // src/generated/speechmatics/schema/index.ts
-var
-__export(
+var schema_exports7 = {};
+__export(schema_exports7, {
 AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
 ErrorResponseError: () => ErrorResponseError,
 GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
@@ -37963,32 +37573,6 @@ var WrittenFormRecognitionResultType = {
 word: "word"
 };

-// src/generated/soniox/schema/index.ts
-var schema_exports7 = {};
-__export(schema_exports7, {
-TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
-TranscriptionMode: () => TranscriptionMode,
-TranscriptionStatus: () => TranscriptionStatus,
-TranslationConfigType: () => TranslationConfigType
-});
-
-// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
-var TemporaryApiKeyUsageType = {
-transcribe_websocket: "transcribe_websocket"
-};
-
-// src/generated/soniox/schema/transcriptionMode.ts
-var TranscriptionMode = {
-real_time: "real_time",
-async: "async"
-};
-
-// src/generated/soniox/schema/translationConfigType.ts
-var TranslationConfigType = {
-one_way: "one_way",
-two_way: "two_way"
-};
-
 // src/generated/elevenlabs/schema/index.ts
 var schema_exports8 = {};
 __export(schema_exports8, {
@@ -38143,8 +37727,8 @@ var getJobsQueryParams = zod12.object({
 var getJobsResponseJobsItemDurationMin = 0;
 var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
 var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
-var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp =
-var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp =
+var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
 var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
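The same regex pair recurs in the three Speechmatics hunks that follow. A quick sketch of what the restored patterns match (the example strings are hypothetical):

```ts
// The two patterns shown on the right-hand side above.
const permittedMarksRegExp = /^(.|all)$/;       // one punctuation mark, or the keyword "all"
const channelLabelsRegExp = /^[A-Za-z0-9._]+$/; // letters, digits, dots, underscores

console.log(permittedMarksRegExp.test(","));        // true  -- a single character
console.log(permittedMarksRegExp.test("all"));      // true  -- the keyword
console.log(permittedMarksRegExp.test(",."));       // false -- more than one character
console.log(channelLabelsRegExp.test("channel_1")); // true
console.log(channelLabelsRegExp.test("left chan")); // false -- space not allowed
```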
@@ -38342,8 +37926,8 @@ var getJobsJobidParams = zod12.object({
 var getJobsJobidResponseJobDurationMin = 0;
 var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
 var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
-var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp =
-var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp =
+var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
 var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38540,8 +38124,8 @@ var deleteJobsJobidQueryParams = zod12.object({
 var deleteJobsJobidResponseJobDurationMin = 0;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
-var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp =
-var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp =
+var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38744,8 +38328,8 @@ var getJobsJobidTranscriptQueryParams = zod12.object({
 var getJobsJobidTranscriptResponseJobDurationMin = 0;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
-var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp =
-var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp =
+var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
@@ -39692,7 +39276,7 @@ export {
 DeepgramTTSSampleRate,
 DeepgramTopicMode,
 DeepgramTranscriptionSchema,
-
+schema_exports5 as DeepgramTypes,
 deepgramAPI_zod_exports as DeepgramZodSchemas,
 ElevenLabsAdapter,
 ElevenLabsCapabilities,
@@ -39729,7 +39313,7 @@ export {
 OpenAIResponseFormat,
 streaming_types_exports as OpenAIStreamingTypes,
 OpenAITranscriptionSchema,
-
+schema_exports6 as OpenAITypes,
 OpenAIWhisperAdapter,
 openAIAudioRealtimeAPI_zod_exports as OpenAIZodSchemas,
 ProfanityFilterMode,
@@ -39758,7 +39342,7 @@ export {
 SonioxStreamingUpdateSchema,
 streaming_types_zod_exports as SonioxStreamingZodSchemas,
 SonioxTranscriptionSchema,
-
+schema_exports4 as SonioxTypes,
 SpeakV1ContainerParameter,
 SpeakV1EncodingParameter,
 SpeakV1SampleRateParameter,
@@ -39773,7 +39357,7 @@ export {
 SpeechmaticsStreamingSchema,
 SpeechmaticsStreamingUpdateSchema,
 SpeechmaticsTranscriptionSchema,
-
+schema_exports7 as SpeechmaticsTypes,
 speechmaticsASRRESTAPI_zod_exports as SpeechmaticsZodSchemas,
 StreamingProviders,
 StreamingSupportedBitDepthEnum,
|