voice-router-dev 0.8.9 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +83 -0
- package/dist/constants.d.mts +23 -21
- package/dist/constants.d.ts +23 -21
- package/dist/constants.js +24 -24
- package/dist/constants.mjs +24 -24
- package/dist/{field-configs-CSOt3yc9.d.mts → field-configs-CH0lgAe8.d.mts} +5691 -6202
- package/dist/{field-configs-CSOt3yc9.d.ts → field-configs-CH0lgAe8.d.ts} +5691 -6202
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +160 -247
- package/dist/field-configs.mjs +160 -247
- package/dist/index.d.mts +2362 -2810
- package/dist/index.d.ts +2362 -2810
- package/dist/index.js +531 -947
- package/dist/index.mjs +531 -947
- package/dist/{speechToTextChunkResponseModel-B4kVoFc3.d.ts → speechToTextChunkResponseModel-BY2lGyZ3.d.ts} +2405 -1918
- package/dist/{speechToTextChunkResponseModel-DmajV4F-.d.mts → speechToTextChunkResponseModel-KayxDiZ7.d.mts} +2405 -1918
- package/dist/webhooks.d.mts +70 -2
- package/dist/webhooks.d.ts +70 -2
- package/dist/webhooks.js +113 -1
- package/dist/webhooks.mjs +111 -1
- package/package.json +2 -1
package/dist/index.js
CHANGED
@@ -82,7 +82,7 @@ __export(src_exports, {
   DeepgramTTSSampleRate: () => DeepgramTTSSampleRate,
   DeepgramTopicMode: () => DeepgramTopicMode,
   DeepgramTranscriptionSchema: () => DeepgramTranscriptionSchema,
-  DeepgramTypes: () =>
+  DeepgramTypes: () => schema_exports5,
   DeepgramZodSchemas: () => deepgramAPI_zod_exports,
   ElevenLabsAdapter: () => ElevenLabsAdapter,
   ElevenLabsCapabilities: () => ElevenLabsCapabilities,
@@ -119,7 +119,7 @@ __export(src_exports, {
   OpenAIResponseFormat: () => OpenAIResponseFormat,
   OpenAIStreamingTypes: () => streaming_types_exports,
   OpenAITranscriptionSchema: () => OpenAITranscriptionSchema,
-  OpenAITypes: () =>
+  OpenAITypes: () => schema_exports6,
   OpenAIWhisperAdapter: () => OpenAIWhisperAdapter,
   OpenAIZodSchemas: () => openAIAudioRealtimeAPI_zod_exports,
   ProfanityFilterMode: () => ProfanityFilterMode,
@@ -148,7 +148,7 @@ __export(src_exports, {
   SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
   SonioxStreamingZodSchemas: () => streaming_types_zod_exports,
   SonioxTranscriptionSchema: () => SonioxTranscriptionSchema,
-  SonioxTypes: () =>
+  SonioxTypes: () => schema_exports4,
   SpeakV1ContainerParameter: () => SpeakV1ContainerParameter,
   SpeakV1EncodingParameter: () => SpeakV1EncodingParameter,
   SpeakV1SampleRateParameter: () => SpeakV1SampleRateParameter,
@@ -163,7 +163,7 @@ __export(src_exports, {
   SpeechmaticsStreamingSchema: () => SpeechmaticsStreamingSchema,
   SpeechmaticsStreamingUpdateSchema: () => SpeechmaticsStreamingUpdateSchema,
   SpeechmaticsTranscriptionSchema: () => SpeechmaticsTranscriptionSchema,
-  SpeechmaticsTypes: () =>
+  SpeechmaticsTypes: () => schema_exports7,
   SpeechmaticsZodSchemas: () => speechmaticsASRRESTAPI_zod_exports,
   StreamingProviders: () => StreamingProviders,
   StreamingSupportedBitDepthEnum: () => StreamingSupportedBitDepthEnum,
@@ -983,60 +983,60 @@ var SonioxLanguage = {
 // src/generated/soniox/models.ts
 var SonioxModels = [
   { id: "stt-rt-v4", name: "Speech-to-Text Real-time v4", mode: "real_time" },
-  { id: "stt-rt-v3", name: "Speech-to-Text Real-time v3", mode: "real_time" },
   { id: "stt-async-v4", name: "Speech-to-Text Async v4", mode: "async" },
-  { id: "stt-
-  { id: "stt-
-  { id: "stt-
-  { id: "stt-rt-
-  { id: "stt-
-  { id: "stt-
+  { id: "stt-rt-preview", name: "Speech-to-Text Real-time Preview", mode: "real_time", aliasOf: "stt-rt-v4" },
+  { id: "stt-async-preview", name: "Speech-to-Text Async Preview", mode: "async", aliasOf: "stt-async-v4" },
+  { id: "stt-rt-v3-preview", name: "Speech-to-Text Real-time v3 Preview", mode: "real_time", aliasOf: "stt-rt-v4" },
+  { id: "stt-rt-preview-v2", name: "Speech-to-Text Real-time Preview v2", mode: "real_time", aliasOf: "stt-rt-v4" },
+  { id: "stt-async-preview-v1", name: "Speech-to-Text Async Preview v1", mode: "async", aliasOf: "stt-async-v4" },
+  { id: "stt-rt-v3", name: "Speech-to-Text Real-time v3", mode: "real_time", aliasOf: "stt-rt-v4" },
+  { id: "stt-async-v3", name: "Speech-to-Text Async v3", mode: "async", aliasOf: "stt-async-v4" }
 ];
 var SonioxModelCodes = [
   "stt-rt-v4",
-  "stt-rt-v3",
   "stt-async-v4",
-  "stt-async-v3",
   "stt-rt-preview",
   "stt-async-preview",
   "stt-rt-v3-preview",
   "stt-rt-preview-v2",
-  "stt-async-preview-v1"
+  "stt-async-preview-v1",
+  "stt-rt-v3",
+  "stt-async-v3"
 ];
 var SonioxModelLabels = {
   "stt-rt-v4": "Speech-to-Text Real-time v4",
-  "stt-rt-v3": "Speech-to-Text Real-time v3",
   "stt-async-v4": "Speech-to-Text Async v4",
-  "stt-async-v3": "Speech-to-Text Async v3",
   "stt-rt-preview": "Speech-to-Text Real-time Preview",
   "stt-async-preview": "Speech-to-Text Async Preview",
   "stt-rt-v3-preview": "Speech-to-Text Real-time v3 Preview",
   "stt-rt-preview-v2": "Speech-to-Text Real-time Preview v2",
-  "stt-async-preview-v1": "Speech-to-Text Async Preview v1"
+  "stt-async-preview-v1": "Speech-to-Text Async Preview v1",
+  "stt-rt-v3": "Speech-to-Text Real-time v3",
+  "stt-async-v3": "Speech-to-Text Async v3"
 };
 var SonioxModel = {
   stt_rt_v4: "stt-rt-v4",
-  stt_rt_v3: "stt-rt-v3",
   stt_async_v4: "stt-async-v4",
-  stt_async_v3: "stt-async-v3",
   stt_rt_preview: "stt-rt-preview",
   stt_async_preview: "stt-async-preview",
   stt_rt_v3_preview: "stt-rt-v3-preview",
   stt_rt_preview_v2: "stt-rt-preview-v2",
-  stt_async_preview_v1: "stt-async-preview-v1"
+  stt_async_preview_v1: "stt-async-preview-v1",
+  stt_rt_v3: "stt-rt-v3",
+  stt_async_v3: "stt-async-v3"
 };
 var SonioxRealtimeModel = {
   stt_rt_v4: "stt-rt-v4",
-  stt_rt_v3: "stt-rt-v3",
   stt_rt_preview: "stt-rt-preview",
   stt_rt_v3_preview: "stt-rt-v3-preview",
-  stt_rt_preview_v2: "stt-rt-preview-v2"
+  stt_rt_preview_v2: "stt-rt-preview-v2",
+  stt_rt_v3: "stt-rt-v3"
 };
 var SonioxAsyncModel = {
   stt_async_v4: "stt-async-v4",
-  stt_async_v3: "stt-async-v3",
   stt_async_preview: "stt-async-preview",
-  stt_async_preview_v1: "stt-async-preview-v1"
+  stt_async_preview_v1: "stt-async-preview-v1",
+  stt_async_v3: "stt-async-v3"
 };
 
 // src/generated/speechmatics/languages.ts
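The model table above now keeps the retired v3 and preview ids as aliases: each carries an aliasOf field naming the v4 model that actually serves it. A minimal TypeScript sketch of resolving an alias before submitting a job; the SonioxModels entry shape is taken from the hunk above, while resolveModelId itself is a hypothetical helper, not an export of this package:

interface SonioxModelEntry {
  id: string;
  name: string;
  mode: "real_time" | "async";
  aliasOf?: string; // present on deprecated ids, naming their replacement
}

// Resolve a possibly-aliased model id to its canonical replacement.
function resolveModelId(models: SonioxModelEntry[], id: string): string {
  const entry = models.find((m) => m.id === id);
  return entry?.aliasOf ?? id; // unknown ids pass through unchanged
}

// With the data above: resolveModelId(SonioxModels, "stt-rt-v3") === "stt-rt-v4"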
@@ -4011,17 +4011,17 @@ var SummaryTypesEnum = {
   concise: "concise"
 };
 
-// src/generated/gladia/schema/transcriptMessageType.ts
-var TranscriptMessageType = {
-  transcript: "transcript"
-};
-
 // src/generated/gladia/schema/transcriptionControllerListV2KindItem.ts
 var TranscriptionControllerListV2KindItem = {
   "pre-recorded": "pre-recorded",
   live: "live"
 };
 
+// src/generated/gladia/schema/transcriptMessageType.ts
+var TranscriptMessageType = {
+  transcript: "transcript"
+};
+
 // src/generated/gladia/schema/translationMessageType.ts
 var TranslationMessageType = {
   translation: "translation"
@@ -4293,7 +4293,7 @@ var WebhookTranscriptionSuccessPayloadEvent = {
 
 // src/generated/gladia/api/gladiaControlAPI.ts
 var preRecordedControllerInitPreRecordedJobV2 = (initTranscriptionRequest, options) => {
-  return import_axios.default.post(
+  return import_axios.default.post("/v2/pre-recorded", initTranscriptionRequest, options);
 };
 var preRecordedControllerGetPreRecordedJobV2 = (id, options) => {
   return import_axios.default.get(`/v2/pre-recorded/${id}`, options);
@@ -4308,13 +4308,13 @@ var preRecordedControllerGetAudioV2 = (id, options) => {
   });
 };
 var transcriptionControllerListV2 = (params, options) => {
-  return import_axios.default.get(
+  return import_axios.default.get("/v2/transcription", {
     ...options,
     params: { ...params, ...options?.params }
   });
 };
 var streamingControllerInitStreamingSessionV2 = (streamingRequest, params, options) => {
-  return import_axios.default.post(
+  return import_axios.default.post("/v2/live", streamingRequest, {
     ...options,
     params: { ...params, ...options?.params }
   });
@@ -4580,7 +4580,6 @@ var GladiaAdapter = class extends BaseAdapter {
       sentiment: result?.sentiment_analysis || void 0,
       audioToLlm: result?.audio_to_llm || void 0,
       chapters: result?.chapterization || void 0,
-      speakerReidentification: result?.speaker_reidentification || void 0,
       structuredData: result?.structured_data_extraction || void 0,
       customMetadata: response.custom_metadata || void 0
     },
@@ -5642,17 +5641,17 @@ var PiiPolicy = {
   zodiac_sign: "zodiac_sign"
 };
 
+// src/generated/assemblyai/schema/redactedAudioStatus.ts
+var RedactedAudioStatus = {
+  redacted_audio_ready: "redacted_audio_ready"
+};
+
 // src/generated/assemblyai/schema/redactPiiAudioQuality.ts
 var RedactPiiAudioQuality = {
   mp3: "mp3",
   wav: "wav"
 };
 
-// src/generated/assemblyai/schema/redactedAudioStatus.ts
-var RedactedAudioStatus = {
-  redacted_audio_ready: "redacted_audio_ready"
-};
-
 // src/generated/assemblyai/schema/sentiment.ts
 var Sentiment = {
   POSITIVE: "POSITIVE",
@@ -5716,10 +5715,10 @@ var TranscriptRemoveAudioTags = {
 
 // src/generated/assemblyai/api/assemblyAIAPI.ts
 var createTranscript = (transcriptParams, options) => {
-  return import_axios2.default.post(
+  return import_axios2.default.post("/v2/transcript", transcriptParams, options);
 };
 var listTranscripts = (params, options) => {
-  return import_axios2.default.get(
+  return import_axios2.default.get("/v2/transcript", {
     ...options,
     params: { ...params, ...options?.params }
   });
@@ -6065,23 +6064,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
         "AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
       );
     }
-    const
-
-
-
-
-
+    const passthrough = options?.assemblyai;
+    let speechModels;
+    if (passthrough?.speech_model != null && !passthrough.speech_models) {
+      speechModels = [passthrough.speech_model];
+    } else if (passthrough?.speech_models) {
+      speechModels = passthrough.speech_models;
     }
+    const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
     const request = {
-      ...
+      ...typedOpts,
       audio_url: audioUrl,
       // speech_models is required — default to universal-3-pro
-      speech_models:
-        "universal-3-pro"
-      ],
+      speech_models: speechModels ?? ["universal-3-pro"],
       // Enable punctuation and formatting by default
-      punctuate:
-      format_text:
+      punctuate: typedOpts.punctuate ?? true,
+      format_text: typedOpts.format_text ?? true
     };
     if (options) {
       if (options.model) {
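The hunk above makes the array-valued speech_models the primary AssemblyAI option and keeps the deprecated singular speech_model working by wrapping it. A short sketch of that normalization in isolation; the option names come from the diff, but the AssemblyAIOptions shape here is assumed rather than the package's exported type:

interface AssemblyAIOptions {
  speech_model?: string;    // deprecated singular form
  speech_models?: string[]; // current array form
  [key: string]: unknown;
}

function normalizeSpeechModels(passthrough?: AssemblyAIOptions): string[] {
  // Legacy singular value wins only when the array form is absent.
  if (passthrough?.speech_model != null && !passthrough.speech_models) {
    return [passthrough.speech_model];
  }
  // Fall back to the default the adapter applies above.
  return passthrough?.speech_models ?? ["universal-3-pro"];
}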
@@ -6796,8 +6794,10 @@ var DeepgramAdapter = class extends BaseAdapter {
   /**
    * Submit audio for transcription
    *
-   * Sends audio to Deepgram API for transcription. Deepgram processes
-   * synchronously and returns results immediately
+   * Sends audio to Deepgram API for transcription. Deepgram normally processes
+   * synchronously and returns results immediately. When `webhookUrl` is set,
+   * Deepgram can instead return an async callback acknowledgment containing a
+   * request ID.
    *
    * @param audio - Audio input (URL or file buffer)
    * @param options - Transcription options
@@ -6848,17 +6848,59 @@ var DeepgramAdapter = class extends BaseAdapter {
         { params }
       ).then((res) => res.data);
     } else if (audio.type === "file") {
-      response = await this.client.post(
-
-
-
+      response = await this.client.post(
+        "/listen",
+        audio.file,
+        {
+          params,
+          headers: {
+            "Content-Type": "audio/*"
+          }
         }
-
+      ).then((res) => res.data);
     } else {
       throw new Error(
         "Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
       );
     }
+    if (options?.webhookUrl) {
+      const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
+      if (!requestId) {
+        return {
+          success: false,
+          provider: this.name,
+          error: {
+            code: "MISSING_REQUEST_ID",
+            message: "Deepgram callback mode did not return a request ID"
+          },
+          raw: response
+        };
+      }
+      return {
+        success: true,
+        provider: this.name,
+        data: {
+          id: requestId,
+          text: "",
+          status: "queued"
+        },
+        tracking: {
+          requestId
+        },
+        raw: response
+      };
+    }
+    if (!("results" in response) || !("metadata" in response)) {
+      return {
+        success: false,
+        provider: this.name,
+        error: {
+          code: "INVALID_RESPONSE",
+          message: "Deepgram did not return a synchronous transcription payload"
+        },
+        raw: response
+      };
+    }
     return this.normalizeResponse(response);
   } catch (error) {
     return this.createErrorResponse(error);
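With webhookUrl set, transcribe() now resolves early with a tracking.requestId instead of a transcript, and rejects callback responses that lack one. A hedged usage sketch; the adapter variable and the result field names are inferred from the hunk above, not checked against the package's documentation:

// Assumed consumer-side handling of Deepgram callback mode.
const result = await deepgramAdapter.transcribe(
  { type: "url", url: "https://example.com/audio.wav" },
  { webhookUrl: "https://example.com/hooks/deepgram" }
);

if (result.success && result.tracking?.requestId) {
  // No transcript yet: Deepgram acknowledged the job and will POST the
  // finished payload to the webhook. Store the id for correlation.
  console.log("queued as", result.tracking.requestId);
} else if (!result.success) {
  console.error(result.error?.code, result.error?.message);
}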
@@ -7519,7 +7561,8 @@ var DeepgramAdapter = class extends BaseAdapter {
           break;
         }
         case "Metadata": {
-
+          const { type: _, ...metadata } = message;
+          callbacks?.onMetadata?.(metadata);
           break;
         }
         case "Error": {
@@ -7856,13 +7899,13 @@ var TextNormalizationKind = {
 
 // src/generated/azure/api/speechServicesAPIVersion32.ts
 var transcriptionsList = (params, options) => {
-  return import_axios4.default.get(
+  return import_axios4.default.get("/transcriptions", {
     ...options,
     params: { ...params, ...options?.params }
   });
 };
 var transcriptionsCreate = (transcription, options) => {
-  return import_axios4.default.post(
+  return import_axios4.default.post("/transcriptions", transcription, options);
 };
 var transcriptionsGet = (id, options) => {
   return import_axios4.default.get(`/transcriptions/${id}`, options);
@@ -7877,13 +7920,13 @@ var transcriptionsListFiles = (id, params, options) => {
   });
 };
 var webHooksList = (params, options) => {
-  return import_axios4.default.get(
+  return import_axios4.default.get("/webhooks", {
     ...options,
     params: { ...params, ...options?.params }
   });
 };
 var webHooksCreate = (webHook, options) => {
-  return import_axios4.default.post(
+  return import_axios4.default.post("/webhooks", webHook, options);
 };
 var webHooksDelete = (id, options) => {
   return import_axios4.default.delete(`/webhooks/${id}`, options);
@@ -7955,10 +7998,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
       contentUrls: [audio.url],
       properties: this.buildTranscriptionProperties(options)
     };
-    const response = await transcriptionsCreate(
-      transcriptionRequest,
-      this.getAxiosConfig()
-    );
+    const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
     const transcription = response.data;
     const transcriptId = transcription.self?.split("/").pop() || "";
     return await this.pollForCompletion(transcriptId);
@@ -8011,7 +8051,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
       this.getAxiosConfig()
     );
     const files = filesResponse.data?.values || [];
-    const resultFile = files.find((file) => file.kind ===
+    const resultFile = files.find((file) => file.kind === FileKind.Transcription);
     if (!resultFile?.links?.contentUrl) {
       return {
         success: false,
@@ -8235,15 +8275,20 @@ var AzureSTTAdapter = class extends BaseAdapter {
     return properties;
   }
   /**
-   * Normalize Azure status to unified status
+   * Normalize Azure status to unified status using generated AzureStatus constants
   */
   normalizeStatus(status) {
-
-
-
-
-
-
+    switch (status) {
+      case Status.Succeeded:
+        return "completed";
+      case Status.Running:
+        return "processing";
+      case Status.Failed:
+        return "error";
+      case Status.NotStarted:
+      default:
+        return "queued";
+    }
   }
   /**
    * Normalize Azure transcription response to unified format
@@ -8363,30 +8408,30 @@ function getAzureOpenAIRealtimeUrl(endpoint, deployment, apiVersion = "2024-10-0
 var import_axios6 = __toESM(require("axios"));
 var createTranscription = (createTranscriptionRequest, options) => {
   const formData = new FormData();
-  formData.append(
-  formData.append(
+  formData.append("file", createTranscriptionRequest.file);
+  formData.append("model", createTranscriptionRequest.model);
   if (createTranscriptionRequest.language !== void 0) {
-    formData.append(
+    formData.append("language", createTranscriptionRequest.language);
   }
   if (createTranscriptionRequest.prompt !== void 0) {
-    formData.append(
+    formData.append("prompt", createTranscriptionRequest.prompt);
   }
   if (createTranscriptionRequest.response_format !== void 0) {
-    formData.append(
+    formData.append("response_format", createTranscriptionRequest.response_format);
   }
   if (createTranscriptionRequest.temperature !== void 0) {
-    formData.append(
+    formData.append("temperature", createTranscriptionRequest.temperature.toString());
   }
   if (createTranscriptionRequest.include !== void 0) {
-    createTranscriptionRequest.include.forEach((value) => formData.append(
+    createTranscriptionRequest.include.forEach((value) => formData.append("include", value));
   }
   if (createTranscriptionRequest.timestamp_granularities !== void 0) {
     createTranscriptionRequest.timestamp_granularities.forEach(
-      (value) => formData.append(
+      (value) => formData.append("timestamp_granularities", value)
     );
   }
   if (createTranscriptionRequest.stream !== void 0 && createTranscriptionRequest.stream !== null) {
-    formData.append(
+    formData.append("stream", createTranscriptionRequest.stream.toString());
   }
   if (createTranscriptionRequest.chunking_strategy !== void 0 && createTranscriptionRequest.chunking_strategy !== null) {
     formData.append(
@@ -8396,15 +8441,15 @@ var createTranscription = (createTranscriptionRequest, options) => {
   }
   if (createTranscriptionRequest.known_speaker_names !== void 0) {
     createTranscriptionRequest.known_speaker_names.forEach(
-      (value) => formData.append(
+      (value) => formData.append("known_speaker_names", value)
     );
   }
   if (createTranscriptionRequest.known_speaker_references !== void 0) {
     createTranscriptionRequest.known_speaker_references.forEach(
-      (value) => formData.append(
+      (value) => formData.append("known_speaker_references", value)
    );
   }
-  return import_axios6.default.post(
+  return import_axios6.default.post("/audio/transcriptions", formData, options);
 };
 
 // src/generated/openai/schema/createTranscriptionRequestTimestampGranularitiesItem.ts
@@ -8493,7 +8538,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
     const request = {
       ...options?.openai,
       file: audioData,
-      // Generated type expects Blob
       model
     };
     if (options?.language) {
@@ -8920,7 +8964,6 @@ function createOpenAIWhisperAdapter(config) {
 
 // src/adapters/speechmatics-adapter.ts
 var import_axios8 = __toESM(require("axios"));
-var import_ws5 = __toESM(require("ws"));
 
 // src/generated/speechmatics/schema/notificationConfigContentsItem.ts
 var NotificationConfigContentsItem = {
@@ -8970,7 +9013,8 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
     super(...arguments);
     this.name = "speechmatics";
     this.capabilities = {
-      streaming:
+      streaming: false,
+      // Batch only (streaming available via separate WebSocket API)
       diarization: true,
       wordTimestamps: true,
       languageDetection: false,
@@ -9105,16 +9149,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
       jobConfig.fetch_data = {
         url: audio.url
       };
-
-
-      requestBody = formData;
-      headers = { "Content-Type": "multipart/form-data" };
+      requestBody = { config: JSON.stringify(jobConfig) };
+      headers = { "Content-Type": "application/json" };
     } else if (audio.type === "file") {
-
-
-
-
-      requestBody = formData;
+      requestBody = {
+        config: JSON.stringify(jobConfig),
+        data_file: audio.file
+      };
       headers = { "Content-Type": "multipart/form-data" };
     } else {
       return {
@@ -9219,389 +9260,6 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
       throw error;
     }
   }
-  /**
-   * Build WebSocket URL for real-time streaming
-   *
-   * Note: Real-time API uses a different host from the batch API:
-   * - Batch: {region}.asr.api.speechmatics.com
-   * - Real-time: {region}.rt.speechmatics.com
-   *
-   * @param region - Regional endpoint identifier
-   * @returns WebSocket URL for real-time API
-   */
-  getRegionalWsUrl(region) {
-    if (this.config?.wsBaseUrl) {
-      return this.config.wsBaseUrl;
-    }
-    const rtRegionMap = {
-      eu1: "eu",
-      eu2: "eu",
-      us1: "us",
-      us2: "us",
-      au1: "eu"
-      // No AU RT endpoint — fall back to EU
-    };
-    const rtPrefix = rtRegionMap[region || ""] || "eu";
-    return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
-  }
-  /**
-   * Stream audio for real-time transcription via WebSocket
-   *
-   * Connects to Speechmatics' real-time API and sends audio chunks
-   * for transcription with results returned via callbacks.
-   *
-   * @param options - Streaming configuration options
-   * @param callbacks - Event callbacks for transcription results
-   * @returns Promise that resolves with a StreamingSession
-   *
-   * @example Basic streaming
-   * ```typescript
-   * const session = await adapter.transcribeStream({
-   *   language: 'en',
-   *   speechmaticsStreaming: {
-   *     enablePartials: true,
-   *     operatingPoint: 'enhanced'
-   *   }
-   * }, {
-   *   onTranscript: (event) => console.log(event.text),
-   *   onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
-   *   onError: (error) => console.error(error)
-   * });
-   *
-   * await session.sendAudio({ data: audioBuffer });
-   * await session.close();
-   * ```
-   */
-  async transcribeStream(options, callbacks) {
-    this.validateConfig();
-    const smOpts = options?.speechmaticsStreaming || {};
-    const region = smOpts.region || this.config?.region;
-    const wsUrl = this.getRegionalWsUrl(region);
-    const ws = new import_ws5.default(wsUrl, {
-      headers: {
-        Authorization: `Bearer ${this.config.apiKey}`
-      }
-    });
-    let sessionStatus = "connecting";
-    const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
-    let seqNo = 0;
-    let utteranceResults = [];
-    const sessionReady = new Promise((resolve, reject) => {
-      const timeout = setTimeout(() => {
-        reject(new Error("WebSocket connection timeout"));
-      }, 1e4);
-      let wsOpen = false;
-      ws.once("error", (error) => {
-        clearTimeout(timeout);
-        reject(error);
-      });
-      ws.once("open", () => {
-        wsOpen = true;
-        const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
-        const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
-        const startMsg = {
-          message: "StartRecognition",
-          audio_format: {
-            type: "raw",
-            encoding,
-            sample_rate: sampleRate
-          },
-          transcription_config: {
-            language: smOpts.language || options?.language || "en",
-            enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
-          }
-        };
-        const txConfig = startMsg.transcription_config;
-        if (smOpts.domain) txConfig.domain = smOpts.domain;
-        if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
-        if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
-        if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
-        if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
-        if (smOpts.diarization === "speaker" || options?.diarization) {
-          txConfig.diarization = "speaker";
-          if (smOpts.maxSpeakers) {
-            txConfig.speaker_diarization_config = {
-              max_speakers: smOpts.maxSpeakers
-            };
-          } else if (options?.speakersExpected) {
-            txConfig.speaker_diarization_config = {
-              max_speakers: options.speakersExpected
-            };
-          }
-        }
-        if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
-          txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
-            content: word
-          }));
-        } else if (options?.customVocabulary && options.customVocabulary.length > 0) {
-          txConfig.additional_vocab = options.customVocabulary.map((word) => ({
-            content: word
-          }));
-        }
-        if (smOpts.conversationConfig) {
-          txConfig.conversation_config = {
-            end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
-          };
-        }
-        const startPayload = JSON.stringify(startMsg);
-        if (callbacks?.onRawMessage) {
-          callbacks.onRawMessage({
-            provider: "speechmatics",
-            direction: "outgoing",
-            timestamp: Date.now(),
-            payload: startPayload,
-            messageType: "StartRecognition"
-          });
-        }
-        ws.send(startPayload);
-      });
-      const onMessage = (data) => {
-        const rawPayload = data.toString();
-        try {
-          const msg = JSON.parse(rawPayload);
-          if (msg.message === "RecognitionStarted") {
-            clearTimeout(timeout);
-            ws.removeListener("message", onMessage);
-            ws.emit("message", data);
-            resolve();
-          } else if (msg.message === "Error") {
-            clearTimeout(timeout);
-            ws.removeListener("message", onMessage);
-            reject(new Error(msg.reason || "Recognition failed to start"));
-          }
-        } catch {
-        }
-      };
-      ws.on("message", onMessage);
-    });
-    ws.on("message", (data) => {
-      const rawPayload = data.toString();
-      try {
-        const message = JSON.parse(rawPayload);
-        if (callbacks?.onRawMessage) {
-          callbacks.onRawMessage({
-            provider: "speechmatics",
-            direction: "incoming",
-            timestamp: Date.now(),
-            payload: rawPayload,
-            messageType: message.message
-          });
-        }
-        this.handleStreamingMessage(message, callbacks, utteranceResults);
-      } catch (error) {
-        if (callbacks?.onRawMessage) {
-          callbacks.onRawMessage({
-            provider: "speechmatics",
-            direction: "incoming",
-            timestamp: Date.now(),
-            payload: rawPayload,
-            messageType: "parse_error"
-          });
-        }
-        callbacks?.onError?.({
-          code: "PARSE_ERROR",
-          message: "Failed to parse WebSocket message",
-          details: error
-        });
-      }
-    });
-    ws.on("error", (error) => {
-      callbacks?.onError?.({
-        code: "WEBSOCKET_ERROR",
-        message: error.message,
-        details: error
-      });
-    });
-    ws.on("close", (code, reason) => {
-      sessionStatus = "closed";
-      callbacks?.onClose?.(code, reason.toString());
-    });
-    await sessionReady;
-    sessionStatus = "open";
-    callbacks?.onOpen?.();
-    return {
-      id: sessionId,
-      provider: this.name,
-      createdAt: /* @__PURE__ */ new Date(),
-      getStatus: () => sessionStatus,
-      sendAudio: async (chunk) => {
-        if (sessionStatus !== "open") {
-          throw new Error(`Cannot send audio: session is ${sessionStatus}`);
-        }
-        if (ws.readyState !== import_ws5.default.OPEN) {
-          throw new Error("WebSocket is not open");
-        }
-        if (callbacks?.onRawMessage) {
-          const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
-            chunk.data.byteOffset,
-            chunk.data.byteOffset + chunk.data.byteLength
-          );
-          callbacks.onRawMessage({
-            provider: this.name,
-            direction: "outgoing",
-            timestamp: Date.now(),
-            payload: audioPayload,
-            messageType: "audio"
-          });
-        }
-        ws.send(chunk.data);
-        seqNo++;
-        if (chunk.isLast) {
-          const endMsg = JSON.stringify({
-            message: "EndOfStream",
-            last_seq_no: seqNo
-          });
-          if (callbacks?.onRawMessage) {
-            callbacks.onRawMessage({
-              provider: this.name,
-              direction: "outgoing",
-              timestamp: Date.now(),
-              payload: endMsg,
-              messageType: "EndOfStream"
-            });
-          }
-          ws.send(endMsg);
-        }
-      },
-      close: async () => {
-        if (sessionStatus === "closed" || sessionStatus === "closing") {
-          return;
-        }
-        sessionStatus = "closing";
-        if (ws.readyState === import_ws5.default.OPEN) {
-          seqNo++;
-          ws.send(
-            JSON.stringify({
-              message: "EndOfStream",
-              last_seq_no: seqNo
-            })
-          );
-        }
-        return new Promise((resolve) => {
-          const timeout = setTimeout(() => {
-            ws.terminate();
-            sessionStatus = "closed";
-            resolve();
-          }, 5e3);
-          const onMsg = (data) => {
-            try {
-              const msg = JSON.parse(data.toString());
-              if (msg.message === "EndOfTranscript") {
-                ws.removeListener("message", onMsg);
-                clearTimeout(timeout);
-                ws.close();
-              }
-            } catch {
-            }
-          };
-          ws.on("message", onMsg);
-          ws.once("close", () => {
-            clearTimeout(timeout);
-            sessionStatus = "closed";
-            resolve();
-          });
-        });
-      }
-    };
-  }
-  /**
-   * Handle incoming Speechmatics real-time WebSocket messages
-   */
-  handleStreamingMessage(message, callbacks, utteranceResults) {
-    switch (message.message) {
-      case "RecognitionStarted": {
-        break;
-      }
-      case "AddPartialTranscript": {
-        const results = message.results || [];
-        const text = buildTextFromSpeechmaticsResults(results);
-        if (text) {
-          callbacks?.onTranscript?.({
-            type: "transcript",
-            text,
-            isFinal: false,
-            words: this.extractWordsFromResults(results),
-            data: message
-          });
-        }
-        break;
-      }
-      case "AddTranscript": {
-        const results = message.results || [];
-        const text = buildTextFromSpeechmaticsResults(results);
-        if (utteranceResults) {
-          utteranceResults.push(...results);
-        }
-        if (text) {
-          callbacks?.onTranscript?.({
-            type: "transcript",
-            text,
-            isFinal: true,
-            words: this.extractWordsFromResults(results),
-            data: message
-          });
-        }
-        break;
-      }
-      case "EndOfUtterance": {
-        if (utteranceResults && utteranceResults.length > 0) {
-          const text = buildTextFromSpeechmaticsResults(utteranceResults);
-          const words = this.extractWordsFromResults(utteranceResults);
-          const utterances = buildUtterancesFromWords(words);
-          if (utterances.length > 0) {
-            for (const utt of utterances) {
-              callbacks?.onUtterance?.(utt);
-            }
-          } else if (text) {
-            callbacks?.onUtterance?.({
-              text,
-              start: words.length > 0 ? words[0].start : 0,
-              end: words.length > 0 ? words[words.length - 1].end : 0,
-              words
-            });
-          }
-          utteranceResults.length = 0;
-        }
-        break;
-      }
-      case "AudioAdded": {
-        break;
-      }
-      case "EndOfTranscript": {
-        break;
-      }
-      case "Info":
-      case "Warning": {
-        callbacks?.onMetadata?.(message);
-        break;
-      }
-      case "Error": {
-        const errMsg = message;
-        callbacks?.onError?.({
-          code: errMsg.type || "SPEECHMATICS_ERROR",
-          message: errMsg.reason || "Unknown error",
-          details: message
-        });
-        break;
-      }
-      default: {
-        callbacks?.onMetadata?.(message);
-        break;
-      }
-    }
-  }
-  /**
-   * Extract unified Word[] from Speechmatics recognition results
-   */
-  extractWordsFromResults(results) {
-    return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
-      word: result.alternatives?.[0]?.content || "",
-      start: result.start_time,
-      end: result.end_time,
-      confidence: result.alternatives?.[0]?.confidence,
-      speaker: result.alternatives?.[0]?.speaker
-    }));
-  }
   /**
    * Normalize Speechmatics status to unified status
    * Uses generated JobDetailsStatus enum values
@@ -9670,9 +9328,6 @@ function createSpeechmaticsAdapter(config) {
   return adapter;
 }
 
-// src/adapters/soniox-adapter.ts
-var import_axios9 = __toESM(require("axios"));
-
 // src/generated/soniox/schema/transcriptionStatus.ts
 var TranscriptionStatus = {
   queued: "queued",
@@ -9681,6 +9336,57 @@ var TranscriptionStatus = {
   error: "error"
 };
 
+// src/generated/soniox/api/sonioxPublicAPI.ts
+var import_axios9 = __toESM(require("axios"));
+
+// src/generated/soniox/schema/index.ts
+var schema_exports4 = {};
+__export(schema_exports4, {
+  TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
+  TranscriptionMode: () => TranscriptionMode,
+  TranscriptionStatus: () => TranscriptionStatus,
+  TranslationConfigType: () => TranslationConfigType
+});
+
+// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
+var TemporaryApiKeyUsageType = {
+  transcribe_websocket: "transcribe_websocket"
+};
+
+// src/generated/soniox/schema/transcriptionMode.ts
+var TranscriptionMode = {
+  real_time: "real_time",
+  async: "async"
+};
+
+// src/generated/soniox/schema/translationConfigType.ts
+var TranslationConfigType = {
+  one_way: "one_way",
+  two_way: "two_way"
+};
+
+// src/generated/soniox/api/sonioxPublicAPI.ts
+var uploadFile = (uploadFileBody2, options) => {
+  const formData = new FormData();
+  if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
+    formData.append("client_reference_id", uploadFileBody2.client_reference_id);
+  }
+  formData.append("file", uploadFileBody2.file);
+  return import_axios9.default.post("/v1/files", formData, options);
+};
+var createTranscription2 = (createTranscriptionPayload, options) => {
+  return import_axios9.default.post("/v1/transcriptions", createTranscriptionPayload, options);
+};
+var getTranscription = (transcriptionId, options) => {
+  return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}`, options);
+};
+var getTranscriptionTranscript = (transcriptionId, options) => {
+  return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
+};
+var getModels = (options) => {
+  return import_axios9.default.get("/v1/models", options);
+};
+
 // src/adapters/soniox-adapter.ts
 var SonioxAdapter = class extends BaseAdapter {
   constructor() {
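The generated sonioxPublicAPI functions above call a bare axios instance with paths like /v1/transcriptions, so every call must receive a config carrying the base URL and Bearer header; that is what the adapter's new getAxiosConfig() supplies. A sketch of a config those functions would accept, with the concrete values being assumptions:

import type { AxiosRequestConfig } from "axios";

// Assumed equivalent of what SonioxAdapter.getAxiosConfig() returns:
// the generated functions prepend nothing, so baseURL and auth live here.
function sonioxAxiosConfig(apiKey: string, baseUrl: string): AxiosRequestConfig {
  return {
    baseURL: baseUrl, // regional host without a /v1 suffix
    headers: { Authorization: `Bearer ${apiKey}` },
    timeout: 120_000,
  };
}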
@@ -9735,11 +9441,17 @@ var SonioxAdapter = class extends BaseAdapter {
     }
   }
   /**
-   * Get the base URL for API requests
+   * Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
   */
   get baseUrl() {
     if (this.config?.baseUrl) return this.config.baseUrl;
-    return `https://${this.getRegionalHost()}
+    return `https://${this.getRegionalHost()}`;
+  }
+  /**
+   * Build axios config with Soniox Bearer auth
+   */
+  getAxiosConfig() {
+    return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
   }
   initialize(config) {
     super.initialize(config);
@@ -9749,15 +9461,6 @@ var SonioxAdapter = class extends BaseAdapter {
     if (config.model) {
       this.defaultModel = config.model;
     }
-    this.client = import_axios9.default.create({
-      baseURL: this.baseUrl,
-      timeout: config.timeout || 12e4,
-      headers: {
-        Authorization: `Bearer ${config.apiKey}`,
-        "Content-Type": "application/json",
-        ...config.headers
-      }
-    });
   }
   /**
   * Get current region
@@ -9787,23 +9490,12 @@ var SonioxAdapter = class extends BaseAdapter {
   */
   setRegion(region) {
     this.region = region;
-    if (this.config?.apiKey) {
-      this.client = import_axios9.default.create({
-        baseURL: this.baseUrl,
-        timeout: this.config.timeout || 12e4,
-        headers: {
-          Authorization: `Bearer ${this.config.apiKey}`,
-          "Content-Type": "application/json",
-          ...this.config.headers
-        }
-      });
-    }
   }
   /**
   * Submit audio for transcription
   *
-  *
-  *
+  * Uses the async v1 API: createTranscription returns status `queued`,
+  * then polls until completed (or returns immediately if webhook is set).
   *
   * @param audio - Audio input (URL or file)
   * @param options - Transcription options
@@ -9812,21 +9504,44 @@ var SonioxAdapter = class extends BaseAdapter {
   async transcribe(audio, options) {
     this.validateConfig();
     try {
-      const
-
-      };
-      if (audio.type === "url") {
-        requestBody.audio_url = audio.url;
-      } else if (audio.type === "file") {
-        const formData = new FormData();
+      const sonioxOpts = options?.soniox;
+      if (audio.type === "file") {
         const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
-
-        const
-
-
-
-
+        const uploadBody = { file: audioBlob };
+        const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
+        const payload = {
+          ...sonioxOpts,
+          model: options?.model || this.defaultModel,
+          file_id: fileResp.data.id,
+          language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
+          enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
+          enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
+          context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
+          webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
+        };
+        const createResp = await createTranscription2(payload, this.getAxiosConfig());
+        const meta = createResp.data;
+        if (options?.webhookUrl || sonioxOpts?.webhook_url) {
+          return this.normalizeTranscription(meta);
+        }
+        return this.pollForCompletion(meta.id);
+      } else if (audio.type === "url") {
+        const payload = {
+          ...sonioxOpts,
+          model: options?.model || this.defaultModel,
+          audio_url: audio.url,
+          language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
+          enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
+          enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
+          context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
+          webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
+        };
+        const createResp = await createTranscription2(payload, this.getAxiosConfig());
+        const meta = createResp.data;
+        if (options?.webhookUrl || sonioxOpts?.webhook_url) {
+          return this.normalizeTranscription(meta);
+        }
+        return this.pollForCompletion(meta.id);
      } else {
        return {
          success: false,
@@ -9837,23 +9552,6 @@ var SonioxAdapter = class extends BaseAdapter {
           }
         };
       }
-      if (options?.language) {
-        requestBody.language_hints = [options.language];
-      }
-      if (options?.diarization) {
-        requestBody.enable_speaker_diarization = true;
-      }
-      if (options?.languageDetection) {
-        requestBody.enable_language_identification = true;
-      }
-      if (options?.customVocabulary && options.customVocabulary.length > 0) {
-        requestBody.context = {
-          terms: options.customVocabulary
-        };
-      }
-      const response = await this.client.post("/transcriptions", requestBody);
-      const transcriptionId = response.data.id;
-      return await this.pollForCompletion(transcriptionId);
     } catch (error) {
       return this.createErrorResponse(error);
     }
@@ -9861,9 +9559,8 @@ var SonioxAdapter = class extends BaseAdapter {
   /**
   * Get transcription result by ID
   *
-  *
-  *
-  * when completed.
+  * Fetches transcription metadata and, if completed, the transcript text/tokens.
+  * Used by pollForCompletion() for async polling.
   *
   * @param transcriptId - Transcript ID
   * @returns Transcription response
@@ -9871,39 +9568,20 @@ var SonioxAdapter = class extends BaseAdapter {
   async getTranscript(transcriptId) {
     this.validateConfig();
     try {
-      const
-      const
-      if (
-
-
-
-
-
-
-
-
-
-      if (job.status !== "completed") {
-        return {
-          success: true,
-          provider: this.name,
-          data: {
-            id: job.id,
-            text: "",
-            status: job.status
-          },
-          raw: job
-        };
+      const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
+      const meta = metaResp.data;
+      if (meta.status === TranscriptionStatus.completed) {
+        try {
+          const transcriptResp = await getTranscriptionTranscript(
+            transcriptId,
+            this.getAxiosConfig()
+          );
+          return this.normalizeTranscription(meta, transcriptResp.data);
+        } catch (transcriptError) {
+          return this.createErrorResponse(transcriptError);
+        }
       }
-
-        `/transcriptions/${transcriptId}/transcript`
-      );
-      return this.normalizeResponse({
-        ...transcriptResponse.data,
-        // Carry over job metadata
-        id: job.id,
-        audio_duration_ms: job.audio_duration_ms
-      });
+      return this.normalizeTranscription(meta);
     } catch (error) {
       return this.createErrorResponse(error);
     }
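getTranscript() above is a single poll step; pollForCompletion(), which this diff references but does not show, presumably loops it until the status leaves queued/processing. A generic sketch of such a loop, where the interval, timeout, and terminal status names are assumptions rather than the package's actual values:

// Hypothetical polling loop over a single-step check like getTranscript().
async function pollUntilDone<T extends { data?: { status?: string } }>(
  step: () => Promise<T>,
  intervalMs = 2000,
  timeoutMs = 300_000
): Promise<T> {
  const deadline = Date.now() + timeoutMs;
  while (true) {
    const res = await step();
    const status = res.data?.status;
    if (status === "completed" || status === "error") return res;
    if (Date.now() > deadline) throw new Error("Polling timed out");
    await new Promise((r) => setTimeout(r, intervalMs));
  }
}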
@@ -9923,51 +9601,50 @@ var SonioxAdapter = class extends BaseAdapter {
     const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
     const createdAt = /* @__PURE__ */ new Date();
     const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
-    const wsUrl = `${wsBase}/transcribe-websocket
-
-    const
-
-
-      model: modelId
-    };
-    if (sonioxOpts?.audioFormat) {
-      initMessage.audio_format = sonioxOpts.audioFormat;
-    } else if (options?.encoding) {
+    const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
+    wsUrl.searchParams.set("api_key", this.config.apiKey);
+    const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
+    wsUrl.searchParams.set("model", modelId);
+    if (options?.encoding) {
       const encodingMap = {
         linear16: "pcm_s16le",
         pcm: "pcm_s16le",
         mulaw: "mulaw",
         alaw: "alaw"
       };
-
+      wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
     }
-    if (
-
+    if (options?.sampleRate) {
+      wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
     }
-    if (
-
+    if (options?.channels) {
+      wsUrl.searchParams.set("num_channels", options.channels.toString());
     }
+    const sonioxOpts = options?.sonioxStreaming;
     if (sonioxOpts) {
       if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
-
+        wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
       }
       if (sonioxOpts.enableLanguageIdentification) {
-
+        wsUrl.searchParams.set("enable_language_identification", "true");
       }
       if (sonioxOpts.enableEndpointDetection) {
-
+        wsUrl.searchParams.set("enable_endpoint_detection", "true");
       }
       if (sonioxOpts.enableSpeakerDiarization) {
-
+        wsUrl.searchParams.set("enable_speaker_diarization", "true");
       }
       if (sonioxOpts.context) {
-
+        wsUrl.searchParams.set(
+          "context",
+          typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
+        );
       }
       if (sonioxOpts.translation) {
-
+        wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
      }
      if (sonioxOpts.clientReferenceId) {
-
+        wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
      }
    }
    if (!sonioxOpts?.languageHints && options?.language) {
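The streaming rewrite above moves session configuration from a post-connect JSON init message onto the WebSocket URL's query string, so the server sees the full configuration during the handshake. A standalone sketch of the same construction; the parameter names are the ones visible in the hunk, while the host and option shape are assumptions:

// Build a Soniox realtime URL the way the new adapter code does.
function buildSonioxWsUrl(
  wsBase: string, // e.g. the regional wss:// host resolved by the adapter
  apiKey: string,
  model: string,
  opts?: { sampleRate?: number; channels?: number; languageHints?: string[] }
): string {
  const url = new URL(`${wsBase}/transcribe-websocket`);
  url.searchParams.set("api_key", apiKey);
  url.searchParams.set("model", model);
  if (opts?.sampleRate) url.searchParams.set("sample_rate", String(opts.sampleRate));
  if (opts?.channels) url.searchParams.set("num_channels", String(opts.channels));
  if (opts?.languageHints?.length) {
    url.searchParams.set("language_hints", JSON.stringify(opts.languageHints));
  }
  return url.toString();
}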
@@ -9976,33 +9653,24 @@ var SonioxAdapter = class extends BaseAdapter {
`[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
);
}
-
+ wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
}
if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
-
+ wsUrl.searchParams.set("enable_speaker_diarization", "true");
}
if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
-
+ wsUrl.searchParams.set("enable_language_identification", "true");
+ }
+ if (options?.interimResults !== false) {
}
let status = "connecting";
let openedAt = null;
let receivedData = false;
const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
- const ws = new WebSocketImpl(wsUrl);
+ const ws = new WebSocketImpl(wsUrl.toString());
ws.onopen = () => {
- openedAt = Date.now();
- const initPayload = JSON.stringify(initMessage);
- if (callbacks?.onRawMessage) {
- callbacks.onRawMessage({
- provider: this.name,
- direction: "outgoing",
- timestamp: Date.now(),
- payload: initPayload,
- messageType: "init"
- });
- }
- ws.send(initPayload);
status = "open";
+ openedAt = Date.now();
callbacks?.onOpen?.();
};
ws.onmessage = (event) => {
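The other substantive change here: 0.8.9 sent a JSON init payload (and mirrored it through onRawMessage) as soon as the socket opened, while 0.9.1 carries all session configuration in the URL (note `new WebSocketImpl(wsUrl.toString())`), so the open handler shrinks to state bookkeeping. A sketch of the new handler shape, with identifiers taken from the diff:

```ts
// Sketch: the open handler after the init-message removal. All configuration
// now travels in the WebSocket URL, so nothing is sent on open anymore.
function attachOpenHandler(
  ws: WebSocket,
  state: { status: string; openedAt: number | null },
  onOpen?: () => void
): void {
  ws.onopen = () => {
    state.status = "open";
    state.openedAt = Date.now(); // retained for the immediate-close heuristic below
    onOpen?.();
  };
}
```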
@@ -10011,7 +9679,8 @@ var SonioxAdapter = class extends BaseAdapter {
let messageType;
try {
const data = JSON.parse(rawPayload);
-
+ const errorMessage = data.error_message || data.error;
+ if (errorMessage) {
messageType = "error";
} else if (data.finished) {
messageType = "finished";
@@ -10027,10 +9696,10 @@ var SonioxAdapter = class extends BaseAdapter {
messageType
});
}
- if (
+ if (errorMessage) {
callbacks?.onError?.({
code: data.error_code?.toString() || "STREAM_ERROR",
- message:
+ message: errorMessage
});
return;
}
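The message handler now tolerates both error field spellings - it reads data.error_message with data.error as a fallback - before classifying the frame and invoking onError. A sketch of that classification, with the frame shape assumed from the fields the diff reads:

```ts
// Sketch: classify an incoming frame the way the new handler does.
// SonioxFrame is an assumption based on the fields the diff reads.
interface SonioxFrame {
  error_message?: string;
  error?: string;
  error_code?: number;
  finished?: boolean;
}

function classifyFrame(rawPayload: string): { type: "error" | "finished" | "data"; code?: string; message?: string } {
  const data: SonioxFrame = JSON.parse(rawPayload);
  const errorMessage = data.error_message || data.error; // tolerate either field name
  if (errorMessage) {
    return { type: "error", code: data.error_code?.toString() || "STREAM_ERROR", message: errorMessage };
  }
  return { type: data.finished ? "finished" : "data" };
}
```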
@@ -10044,7 +9713,7 @@ var SonioxAdapter = class extends BaseAdapter {
start: token.start_ms ? token.start_ms / 1e3 : 0,
end: token.end_ms ? token.end_ms / 1e3 : 0,
confidence: token.confidence,
- speaker: token.speaker
+ speaker: token.speaker ?? void 0
}));
const text = data.text || data.tokens.map((t) => t.text).join("");
const isFinal = data.tokens.every((t) => t.is_final);
@@ -10053,8 +9722,8 @@ var SonioxAdapter = class extends BaseAdapter {
text,
isFinal,
words,
- speaker: data.tokens[0]?.speaker,
- language: data.tokens[0]?.language,
+ speaker: data.tokens[0]?.speaker ?? void 0,
+ language: data.tokens[0]?.language ?? void 0,
confidence: data.tokens[0]?.confidence
};
callbacks?.onTranscript?.(event2);
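The speaker and language fields gain `?? void 0` (the compiled form of `?? undefined`): Soniox can send null for either, and the unified transcript event represents absence as undefined, so nullish coalescing keeps null from leaking through. The whole change reduces to:

```ts
// Sketch: null from the wire becomes undefined in the unified event.
// `?? void 0` in the compiled output is just `?? undefined`.
function normalizeNullable(value: string | null | undefined): string | undefined {
  return value ?? undefined;
}
```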
@@ -10081,10 +9750,10 @@ var SonioxAdapter = class extends BaseAdapter {
ws.onclose = (event) => {
status = "closed";
const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
- const
- if (
+ const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
+ if (isImmediateClose && event.code === 1e3) {
const errorMessage = [
- "Soniox closed connection
+ "Soniox closed connection immediately after opening.",
`Current config: region=${this.region}, model=${modelId}`,
"Likely causes:",
" - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
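The close handler gains a diagnostic heuristic: a clean close (code 1000) less than a second after opening, with no data received, is treated as a probable configuration error (bad key, wrong region) rather than a normal end of stream. A sketch of the check, with the thresholds copied from the diff:

```ts
// Sketch: flag a "connected, then immediately dropped" close as a likely
// configuration problem. Thresholds (1s, close code 1000) match the diff.
function isSuspiciousClose(openedAt: number | null, receivedData: boolean, closeCode: number): boolean {
  const timeSinceOpen = openedAt !== null ? Date.now() - openedAt : null;
  const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1000 && !receivedData;
  return isImmediateClose && closeCode === 1000; // written as 1e3 in the minified source
}
```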
@@ -10170,7 +9839,7 @@ var SonioxAdapter = class extends BaseAdapter {
async getModels() {
this.validateConfig();
try {
- const response = await this.
+ const response = await getModels(this.getAxiosConfig());
return response.data.models || [];
} catch (error) {
console.error("Failed to fetch Soniox models:", error);
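getModels now delegates to a standalone generated client function that receives the axios config explicitly, instead of calling a method on `this`. A hedged sketch of the calling pattern - the helper signatures here are assumptions based only on what the hunk shows, and the empty-array fallback in the catch is illustrative, since the diff cuts off after the console.error:

```ts
// Sketch of the new calling pattern. The getModels/getAxiosConfig signatures
// are assumptions from the hunk; the catch fallback is illustrative.
import type { AxiosRequestConfig } from "axios";

declare function getModels(config: AxiosRequestConfig): Promise<{ data: { models?: unknown[] } }>;

async function fetchSonioxModels(getAxiosConfig: () => AxiosRequestConfig): Promise<unknown[]> {
  try {
    const response = await getModels(getAxiosConfig());
    return response.data.models || []; // same fallback as the diff
  } catch (error) {
    console.error("Failed to fetch Soniox models:", error);
    return []; // assumed; not visible in the hunk
  }
}
```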
@@ -10197,55 +9866,82 @@ var SonioxAdapter = class extends BaseAdapter {
start: token.start_ms ? token.start_ms / 1e3 : 0,
end: token.end_ms ? token.end_ms / 1e3 : 0,
confidence: token.confidence,
- speaker: token.speaker
+ speaker: token.speaker ?? void 0
}));
return buildUtterancesFromWords(words);
}
/**
- * Normalize
+ * Normalize v1 API response to unified format
+ *
+ * @param meta - Transcription metadata from getTranscription/createTranscription
+ * @param transcript - Transcript data (text/tokens), only present when status is completed
*/
-
-
-
-
-
+ normalizeTranscription(meta, transcript) {
+ if (meta.status === TranscriptionStatus.error) {
+ return {
+ success: false,
+ provider: this.name,
+ data: {
+ id: meta.id,
+ text: "",
+ status: "error"
+ },
+ error: {
+ code: meta.error_type || "TRANSCRIPTION_ERROR",
+ message: meta.error_message || "Transcription failed"
+ },
+ raw: { meta, transcript }
+ };
+ }
+ if (!transcript) {
+ return {
+ success: true,
+ provider: this.name,
+ data: {
+ id: meta.id,
+ text: "",
+ status: meta.status,
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
+ },
+ raw: { meta }
+ };
+ }
+ const tokens = transcript.tokens || [];
+ const text = transcript.text || tokens.map((t) => t.text).join("");
+ const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
word: token.text,
start: token.start_ms / 1e3,
end: token.end_ms / 1e3,
confidence: token.confidence,
- speaker: token.speaker
- }))
+ speaker: token.speaker ?? void 0
+ }));
const speakerSet = /* @__PURE__ */ new Set();
-
-
-
- });
- }
+ tokens.forEach((t) => {
+ if (t.speaker) speakerSet.add(String(t.speaker));
+ });
const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
id,
label: `Speaker ${id}`
})) : void 0;
- const
- const
- const language = response.tokens?.find((t) => t.language)?.language;
+ const utterances = this.buildUtterancesFromTokens(tokens);
+ const language = tokens.find((t) => t.language)?.language ?? void 0;
return {
success: true,
provider: this.name,
data: {
- id:
+ id: meta.id,
text,
status: TranscriptionStatus.completed,
language,
- duration:
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
speakers,
words: words.length > 0 ? words : void 0,
utterances: utterances.length > 0 ? utterances : void 0
},
tracking: {
- requestId:
- processingTimeMs: response.total_audio_proc_ms
+ requestId: meta.id
},
- raw:
+ raw: { meta, transcript }
};
}
};
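The rewritten normalizeTranscription reflects Soniox's v1 REST split into a metadata resource and a separate transcript resource: it handles an error status from metadata alone, a metadata-only call while the job is still queued or processing, and the completed case where tokens are folded into words, speakers, and utterances. A hedged usage sketch of that two-call flow - fetchMeta and fetchTranscript are hypothetical stand-ins for the underlying REST calls; only the normalizeTranscription(meta, transcript) call mirrors the diff:

```ts
// Sketch of the two-call flow the new normalizeTranscription(meta, transcript)
// supports. fetchMeta/fetchTranscript are hypothetical stand-ins; the
// Meta/Transcript shapes are inferred from fields used in the diff.
interface Meta {
  id: string;
  status: "queued" | "processing" | "completed" | "error";
  audio_duration_ms?: number;
  error_type?: string;
  error_message?: string;
}
interface Transcript {
  text?: string;
  tokens?: Array<{ text: string; start_ms?: number; end_ms?: number; speaker?: string | null }>;
}

declare function fetchMeta(id: string): Promise<Meta>;
declare function fetchTranscript(id: string): Promise<Transcript>;
declare const adapter: { normalizeTranscription(meta: Meta, transcript?: Transcript): unknown };

async function pollAndNormalize(id: string): Promise<unknown> {
  const meta = await fetchMeta(id);
  if (meta.status !== "completed") {
    // Error and in-progress results are normalized from metadata alone.
    return adapter.normalizeTranscription(meta);
  }
  const transcript = await fetchTranscript(id); // tokens exist only once completed
  return adapter.normalizeTranscription(meta, transcript);
}
```

Note also that tracking loses the old processingTimeMs field: with the split API, only the job id is available as a request identifier.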
@@ -10669,7 +10365,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
* - Multi-channel: `MultichannelSpeechToTextResponseModel` with `transcripts[]`
*/
normalizeResponse(response) {
- const chunks =
+ const chunks = "transcripts" in response ? response.transcripts : [response];
const text = chunks.map((c) => c.text).join(" ");
const words = [];
const speakerSet = /* @__PURE__ */ new Set();
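The ElevenLabs normalizer now distinguishes the two documented response shapes with a single `"transcripts" in response` check: multichannel responses carry a transcripts array, single-channel responses are themselves the one chunk. In TypeScript terms this is ordinary in-operator narrowing over a union; a self-contained sketch:

```ts
// Sketch: the `"transcripts" in response` check from the diff as a typed helper.
interface Chunk { text: string; }
interface MultichannelResponse { transcripts: Chunk[]; }

function toChunks(response: Chunk | MultichannelResponse): Chunk[] {
  // `in` narrows the union: multichannel carries transcripts[], otherwise the
  // response itself is the single chunk and gets wrapped.
  return "transcripts" in response ? response.transcripts : [response];
}
```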
@@ -11063,9 +10759,7 @@ var listenTranscribeQueryParams = import_zod.z.object({
),
dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
- filler_words: import_zod.z.boolean().optional().describe(
- 'Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'
- ),
+ filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
keyterm: import_zod.z.array(import_zod.z.string()).optional().describe(
"Key term prompting can boost or suppress specialized terminology and brands. Only compatible with Nova-3"
),
@@ -11769,6 +11463,7 @@ __export(assemblyAIAPI_zod_exports, {
createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault: () => createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault,
createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault: () => createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault,
createTranscriptBodySummarizationDefault: () => createTranscriptBodySummarizationDefault,
+ createTranscriptBodyTemperatureDefault: () => createTranscriptBodyTemperatureDefault,
createTranscriptResponse: () => createTranscriptResponse,
createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault: () => createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault,
createTranscriptResponseLanguageDetectionOptionsCodeSwitchingDefault: () => createTranscriptResponseLanguageDetectionOptionsCodeSwitchingDefault,
@@ -11838,6 +11533,7 @@ var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
var createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault = true;
var createTranscriptBodySpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
var createTranscriptBodySummarizationDefault = false;
+ var createTranscriptBodyTemperatureDefault = 0;
var createTranscriptBodyCustomTopicsDefault = false;
var createTranscriptBody = import_zod3.z.object({
audio_end_at: import_zod3.z.number().optional().describe(
@@ -11847,10 +11543,10 @@ var createTranscriptBody = import_zod3.z.object({
"The point in time, in milliseconds, to begin transcribing in your media file. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
),
auto_chapters: import_zod3.z.boolean().optional().describe(
- "Enable [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
+ "Enable [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
),
auto_highlights: import_zod3.z.boolean().optional().describe(
- "Enable [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
+ "Enable [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights), either true or false"
),
content_safety: import_zod3.z.boolean().optional().describe(
"Enable [Content Moderation](https://www.assemblyai.com/docs/content-moderation), can be true or false"
@@ -11866,16 +11562,16 @@ var createTranscriptBody = import_zod3.z.object({
"Object containing words or phrases to replace, and the word or phrase to replace with"
)
).optional().describe(
- "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
),
disfluencies: import_zod3.z.boolean().optional().describe(
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
),
domain: import_zod3.z.string().nullish().describe(
'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
),
entity_detection: import_zod3.z.boolean().optional().describe(
- "Enable [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/
+ "Enable [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript), can be true or false"
),
filter_profanity: import_zod3.z.boolean().optional().describe(
"Filter profanity from the transcribed text, can be true or false. See [Profanity Filtering](https://www.assemblyai.com/docs/profanity-filtering) for more details."
@@ -11884,7 +11580,7 @@ var createTranscriptBody = import_zod3.z.object({
"Enable [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio), can be true or false"
),
iab_categories: import_zod3.z.boolean().optional().describe(
- "Enable [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
+ "Enable [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics), can be true or false"
),
keyterms_prompt: import_zod3.z.array(import_zod3.z.string()).optional().describe(
"Improve accuracy with up to 200 (for Universal-2) or 1000 (for Universal-3 Pro) domain-specific words or phrases (maximum 6 words per phrase). See [Keyterms Prompting](https://www.assemblyai.com/docs/pre-recorded-audio/keyterms-prompting) for more details.\n"
@@ -12132,7 +11828,7 @@ var createTranscriptBody = import_zod3.z.object({
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
),
multichannel: import_zod3.z.boolean().optional().describe(
- "Enable [Multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/
+ "Enable [Multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) transcription, can be true or false."
),
prompt: import_zod3.z.string().optional().describe(
"Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -12215,23 +11911,23 @@ var createTranscriptBody = import_zod3.z.object({
"The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
),
sentiment_analysis: import_zod3.z.boolean().optional().describe(
- "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
+ "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
),
speaker_labels: import_zod3.z.boolean().optional().describe(
- "Enable [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+ "Enable [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers), can be true or false"
),
speaker_options: import_zod3.z.object({
min_speakers_expected: import_zod3.z.number().default(createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault).describe(
- "The minimum number of speakers expected in the audio file. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/
+ "The minimum number of speakers expected in the audio file. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers) for more details."
),
max_speakers_expected: import_zod3.z.number().optional().describe(
- "<Warning>Setting this parameter too high may hurt model accuracy</Warning>\nThe maximum number of speakers expected in the audio file. The default depends on audio duration: no limit for 0-2 minutes, 10 for 2-10 minutes, and 30 for 10+ minutes. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/
+ "<Warning>Setting this parameter too high may hurt model accuracy</Warning>\nThe maximum number of speakers expected in the audio file. The default depends on audio duration: no limit for 0-2 minutes, 10 for 2-10 minutes, and 30 for 10+ minutes. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers) for more details.\n"
)
}).optional().describe(
- "Specify options for [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+ "Specify options for [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-a-range-of-possible-speakers). Use this to set a range of possible speakers."
),
speakers_expected: import_zod3.z.number().nullish().describe(
- "Tells the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/
+ "Tells the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
),
speech_models: import_zod3.z.array(
import_zod3.z.string().describe(
@@ -12307,7 +12003,7 @@ var createTranscriptBody = import_zod3.z.object({
"Enable speech understanding tasks like [Translation](https://www.assemblyai.com/docs/speech-understanding/translation), [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification), and [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting). See the task-specific docs for available options and configuration.\n"
),
summarization: import_zod3.z.boolean().optional().describe(
- "Enable [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+ "Enable [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
),
summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
@@ -12316,6 +12012,9 @@ var createTranscriptBody = import_zod3.z.object({
).or(import_zod3.z.null()).optional().describe(
'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
),
+ temperature: import_zod3.z.number().optional().describe(
+ "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
+ ),
webhook_auth_header_name: import_zod3.z.string().nullish().describe(
"The header name to be sent with the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests"
),
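Beyond the documentation-link updates, this group of hunks adds one genuinely new request field: temperature, with an exported default of 0 (createTranscriptBodyTemperatureDefault) and a Universal-3 Pro restriction noted in its description. A hedged sketch of validating a request body against a schema shaped like the diff's (the standalone zod import and the trimmed-down field set are illustrative):

```ts
// Sketch: the new temperature field validates as a plain optional number.
// Field names mirror the diff's schema; the trimmed field set is illustrative.
import { z } from "zod";

const createTranscriptBodySketch = z.object({
  audio_url: z.string(),
  temperature: z.number().optional() // new in 0.9.1; Universal-3 Pro only per its description
});

const parsed = createTranscriptBodySketch.parse({
  audio_url: "https://example.com/audio.wav",
  temperature: 0 // matches createTranscriptBodyTemperatureDefault
});
console.log(parsed.temperature);
```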
@@ -12337,7 +12036,7 @@ var createTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault =
|
|
|
12337
12036
|
var createTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
|
|
12338
12037
|
var createTranscriptResponse = import_zod3.z.object({
|
|
12339
12038
|
audio_channels: import_zod3.z.number().optional().describe(
|
|
12340
|
-
"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12039
|
+
"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
|
|
12341
12040
|
),
|
|
12342
12041
|
audio_duration: import_zod3.z.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
|
|
12343
12042
|
audio_end_at: import_zod3.z.number().nullish().describe(
|
|
@@ -12348,10 +12047,10 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12348
12047
|
),
|
|
12349
12048
|
audio_url: import_zod3.z.string().describe("The URL of the media that was transcribed"),
|
|
12350
12049
|
auto_chapters: import_zod3.z.boolean().nullish().describe(
|
|
12351
|
-
"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
|
|
12050
|
+
"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
|
|
12352
12051
|
),
|
|
12353
12052
|
auto_highlights: import_zod3.z.boolean().describe(
|
|
12354
|
-
"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
12053
|
+
"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
|
|
12355
12054
|
),
|
|
12356
12055
|
auto_highlights_result: import_zod3.z.object({
|
|
12357
12056
|
status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
|
|
@@ -12371,9 +12070,9 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12371
12070
|
})
|
|
12372
12071
|
).describe("A temporally-sequential array of Key Phrases")
|
|
12373
12072
|
}).describe(
|
|
12374
|
-
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
12073
|
+
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
|
|
12375
12074
|
).or(import_zod3.z.null()).optional().describe(
|
|
12376
|
-
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
12075
|
+
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
|
|
12377
12076
|
),
|
|
12378
12077
|
chapters: import_zod3.z.array(
|
|
12379
12078
|
import_zod3.z.object({
|
|
@@ -12386,7 +12085,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12386
12085
|
end: import_zod3.z.number().describe("The starting time, in milliseconds, for the chapter")
|
|
12387
12086
|
}).describe("Chapter of the audio file")
|
|
12388
12087
|
).nullish().describe(
|
|
12389
|
-
"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
|
|
12088
|
+
"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
|
|
12390
12089
|
),
|
|
12391
12090
|
confidence: import_zod3.z.number().nullish().describe(
|
|
12392
12091
|
"The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
|
|
@@ -12442,10 +12141,10 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12442
12141
|
"Object containing words or phrases to replace, and the word or phrase to replace with"
|
|
12443
12142
|
)
|
|
12444
12143
|
).nullish().describe(
|
|
12445
|
-
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12144
|
+
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
12446
12145
|
),
|
|
12447
12146
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
12448
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
|
|
12147
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
12449
12148
|
),
|
|
12450
12149
|
domain: import_zod3.z.string().nullish().describe(
|
|
12451
12150
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -12507,10 +12206,10 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12507
12206
|
)
|
|
12508
12207
|
}).describe("A detected entity")
|
|
12509
12208
|
).nullish().describe(
|
|
12510
|
-
"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12209
|
+
"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
|
|
12511
12210
|
),
|
|
12512
12211
|
entity_detection: import_zod3.z.boolean().nullish().describe(
|
|
12513
|
-
"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12212
|
+
"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
|
|
12514
12213
|
),
|
|
12515
12214
|
error: import_zod3.z.string().optional().describe("Error message of why the transcript failed"),
|
|
12516
12215
|
filter_profanity: import_zod3.z.boolean().nullish().describe(
|
|
@@ -12520,7 +12219,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12520
12219
|
"Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
|
|
12521
12220
|
),
|
|
12522
12221
|
iab_categories: import_zod3.z.boolean().nullish().describe(
|
|
12523
|
-
"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12222
|
+
"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
|
|
12524
12223
|
),
|
|
12525
12224
|
iab_categories_result: import_zod3.z.object({
|
|
12526
12225
|
status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
|
|
@@ -12543,9 +12242,9 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12543
12242
|
).describe("An array of results for the Topic Detection model"),
|
|
12544
12243
|
summary: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.number()).describe("The overall relevance of topic to the entire audio file")
|
|
12545
12244
|
}).describe(
|
|
12546
|
-
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12245
|
+
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
|
|
12547
12246
|
).or(import_zod3.z.null()).optional().describe(
|
|
12548
|
-
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
12247
|
+
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
|
|
12549
12248
|
),
|
|
12550
12249
|
id: import_zod3.z.string().uuid().describe("The unique identifier of your transcript"),
|
|
12551
12250
|
keyterms_prompt: import_zod3.z.array(import_zod3.z.string()).optional().describe(
|
|
@@ -12795,7 +12494,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12795
12494
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
12796
12495
|
),
|
|
12797
12496
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
12798
|
-
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12497
|
+
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
12799
12498
|
),
|
|
12800
12499
|
prompt: import_zod3.z.string().optional().describe(
|
|
12801
12500
|
"Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
@@ -12878,7 +12577,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12878
12577
|
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12879
12578
|
),
|
|
12880
12579
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
12881
|
-
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
|
|
12580
|
+
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
12882
12581
|
),
|
|
12883
12582
|
sentiment_analysis_results: import_zod3.z.array(
|
|
12884
12583
|
import_zod3.z.object({
|
|
@@ -12893,17 +12592,17 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12893
12592
|
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12894
12593
|
),
|
|
12895
12594
|
speaker: import_zod3.z.string().nullable().describe(
|
|
12896
|
-
"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12595
|
+
"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
12897
12596
|
)
|
|
12898
12597
|
}).describe("The result of the Sentiment Analysis model")
|
|
12899
12598
|
).nullish().describe(
|
|
12900
|
-
"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
|
|
12599
|
+
"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
|
|
12901
12600
|
),
|
|
12902
12601
|
speaker_labels: import_zod3.z.boolean().nullish().describe(
|
|
12903
|
-
"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12602
|
+
"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
|
|
12904
12603
|
),
|
|
12905
12604
|
speakers_expected: import_zod3.z.number().nullish().describe(
|
|
12906
|
-
"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12605
|
+
"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
|
|
12907
12606
|
),
|
|
12908
12607
|
speech_model_used: import_zod3.z.string().optional().describe(
|
|
12909
12608
|
"The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
|
|
@@ -13006,22 +12705,25 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13006
12705
|
"The status of your transcript. Possible values are queued, processing, completed, or error."
|
|
13007
12706
|
),
|
|
13008
12707
|
summarization: import_zod3.z.boolean().describe(
|
|
13009
|
-
"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
12708
|
+
"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
|
|
13010
12709
|
),
|
|
13011
12710
|
summary: import_zod3.z.string().nullish().describe(
|
|
13012
|
-
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
12711
|
+
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13013
12712
|
),
|
|
13014
12713
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
13015
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
12714
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13016
12715
|
),
|
|
13017
12716
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
13018
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
|
|
12717
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13019
12718
|
),
|
|
13020
12719
|
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
13021
12720
|
"Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
13022
12721
|
).or(import_zod3.z.null()).optional().describe(
|
|
13023
12722
|
"Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
13024
12723
|
),
|
|
12724
|
+
temperature: import_zod3.z.number().nullish().describe(
|
|
12725
|
+
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
12726
|
+
),
|
|
13025
12727
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
13026
12728
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
13027
12729
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
@@ -13042,7 +12744,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13042
12744
|
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13043
12745
|
),
|
|
13044
12746
|
speaker: import_zod3.z.string().nullable().describe(
|
|
13045
|
-
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12747
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13046
12748
|
)
|
|
13047
12749
|
})
|
|
13048
12750
|
).describe("The words in the utterance."),
|
|
@@ -13057,7 +12759,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13057
12759
|
)
|
|
13058
12760
|
})
|
|
13059
12761
|
).nullish().describe(
|
|
13060
|
-
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12762
|
+
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13061
12763
|
),
|
|
13062
12764
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
13063
12765
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
@@ -13081,7 +12783,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13081
12783
|
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13082
12784
|
),
|
|
13083
12785
|
speaker: import_zod3.z.string().nullable().describe(
|
|
13084
|
-
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12786
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13085
12787
|
)
|
|
13086
12788
|
})
|
|
13087
12789
|
).nullish().describe(
|
|
@@ -13154,7 +12856,7 @@ var getTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault = tr
|
|
|
13154
12856
|
var getTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
|
|
13155
12857
|
var getTranscriptResponse = import_zod3.z.object({
|
|
13156
12858
|
audio_channels: import_zod3.z.number().optional().describe(
|
|
13157
|
-
"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12859
|
+
"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
|
|
13158
12860
|
),
|
|
13159
12861
|
audio_duration: import_zod3.z.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
|
|
13160
12862
|
audio_end_at: import_zod3.z.number().nullish().describe(
|
|
@@ -13165,10 +12867,10 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13165
12867
|
),
|
|
13166
12868
|
audio_url: import_zod3.z.string().describe("The URL of the media that was transcribed"),
|
|
13167
12869
|
auto_chapters: import_zod3.z.boolean().nullish().describe(
|
|
13168
|
-
"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
|
|
12870
|
+
"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
|
|
13169
12871
|
),
|
|
13170
12872
|
auto_highlights: import_zod3.z.boolean().describe(
|
|
13171
|
-
"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
12873
|
+
"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
|
|
13172
12874
|
),
|
|
13173
12875
|
auto_highlights_result: import_zod3.z.object({
|
|
13174
12876
|
status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
|
|
@@ -13188,9 +12890,9 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13188
12890
|
})
|
|
13189
12891
|
).describe("A temporally-sequential array of Key Phrases")
|
|
13190
12892
|
}).describe(
|
|
13191
|
-
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
12893
|
+
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
|
|
13192
12894
|
).or(import_zod3.z.null()).optional().describe(
|
|
13193
|
-
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
|
|
12895
|
+
"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
|
|
13194
12896
|
),
|
|
13195
12897
|
chapters: import_zod3.z.array(
|
|
13196
12898
|
import_zod3.z.object({
|
|
@@ -13203,7 +12905,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13203
12905
|
end: import_zod3.z.number().describe("The starting time, in milliseconds, for the chapter")
|
|
13204
12906
|
}).describe("Chapter of the audio file")
|
|
13205
12907
|
).nullish().describe(
|
|
13206
|
-
"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
|
|
12908
|
+
"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
|
|
13207
12909
|
),
|
|
13208
12910
|
confidence: import_zod3.z.number().nullish().describe(
|
|
13209
12911
|
"The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
|
|
@@ -13259,10 +12961,10 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13259
12961
|
"Object containing words or phrases to replace, and the word or phrase to replace with"
|
|
13260
12962
|
)
|
|
13261
12963
|
).nullish().describe(
|
|
13262
|
-
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
12964
|
+
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
13263
12965
|
),
|
|
13264
12966
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
13265
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
|
|
12967
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
13266
12968
|
),
|
|
13267
12969
|
domain: import_zod3.z.string().nullish().describe(
|
|
13268
12970
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -13324,10 +13026,10 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13324
13026
|
)
|
|
13325
13027
|
}).describe("A detected entity")
|
|
13326
13028
|
).nullish().describe(
|
|
13327
|
-
"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
13029
|
+
"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
|
|
13328
13030
|
),
|
|
13329
13031
|
entity_detection: import_zod3.z.boolean().nullish().describe(
|
|
13330
|
-
"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
13032
|
+
"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
|
|
13331
13033
|
),
|
|
13332
13034
|
error: import_zod3.z.string().optional().describe("Error message of why the transcript failed"),
|
|
13333
13035
|
filter_profanity: import_zod3.z.boolean().nullish().describe(
|
|
@@ -13337,7 +13039,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13337
13039
|
"Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
|
|
13338
13040
|
),
|
|
13339
13041
|
iab_categories: import_zod3.z.boolean().nullish().describe(
|
|
13340
|
-
"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
13042
|
+
"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
|
|
13341
13043
|
),
|
|
13342
13044
|
iab_categories_result: import_zod3.z.object({
|
|
13343
13045
|
status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
|
|
@@ -13360,9 +13062,9 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13360
13062
|
).describe("An array of results for the Topic Detection model"),
|
|
13361
13063
|
summary: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.number()).describe("The overall relevance of topic to the entire audio file")
|
|
13362
13064
|
}).describe(
|
|
13363
|
-
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
13065
|
+
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
|
|
13364
13066
|
).or(import_zod3.z.null()).optional().describe(
|
|
13365
|
-
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
|
|
13067
|
+
"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
|
|
13366
13068
|
),
|
|
13367
13069
|
id: import_zod3.z.string().uuid().describe("The unique identifier of your transcript"),
|
|
13368
13070
|
keyterms_prompt: import_zod3.z.array(import_zod3.z.string()).optional().describe(
|
|
@@ -13612,7 +13314,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13612
13314
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
13613
13315
|
),
|
|
13614
13316
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
13615
|
-
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13317
|
+
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
13616
13318
|
),
|
|
13617
13319
|
prompt: import_zod3.z.string().optional().describe(
|
|
13618
13320
|
"Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
|
|
@@ -13695,7 +13397,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13695
13397
|
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
13696
13398
|
),
|
|
13697
13399
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
13698
|
-
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
|
|
13400
|
+
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
13699
13401
|
),
|
|
13700
13402
|
sentiment_analysis_results: import_zod3.z.array(
|
|
13701
13403
|
import_zod3.z.object({
|
|
@@ -13710,17 +13412,17 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13710
13412
|
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13711
13413
|
),
|
|
13712
13414
|
speaker: import_zod3.z.string().nullable().describe(
|
|
13713
|
-
"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13415
|
+
"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13714
13416
|
)
|
|
13715
13417
|
}).describe("The result of the Sentiment Analysis model")
|
|
13716
13418
|
).nullish().describe(
|
|
13717
|
-
"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
|
|
13419
|
+
"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
|
|
13718
13420
|
),
|
|
13719
13421
|
speaker_labels: import_zod3.z.boolean().nullish().describe(
|
|
13720
|
-
"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13422
|
+
"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
|
|
13721
13423
|
),
|
|
13722
13424
|
speakers_expected: import_zod3.z.number().nullish().describe(
|
|
13723
|
-
"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/
|
|
13425
|
+
"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
|
|
13724
13426
|
),
|
|
13725
13427
|
speech_model_used: import_zod3.z.string().optional().describe(
|
|
13726
13428
|
"The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
|
|
@@ -13823,22 +13525,25 @@ var getTranscriptResponse = import_zod3.z.object({
 "The status of your transcript. Possible values are queued, processing, completed, or error."
 ),
 summarization: import_zod3.z.boolean().describe(
-"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
 ),
 summary: import_zod3.z.string().nullish().describe(
-"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
 ),
 summary_model: import_zod3.z.string().nullish().describe(
-"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
 ),
 summary_type: import_zod3.z.string().nullish().describe(
-"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
 ),
 remove_audio_tags: import_zod3.z.enum(["all"]).describe(
 "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
 ).or(import_zod3.z.null()).optional().describe(
 "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
 ),
+temperature: import_zod3.z.number().nullish().describe(
+"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
+),
 text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
 throttled: import_zod3.z.boolean().nullish().describe(
 "True while a request is throttled and false when a request is no longer throttled"
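Both transcript response schemas in this release gain an optional `temperature` field alongside the rewritten summarization doc links (the same hunk repeats for `deleteTranscriptResponse` further down). A minimal sketch of how a consumer might read the new field — assuming the schema is importable from the package root, which this diff of the compiled `dist/index.js` does not confirm:

```ts
import { z } from "zod";
// Assumed import path; the diff only shows the bundled dist output.
import { getTranscriptResponse } from "voice-router-dev";

type TranscriptResponse = z.infer<typeof getTranscriptResponse>;

function logTemperature(payload: unknown): void {
  // parse() throws on shape mismatch; temperature is nullish, so guard it.
  const transcript: TranscriptResponse = getTranscriptResponse.parse(payload);
  if (transcript.temperature != null) {
    console.log(`sampling temperature used: ${transcript.temperature}`);
  }
}
```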
@@ -13859,7 +13564,7 @@ var getTranscriptResponse = import_zod3.z.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: import_zod3.z.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).describe("The words in the utterance."),
@@ -13874,7 +13579,7 @@ var getTranscriptResponse = import_zod3.z.object({
 )
 })
 ).nullish().describe(
-"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
 ),
 webhook_auth: import_zod3.z.boolean().describe(
 "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
@@ -13898,7 +13603,7 @@ var getTranscriptResponse = import_zod3.z.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: import_zod3.z.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).nullish().describe(
@@ -13931,7 +13636,7 @@ var deleteTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault =
 var deleteTranscriptResponseSpeechUnderstandingRequestTranslationMatchOriginalUtteranceDefault = false;
 var deleteTranscriptResponse = import_zod3.z.object({
 audio_channels: import_zod3.z.number().optional().describe(
-"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) is enabled."
 ),
 audio_duration: import_zod3.z.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
 audio_end_at: import_zod3.z.number().nullish().describe(
@@ -13942,10 +13647,10 @@ var deleteTranscriptResponse = import_zod3.z.object({
 ),
 audio_url: import_zod3.z.string().describe("The URL of the media that was transcribed"),
 auto_chapters: import_zod3.z.boolean().nullish().describe(
-"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
 ),
 auto_highlights: import_zod3.z.boolean().describe(
-"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) is enabled, either true or false"
 ),
 auto_highlights_result: import_zod3.z.object({
 status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -13965,9 +13670,9 @@ var deleteTranscriptResponse = import_zod3.z.object({
 })
 ).describe("A temporally-sequential array of Key Phrases")
 }).describe(
-"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/
+"An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
 ).or(import_zod3.z.null()).optional().describe(
-"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/
+"An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/identify-highlights) for more information.\n"
 ),
 chapters: import_zod3.z.array(
 import_zod3.z.object({
@@ -13980,7 +13685,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
 end: import_zod3.z.number().describe("The starting time, in milliseconds, for the chapter")
 }).describe("Chapter of the audio file")
 ).nullish().describe(
-"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/
+"An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/create-summarized-chapters) for more information."
 ),
 confidence: import_zod3.z.number().nullish().describe(
 "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
@@ -14036,10 +13741,10 @@ var deleteTranscriptResponse = import_zod3.z.object({
 "Object containing words or phrases to replace, and the word or phrase to replace with"
 )
 ).nullish().describe(
-"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/
+"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
 ),
 disfluencies: import_zod3.z.boolean().nullish().describe(
-'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
+'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
 ),
 domain: import_zod3.z.string().nullish().describe(
 'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -14101,10 +13806,10 @@ var deleteTranscriptResponse = import_zod3.z.object({
 )
 }).describe("A detected entity")
 ).nullish().describe(
-"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/
+"An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) for more information.\n"
 ),
 entity_detection: import_zod3.z.boolean().nullish().describe(
-"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/detect-entities-in-transcript) is enabled, can be true or false"
 ),
 error: import_zod3.z.string().optional().describe("Error message of why the transcript failed"),
 filter_profanity: import_zod3.z.boolean().nullish().describe(
@@ -14114,7 +13819,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
 "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
 ),
 iab_categories: import_zod3.z.boolean().nullish().describe(
-"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) is enabled, can be true or false"
 ),
 iab_categories_result: import_zod3.z.object({
 status: import_zod3.z.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -14137,9 +13842,9 @@ var deleteTranscriptResponse = import_zod3.z.object({
 ).describe("An array of results for the Topic Detection model"),
 summary: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.number()).describe("The overall relevance of topic to the entire audio file")
 }).describe(
-"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
+"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
 ).or(import_zod3.z.null()).optional().describe(
-"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/
+"The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/detect-discussion-topics) for more information.\n"
 ),
 id: import_zod3.z.string().uuid().describe("The unique identifier of your transcript"),
 keyterms_prompt: import_zod3.z.array(import_zod3.z.string()).optional().describe(
@@ -14389,7 +14094,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
 "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
 ),
 multichannel: import_zod3.z.boolean().nullish().describe(
-"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/
+"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
 ),
 prompt: import_zod3.z.string().optional().describe(
 "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
@@ -14472,7 +14177,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
 "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
 ),
 sentiment_analysis: import_zod3.z.boolean().nullish().describe(
-"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
+"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
 ),
 sentiment_analysis_results: import_zod3.z.array(
 import_zod3.z.object({
@@ -14487,17 +14192,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
 "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: import_zod3.z.string().nullable().describe(
-"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 }).describe("The result of the Sentiment Analysis model")
 ).nullish().describe(
-"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-
+"An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) for more information.\n"
 ),
 speaker_labels: import_zod3.z.boolean().nullish().describe(
-"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, can be true or false"
 ),
 speakers_expected: import_zod3.z.number().nullish().describe(
-"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/
+"Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers#set-number-of-speakers-expected) for more details."
 ),
 speech_model_used: import_zod3.z.string().optional().describe(
 "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
@@ -14600,22 +14305,25 @@ var deleteTranscriptResponse = import_zod3.z.object({
 "The status of your transcript. Possible values are queued, processing, completed, or error."
 ),
 summarization: import_zod3.z.boolean().describe(
-"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
 ),
 summary: import_zod3.z.string().nullish().describe(
-"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
 ),
 summary_model: import_zod3.z.string().nullish().describe(
-"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
 ),
 summary_type: import_zod3.z.string().nullish().describe(
-"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/
+"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
 ),
 remove_audio_tags: import_zod3.z.enum(["all"]).describe(
 "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
 ).or(import_zod3.z.null()).optional().describe(
 "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
 ),
+temperature: import_zod3.z.number().nullish().describe(
+"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
+),
 text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
 throttled: import_zod3.z.boolean().nullish().describe(
 "True while a request is throttled and false when a request is no longer throttled"
@@ -14636,7 +14344,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: import_zod3.z.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).describe("The words in the utterance."),
@@ -14651,7 +14359,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
 )
 })
 ).nullish().describe(
-"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
 ),
 webhook_auth: import_zod3.z.boolean().describe(
 "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
@@ -14675,7 +14383,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: import_zod3.z.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).nullish().describe(
@@ -14720,7 +14428,7 @@ var getTranscriptSentencesResponse = import_zod3.z.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: import_zod3.z.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).describe("An array of words in the sentence"),
@@ -14728,7 +14436,7 @@ var getTranscriptSentencesResponse = import_zod3.z.object({
 "The channel of the sentence. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: import_zod3.z.string().nullable().describe(
-"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).describe("An array of sentences in the transcript")
@@ -14756,7 +14464,7 @@ var getTranscriptParagraphsResponse = import_zod3.z.object({
 "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
 ),
 speaker: import_zod3.z.string().nullable().describe(
-"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/
+"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
 )
 })
 ).describe("An array of words in the paragraph")
@@ -17368,23 +17076,6 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: import_zod5.z.object({
-success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
-error: import_zod5.z.object({
-status_code: import_zod5.z.number().describe("Status code of the addon error"),
-exception: import_zod5.z.string().describe("Reason of the addon error"),
-message: import_zod5.z.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: import_zod5.z.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: import_zod5.z.object({
 success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
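The same seventeen-line `speaker_reidentification` object is dropped from every Gladia response schema in this release (this hunk plus the four identical removals below). A hedged migration sketch for callers that still read the field; the interface name is illustrative, not an export of the package:

```ts
// Illustrative shape only: 0.8.x payloads could carry the addon result,
// while the 0.9.x schemas neither validate nor type it.
interface LegacyGladiaJobResult {
  speaker_reidentification?: {
    success: boolean;
    is_empty: boolean;
    results: string;
  };
}

function speakerReidResults(job: LegacyGladiaJobResult): string | undefined {
  // Optional chaining tolerates both the old and the new payload shapes.
  return job.speaker_reidentification?.results;
}
```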
@@ -18865,23 +18556,6 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: import_zod5.z.object({
-success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
-error: import_zod5.z.object({
-status_code: import_zod5.z.number().describe("Status code of the addon error"),
-exception: import_zod5.z.string().describe("Reason of the addon error"),
-message: import_zod5.z.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: import_zod5.z.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: import_zod5.z.object({
 success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
@@ -21019,23 +20693,6 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: import_zod5.z.object({
-success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
-error: import_zod5.z.object({
-status_code: import_zod5.z.number().describe("Status code of the addon error"),
-exception: import_zod5.z.string().describe("Reason of the addon error"),
-message: import_zod5.z.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: import_zod5.z.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: import_zod5.z.object({
 success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
@@ -21335,11 +20992,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
 channels: import_zod5.z.number().min(1).max(transcriptionControllerListV2ResponseItemsItemRequestParamsChannelsMax).default(
 transcriptionControllerListV2ResponseItemsItemRequestParamsChannelsDefault
 ).describe("The number of channels of the audio stream"),
-model: import_zod5.z.enum(["solaria-1"]).describe(
-'The model used to process the audio. "solaria-1" is used by default.'
-).default(transcriptionControllerListV2ResponseItemsItemRequestParamsModelDefault).describe(
-'The model used to process the audio. "solaria-1" is used by default.'
-),
+model: import_zod5.z.enum(["solaria-1"]).describe('The model used to process the audio. "solaria-1" is used by default.').default(transcriptionControllerListV2ResponseItemsItemRequestParamsModelDefault).describe('The model used to process the audio. "solaria-1" is used by default.'),
 endpointing: import_zod5.z.number().min(transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingMin).max(transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingMax).default(
 transcriptionControllerListV2ResponseItemsItemRequestParamsEndpointingDefault
 ).describe(
@@ -23763,23 +23416,6 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: import_zod5.z.object({
-success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
-error: import_zod5.z.object({
-status_code: import_zod5.z.number().describe("Status code of the addon error"),
-exception: import_zod5.z.string().describe("Reason of the addon error"),
-message: import_zod5.z.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: import_zod5.z.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: import_zod5.z.object({
 success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
@@ -26945,23 +26581,6 @@ var historyControllerGetListV1Response = import_zod5.z.object({
 }).optional().describe(
 "If `name_consistency` has been enabled, Gladia will improve consistency of the names accross the transcription"
 ),
-speaker_reidentification: import_zod5.z.object({
-success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
-is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
-exec_time: import_zod5.z.number().describe("Time audio intelligence model took to complete the task"),
-error: import_zod5.z.object({
-status_code: import_zod5.z.number().describe("Status code of the addon error"),
-exception: import_zod5.z.string().describe("Reason of the addon error"),
-message: import_zod5.z.string().describe("Detailed message of the addon error")
-}).nullable().describe(
-"`null` if `success` is `true`. Contains the error details of the failed model"
-),
-results: import_zod5.z.string().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-)
-}).optional().describe(
-"If `speaker_reidentification` has been enabled, results of the AI speaker reidentification."
-),
 structured_data_extraction: import_zod5.z.object({
 success: import_zod5.z.boolean().describe("The audio intelligence model succeeded to get a valid output"),
 is_empty: import_zod5.z.boolean().describe("The audio intelligence model returned an empty value"),
@@ -27255,11 +26874,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
 historyControllerGetListV1ResponseItemsItemRequestParamsSampleRateDefault
 ).describe("The sample rate of the audio stream"),
 channels: import_zod5.z.number().min(1).max(historyControllerGetListV1ResponseItemsItemRequestParamsChannelsMax).default(historyControllerGetListV1ResponseItemsItemRequestParamsChannelsDefault).describe("The number of channels of the audio stream"),
-model: import_zod5.z.enum(["solaria-1"]).describe(
-'The model used to process the audio. "solaria-1" is used by default.'
-).default(historyControllerGetListV1ResponseItemsItemRequestParamsModelDefault).describe(
-'The model used to process the audio. "solaria-1" is used by default.'
-),
+model: import_zod5.z.enum(["solaria-1"]).describe('The model used to process the audio. "solaria-1" is used by default.').default(historyControllerGetListV1ResponseItemsItemRequestParamsModelDefault).describe('The model used to process the audio. "solaria-1" is used by default.'),
 endpointing: import_zod5.z.number().min(historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingMin).max(historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingMax).default(
 historyControllerGetListV1ResponseItemsItemRequestParamsEndpointingDefault
 ).describe(
@@ -36420,6 +36035,7 @@ __export(sonioxPublicAPI_zod_exports, {
 createTemporaryApiKeyBody: () => createTemporaryApiKeyBody,
 createTemporaryApiKeyBodyClientReferenceIdMaxOne: () => createTemporaryApiKeyBodyClientReferenceIdMaxOne,
 createTemporaryApiKeyBodyExpiresInSecondsMax: () => createTemporaryApiKeyBodyExpiresInSecondsMax,
+createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne: () => createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne,
 createTranscriptionBody: () => createTranscriptionBody2,
 createTranscriptionBodyAudioUrlMaxOne: () => createTranscriptionBodyAudioUrlMaxOne,
 createTranscriptionBodyAudioUrlRegExpOne: () => createTranscriptionBodyAudioUrlRegExpOne,
@@ -36550,11 +36166,11 @@ var getTranscriptionsResponse = import_zod10.z.object({
 });
 var createTranscriptionBodyModelMaxThree = 32;
 var createTranscriptionBodyAudioUrlMaxOne = 4096;
-var createTranscriptionBodyAudioUrlRegExpOne =
+var createTranscriptionBodyAudioUrlRegExpOne = /^https?:\/\/[^\s]+$/;
 var createTranscriptionBodyLanguageHintsItemMax = 10;
 var createTranscriptionBodyLanguageHintsMaxOne = 100;
 var createTranscriptionBodyWebhookUrlMaxOne = 256;
-var createTranscriptionBodyWebhookUrlRegExpOne =
+var createTranscriptionBodyWebhookUrlRegExpOne = /^https?:\/\/[^\s]+$/;
 var createTranscriptionBodyWebhookAuthHeaderNameMaxOne = 256;
 var createTranscriptionBodyWebhookAuthHeaderValueMaxOne = 256;
 var createTranscriptionBodyClientReferenceIdMaxOne = 256;
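The `-` sides of the two RegExp assignments above are truncated in this rendering, so the old values are not recoverable here; 0.9.1 pins both the audio URL and webhook URL patterns to `/^https?:\/\/[^\s]+$/`. A quick check of what that pattern accepts:

```ts
// Same literal as the 0.9.1 dist output; redeclared locally for the demo.
const audioUrlRegExp = /^https?:\/\/[^\s]+$/;

console.log(audioUrlRegExp.test("https://example.com/call.wav")); // true
console.log(audioUrlRegExp.test("ftp://example.com/call.wav"));   // false: only http(s)
console.log(audioUrlRegExp.test("https://bad host/a.wav"));       // false: no whitespace allowed
```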
@@ -36702,22 +36318,25 @@ var getModelsResponse = import_zod10.z.object({
 });
 var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
 var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
+var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
 var createTemporaryApiKeyBody = import_zod10.z.object({
 usage_type: import_zod10.z.enum(["transcribe_websocket"]),
 expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
-client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique.")
+client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
+single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
+max_session_duration_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne).or(import_zod10.z.null()).optional().describe(
+"Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
+)
 });

 // src/generated/soniox/streaming-types.zod.ts
 var streaming_types_zod_exports = {};
 __export(streaming_types_zod_exports, {
 sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
-sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
 sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
 sonioxContextSchema: () => sonioxContextSchema,
 sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
 sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
-sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
 sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
 sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
 sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
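`createTemporaryApiKeyBody` picks up two optional, nullable fields: `single_use` and `max_session_duration_seconds`, the latter capped by the new constant (`18e3`, i.e. 18000 seconds). A sketch of a body the 0.9.1 schema should accept; the import path is an assumption, since the diff only shows the compiled module:

```ts
// Assumed import path; adjust to wherever the package re-exports the schema.
import { createTemporaryApiKeyBody } from "voice-router-dev";

const body = createTemporaryApiKeyBody.parse({
  usage_type: "transcribe_websocket",
  expires_in_seconds: 300,             // 1..3600 per the schema
  client_reference_id: "session-42",   // optional tracking id, max 256 chars
  single_use: true,                    // new: key is valid for one use only
  max_session_duration_seconds: 1800   // new: 1..18000; socket dropped past this
});
```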
@@ -36731,7 +36350,7 @@ __export(streaming_types_zod_exports, {
 streamingUpdateConfigParams: () => streamingUpdateConfigParams3
 });
 var import_zod11 = require("zod");
-var 
+var sonioxAudioFormatSchema = import_zod11.z.enum([
 "auto",
 "aac",
 "aiff",
@@ -36741,10 +36360,7 @@ var sonioxAutoDetectedAudioFormatSchema = import_zod11.z.enum([
 "mp3",
 "ogg",
 "wav",
-"webm"
-]);
-var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
-// Signed PCM
+"webm",
 "pcm_s8",
 "pcm_s16le",
 "pcm_s16be",
@@ -36752,7 +36368,6 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
 "pcm_s24be",
 "pcm_s32le",
 "pcm_s32be",
-// Unsigned PCM
 "pcm_u8",
 "pcm_u16le",
 "pcm_u16be",
@@ -36760,86 +36375,81 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
 "pcm_u24be",
 "pcm_u32le",
 "pcm_u32be",
-// Float PCM
 "pcm_f32le",
 "pcm_f32be",
 "pcm_f64le",
 "pcm_f64be",
-// Companded
 "mulaw",
 "alaw"
 ]);
-var sonioxAudioFormatSchema = import_zod11.z.union([
-sonioxAutoDetectedAudioFormatSchema,
-sonioxPcmAudioEncodingSchema
-]);
 var sonioxOneWayTranslationSchema = import_zod11.z.object({
 type: import_zod11.z.literal("one_way"),
-target_language: import_zod11.z.string()
+target_language: import_zod11.z.string()
 });
 var sonioxTwoWayTranslationSchema = import_zod11.z.object({
 type: import_zod11.z.literal("two_way"),
-language_a: import_zod11.z.string()
-language_b: import_zod11.z.string()
+language_a: import_zod11.z.string(),
+language_b: import_zod11.z.string()
 });
 var sonioxTranslationConfigSchema = import_zod11.z.union([
 sonioxOneWayTranslationSchema,
 sonioxTwoWayTranslationSchema
 ]);
 var sonioxContextGeneralItemSchema = import_zod11.z.object({
-key: import_zod11.z.string()
-value: import_zod11.z.string()
+key: import_zod11.z.string(),
+value: import_zod11.z.string()
 });
 var sonioxTranslationTermSchema = import_zod11.z.object({
-source: import_zod11.z.string()
-target: import_zod11.z.string()
+source: import_zod11.z.string(),
+target: import_zod11.z.string()
 });
 var sonioxStructuredContextSchema = import_zod11.z.object({
-general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional()
-text: import_zod11.z.string().optional()
-terms: import_zod11.z.array(import_zod11.z.string()).optional()
-translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional()
+general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional(),
+text: import_zod11.z.string().optional(),
+terms: import_zod11.z.array(import_zod11.z.string()).optional(),
+translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional()
 });
 var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
 var sonioxRealtimeModelSchema = import_zod11.z.enum([
+"stt-rt-v4",
 "stt-rt-v3",
 "stt-rt-preview",
 "stt-rt-v3-preview",
 "stt-rt-preview-v2"
 ]);
 var streamingTranscriberParams3 = import_zod11.z.object({
-model: sonioxRealtimeModelSchema
-audioFormat: sonioxAudioFormatSchema.optional()
-sampleRate: import_zod11.z.number().optional()
-numChannels: import_zod11.z.number().
-languageHints: import_zod11.z.array(import_zod11.z.string()).optional()
-context: sonioxContextSchema.optional()
-enableSpeakerDiarization: import_zod11.z.boolean().optional()
-enableLanguageIdentification: import_zod11.z.boolean().optional()
-enableEndpointDetection: import_zod11.z.boolean().optional()
-translation: sonioxTranslationConfigSchema.optional()
-clientReferenceId: import_zod11.z.string().optional()
-});
-var sonioxTranslationStatusSchema = import_zod11.z.enum(["
+model: sonioxRealtimeModelSchema,
+audioFormat: sonioxAudioFormatSchema.optional(),
+sampleRate: import_zod11.z.number().optional(),
+numChannels: import_zod11.z.number().optional(),
+languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
+context: sonioxContextSchema.optional(),
+enableSpeakerDiarization: import_zod11.z.boolean().optional(),
+enableLanguageIdentification: import_zod11.z.boolean().optional(),
+enableEndpointDetection: import_zod11.z.boolean().optional(),
+translation: sonioxTranslationConfigSchema.optional(),
+clientReferenceId: import_zod11.z.string().optional()
+});
+var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
 var sonioxTokenSchema = import_zod11.z.object({
-text: import_zod11.z.string()
-start_ms: import_zod11.z.number().optional()
-end_ms: import_zod11.z.number().optional()
-confidence: import_zod11.z.number()
-is_final: import_zod11.z.boolean()
-speaker: import_zod11.z.string().optional()
-
-
-
+text: import_zod11.z.string(),
+start_ms: import_zod11.z.number().optional(),
+end_ms: import_zod11.z.number().optional(),
+confidence: import_zod11.z.number(),
+is_final: import_zod11.z.boolean(),
+speaker: import_zod11.z.string().optional(),
+translation_status: sonioxTranslationStatusSchema.optional(),
+language: import_zod11.z.string().optional(),
+source_language: import_zod11.z.string().optional()
 });
 var sonioxStreamingResponseSchema = import_zod11.z.object({
-text: import_zod11.z.string()
-tokens: import_zod11.z.array(sonioxTokenSchema)
-final_audio_proc_ms: import_zod11.z.number()
-total_audio_proc_ms: import_zod11.z.number()
-finished: import_zod11.z.boolean().optional()
-
-
+text: import_zod11.z.string(),
+tokens: import_zod11.z.array(sonioxTokenSchema),
+final_audio_proc_ms: import_zod11.z.number(),
+total_audio_proc_ms: import_zod11.z.number(),
+finished: import_zod11.z.boolean().optional(),
+error_code: import_zod11.z.number().optional(),
+error_message: import_zod11.z.string().optional()
 });
 var sonioxRecorderStateSchema = import_zod11.z.enum([
 "Init",
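The net effect of this hunk: the old `sonioxAutoDetectedAudioFormatSchema`/`sonioxPcmAudioEncodingSchema` split collapses into the single `sonioxAudioFormatSchema` enum, the object literals gain the commas missing from the rendered 0.8.9 output, `sonioxRealtimeModelSchema` adds `"stt-rt-v4"`, tokens gain `translation_status`/`language`/`source_language`, and the streaming response gains `error_code`/`error_message` (several removed lines above are truncated to nothing in this rendering, so their exact old text is unrecoverable). A sketch of params the repaired schema should now parse — schemas re-declared locally, since the export path is not confirmed by this diff:

```ts
import { z } from "zod";

// Local re-declarations mirroring the 0.9.1 dist output (audio enum abridged).
const sonioxAudioFormatSchema = z.enum(["auto", "wav", "pcm_s16le", "mulaw", "alaw"]);
const sonioxRealtimeModelSchema = z.enum([
  "stt-rt-v4", "stt-rt-v3", "stt-rt-preview", "stt-rt-v3-preview", "stt-rt-preview-v2"
]);
const streamingTranscriberParams = z.object({
  model: sonioxRealtimeModelSchema,
  audioFormat: sonioxAudioFormatSchema.optional(),
  sampleRate: z.number().optional(),
  numChannels: z.number().optional(),
  enableSpeakerDiarization: z.boolean().optional()
});

// Accepted: the new v4 realtime model with raw PCM input.
streamingTranscriberParams.parse({
  model: "stt-rt-v4",
  audioFormat: "pcm_s16le",
  sampleRate: 16000,
  numChannels: 1
});
```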
@@ -37405,8 +37015,8 @@ var BatchOnlyProviders = AllProviders.filter(
 );

 // src/generated/deepgram/schema/index.ts
-var 
-__export(
+var schema_exports5 = {};
+__export(schema_exports5, {
 V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
 V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
 V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
@@ -37661,8 +37271,8 @@ var V1SpeakPostParametersSampleRate = {
 };

 // src/generated/openai/schema/index.ts
-var 
-__export(
+var schema_exports6 = {};
+__export(schema_exports6, {
 AudioResponseFormat: () => AudioResponseFormat,
 CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
 CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
@@ -37956,6 +37566,16 @@ var ToolChoiceOptions = {
 required: "required"
 };

+// src/generated/openai/schema/transcriptionDiarizedSegmentType.ts
+var TranscriptionDiarizedSegmentType = {
+transcripttextsegment: "transcript.text.segment"
+};
+
+// src/generated/openai/schema/transcriptionInclude.ts
+var TranscriptionInclude = {
+logprobs: "logprobs"
+};
+
 // src/generated/openai/schema/transcriptTextDeltaEventType.ts
 var TranscriptTextDeltaEventType = {
 transcripttextdelta: "transcript.text.delta"
@@ -37981,16 +37601,6 @@ var TranscriptTextUsageTokensType = {
 tokens: "tokens"
 };

-// src/generated/openai/schema/transcriptionDiarizedSegmentType.ts
-var TranscriptionDiarizedSegmentType = {
-transcripttextsegment: "transcript.text.segment"
-};
-
-// src/generated/openai/schema/transcriptionInclude.ts
-var TranscriptionInclude = {
-logprobs: "logprobs"
-};
-
 // src/generated/openai/schema/vadConfigType.ts
 var VadConfigType = {
 server_vad: "server_vad"
@@ -38002,8 +37612,8 @@ var VoiceResourceObject = {
 };

 // src/generated/speechmatics/schema/index.ts
-var 
-__export(
+var schema_exports7 = {};
+__export(schema_exports7, {
 AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
 ErrorResponseError: () => ErrorResponseError,
 GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
@@ -38192,32 +37802,6 @@ var WrittenFormRecognitionResultType = {
 word: "word"
 };

-// src/generated/soniox/schema/index.ts
-var schema_exports7 = {};
-__export(schema_exports7, {
-TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
-TranscriptionMode: () => TranscriptionMode,
-TranscriptionStatus: () => TranscriptionStatus,
-TranslationConfigType: () => TranslationConfigType
-});
-
-// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
-var TemporaryApiKeyUsageType = {
-transcribe_websocket: "transcribe_websocket"
-};
-
-// src/generated/soniox/schema/transcriptionMode.ts
-var TranscriptionMode = {
-real_time: "real_time",
-async: "async"
-};
-
-// src/generated/soniox/schema/translationConfigType.ts
-var TranslationConfigType = {
-one_way: "one_way",
-two_way: "two_way"
-};
-
 // src/generated/elevenlabs/schema/index.ts
 var schema_exports8 = {};
 __export(schema_exports8, {
@@ -38372,8 +37956,8 @@ var getJobsQueryParams = import_zod12.z.object({
 var getJobsResponseJobsItemDurationMin = 0;
 var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
 var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
-var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp =
-var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp =
+var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
 var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38571,8 +38155,8 @@ var getJobsJobidParams = import_zod12.z.object({
 var getJobsJobidResponseJobDurationMin = 0;
 var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
 var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
-var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp =
-var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp =
+var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
 var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38769,8 +38353,8 @@ var deleteJobsJobidQueryParams = import_zod12.z.object({
 var deleteJobsJobidResponseJobDurationMin = 0;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
-var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp =
-var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp =
+var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
@@ -38973,8 +38557,8 @@ var getJobsJobidTranscriptQueryParams = import_zod12.z.object({
 var getJobsJobidTranscriptResponseJobDurationMin = 0;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMin = 0;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
-var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp =
-var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp =
+var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
+var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
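As with the Soniox constants earlier, the `-` sides of these Speechmatics RegExp assignments are truncated in this rendering; 0.9.1 pins the permitted punctuation marks to `/^(.|all)$/` (any single character, or the literal `all`) and channel diarization labels to `/^[A-Za-z0-9._]+$/`. A quick check of the label pattern:

```ts
// Same literal as the 0.9.1 dist output; redeclared locally for the demo.
const channelLabelRegExp = /^[A-Za-z0-9._]+$/;

console.log(channelLabelRegExp.test("agent_1"));   // true
console.log(channelLabelRegExp.test("caller.L"));  // true
console.log(channelLabelRegExp.test("channel 2")); // false: whitespace not allowed
```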